File Coverage

Bio/Structure/IO/pdb.pm

Criterion	Covered	Total	%
statement	595	751	79.2
branch	231	398	58.0
condition	79	165	47.8
subroutine	17	18	94.4
pod	2	2	100.0
total	924	1334	69.2

line	stmt	bran	cond	sub	pod	time	code
1							#
2							# BioPerl module for Bio::Structure::IO::pdb
3							#
4							# Please direct questions and support issues to
5							#
6							# Cared for by Kris Boulez
7							#
8							# Copyright 2001, 2002 Kris Boulez
9							#
10							# Framework is a copy of Bio::SeqIO::embl.pm
11							#
12							# You may distribute this module under the same terms as perl itself
13
14							# POD documentation - main docs before the code
15
16							=head1 NAME
17
18							Bio::Structure::IO::pdb - PDB input/output stream
19
20							=head1 SYNOPSIS
21
22							It is probably best not to use this object directly, but
23							rather go through the Bio::Structure::IO handler system. Go:
24
25							$stream = Bio::Structure::IO->new(-file => $filename,
26							-format => 'PDB');
27
28							while (my $structure = $stream->next_structure) {
29							# do something with $structure
30							}
31
32							=head1 DESCRIPTION
33
34							This object can transform Bio::Structure objects to and from PDB flat
35							file databases. The working is similar to that of the Bio::SeqIO handlers.
36
37							=head1 FEEDBACK
38
39							=head2 Mailing Lists
40
41							User feedback is an integral part of the evolution of this and other
42							Bioperl modules. Send your comments and suggestions preferably to one
43							of the Bioperl mailing lists. Your participation is much appreciated.
44
45							bioperl-l@bioperl.org - General discussion
46							http://bioperl.org/wiki/Mailing_lists - About the mailing lists
47
48							=head2 Support
49
50							Please direct usage questions or support issues to the mailing list:
51
52							I
53
54							rather than to the module maintainer directly. Many experienced and
55							reponsive experts will be able look at the problem and quickly
56							address it. Please include a thorough description of the problem
57							with code and data examples if at all possible.
58
59							=head2 Reporting Bugs
60
61							Report bugs to the Bioperl bug tracking system to help us keep track
62							the bugs and their resolution. Bug reports can be submitted via the
63							web:
64
65							https://github.com/bioperl/bioperl-live/issues
66
67							=head1 AUTHOR - Kris Boulez
68
69							Email kris.boulez@algonomics.com
70
71							=head1 APPENDIX
72
73							The rest of the documentation details each of the object methods.
74							Internal methods are usually preceded with a _
75
76							=cut
77
78
79							# Let the code begin...
80
81
82							package Bio::Structure::IO::pdb;
83	1			1		5	use strict;
	1					2
	1					24
84	1			1		266	use Bio::Structure::Entry;
	1					1
	1					25
85							#use Bio::Structure::Model;
86							#use Bio::Structure::Chain;
87							#use Bio::Structure::Residue;
88	1			1		254	use Bio::Structure::Atom;
	1					2
	1					21
89	1			1		306	use Bio::SeqFeature::Generic;
	1					2
	1					25
90	1			1		268	use Bio::Annotation::Reference;
	1					2
	1					26
91
92	1			1		5	use base qw(Bio::Structure::IO);
	1					2
	1					6210
93
94							sub _initialize {
95	3			3		8	my($self,@args) = @_;
96
97	3					11	$self->SUPER::_initialize(@args);
98
99	3					10	my ($noheader, $noatom) =
100							$self->_rearrange([qw(
101							NOHEADER
102							NOATOM
103							)],
104							@args);
105	3	50				7	$noheader && $self->_noheader($noheader);
106	3	50				13	$noatom && $self->_noatom($noatom);
107							}
108
109
110							=head2 next_structure;
111
112							Title : next_structure
113							Usage : $struc = $stream->next_structure()
114							Function: returns the next structure in the stream
115							Returns : Bio::Structure object
116							Args :
117
118							=cut
119
120							sub next_structure {
121	2			2	1	7	my ($self,@args) = @_;
122	2					2	my ($line);
123	2					4	my ($obslte, $title, $caveat, $compnd, $source, $keywds,
124							$expdta, $author, %revdat, $revdat, $sprsde, $jrnl, %remark, $dbref,
125							$turn, $ssbond, $link, $hydbnd, $sltbrg, $cispep,
126							$site, $cryst1, $tvect,);
127	2					12	my $struc = Bio::Structure::Entry->new(-id => 'created from pdb.pm');
128	2					6	my $all_headers = ( !$self->_noheader ); # we'll parse all headers and store as annotation
129	2					2	my %header; # stores all header RECORDs an is stored as annotations when ATOM is reached
130
131
132	2					9	$line = $self->_readline; # This needs to be before the first eof() test
133
134	2	50				4	if( !defined $line ) {
135	0					0	return; # no throws - end of file
136							}
137
138	2	50				8	if( $line =~ /^\s+$/ ) {
139	0					0	while( defined ($line = $self->_readline) ) {
140	0	0				0	$line =~/\S/ && last;
141							}
142							}
143	2	50				6	if( !defined $line ) {
144	0					0	return; # end of file
145							}
146	2	50				6	$line =~ /^HEADER\s+\S+/ \|\| $self->throw("PDB stream with no HEADER. Not pdb in my book");
147	2					9	my($header_line) = unpack "x10 a56", $line;
148	2					4	$header{'header'} = $header_line;
149	2					6	my($class, $depdate, $idcode) = unpack "x10 a40 a9 x3 a4", $line;
150	2					8	$idcode =~ s/^\s(\S+)\s$/$1/;
151	2					6	$struc->id($idcode);
152	2					11	$self->debug("PBD c $class d $depdate id $idcode\n"); # XXX KB
153
154	2					2	my $buffer = $line;
155
156							BEFORE_COORDINATES :
157	2					9	until( !defined $buffer ) {
158	292					305	$_ = $buffer;
159
160							# Exit at start of coordinate section
161	292	100				540	last if /^(MODEL\|ATOM\|HETATM)/;
162
163							# OBSLTE line(s)
164	290	50	33			402	if (/^OBSLTE / && $all_headers) {
165	0					0	$obslte = $self->_read_PDB_singlecontline("OBSLTE","12-70",\$buffer);
166	0					0	$header{'obslte'} = $obslte;
167							}
168
169							# TITLE line(s)
170	290	100	66			375	if (/^TITLE / && $all_headers) {
171	1					4	$title = $self->_read_PDB_singlecontline("TITLE","11-70",\$buffer);
172	1					12	$header{'title'} = $title;
173							}
174
175							# CAVEAT line(s)
176	290	50	33			404	if (/^CAVEAT / && $all_headers) {
177	0					0	$caveat = $self->_read_PDB_singlecontline("CAVEAT","12-70",\$buffer);
178	0					0	$header{'caveat'} = $caveat;
179							}
180
181							# COMPND line(s)
182	290	100	66			392	if (/^COMPND / && $all_headers) {
183	2					6	$compnd = $self->_read_PDB_singlecontline("COMPND","11-70",\$buffer);
184	2					5	$header{'compnd'} = $compnd;
185	2					18	$self->debug("get COMPND $compnd\n");
186							}
187
188							# SOURCE line(s)
189	290	100	66			359	if (/^SOURCE / && $all_headers) {
190	2					7	$source = $self->_read_PDB_singlecontline("SOURCE","11-70",\$buffer);
191	2					5	$header{'source'} = $source;
192							}
193
194							# KEYWDS line(s)
195	290	100	66			400	if (/^KEYWDS / && $all_headers) {
196	1					5	$keywds = $self->_read_PDB_singlecontline("KEYWDS","11-70",\$buffer);
197	1					2	$header{'keywds'} = $keywds;
198							}
199
200							# EXPDTA line(s)
201	290	100	66			370	if (/^EXPDTA / && $all_headers) {
202	1					3	$expdta = $self->_read_PDB_singlecontline("EXPDTA","11-70",\$buffer);
203	1					2	$header{'expdta'} = $expdta;
204							}
205
206							# AUTHOR line(s)
207	290	100	66			410	if (/^AUTHOR / && $all_headers) {
208	2					4	$author = $self->_read_PDB_singlecontline("AUTHOR","11-70",\$buffer);
209	2					4	$header{'author'} = $author;
210							}
211
212							# REVDAT line(s)
213							# a bit more elaborate as we also store the modification number
214	290	100	66			385	if (/^REVDAT / && $all_headers) {
215							##my($modnum,$rol) = unpack "x7 A3 x3 A53", $_;
216							##$modnum =~ s/\s+//; # remove spaces
217							##$revdat{$modnum} .= $rol;
218	3					7	my ($rol) = unpack "x7 a59", $_;
219	3					7	$revdat .= $rol;
220	3					3	$header{'revdat'} = $revdat;
221							}
222
223							# SPRSDE line(s)
224	290	50	33			394	if (/^SPRSDE / && $all_headers) {
225	0					0	$sprsde = $self->_read_PDB_singlecontline("SPRSDE","12-70",\$buffer);
226	0					0	$header{'sprsde'} = $sprsde;
227							}
228
229							# jRNL line(s)
230	290	100	66			398	if (/^JRNL / && $all_headers) {
231	2					6	$jrnl = $self->_read_PDB_jrnl(\$buffer);
232	2					5	$struc->annotation->add_Annotation('reference',$jrnl);
233	2					4	$header{'jrnl'} = 1; # when writing out, we need a way to check there was a JRNL record (not mandatory)
234							}
235
236							# REMARK line(s)
237							# we only parse the "REMARK 1" lines (additional references)
238							# thre rest is stored in %remark (indexed on remarkNum) (pack does space-padding)
239	290	100	66			869	if (/^REMARK\s+(\d+)\s*/ && $all_headers) {
240	240					360	my $remark_num = $1;
241	240	100				317	if ($remark_num == 1) {
242	2					5	my @refs = $self->_read_PDB_remark_1(\$buffer);
243							# How can we find the primary reference when writing (JRNL record) XXX KB
244	2					3	foreach my $ref (@refs) {
245	8					13	$struc->annotation->add_Annotation('reference', $ref);
246							}
247							# $_ still holds the REMARK_1 line, $buffer now contains the first non
248							# REMARK_1 line. We need to parse it in this pass (so no else block)
249	2					3	$_ = $buffer;
250							}
251							# for the moment I don't see a better solution (other then using goto)
252	240	50				474	if (/^REMARK\s+(\d+)\s*/) {
253	240					265	my $r_num = $1;
254	240	50				320	if ($r_num != 1) { # other remarks, we store literlly at the moment
255	240					450	my ($rol) = unpack "x11 a59", $_;
256	240					503	$remark{$r_num} .= $rol;
257							}
258							}
259							} # REMARK
260
261							# DBREF line(s)
262							# references to sequences in other databases
263							# we store as 'dblink' annotations and whole line as simple annotation (round-trip)
264	290	100	66			441	if (/^DBREF / && $all_headers) {
265	3					8	my ($rol) = unpack "x7 a61", $_;
266	3					7	$dbref .= $rol;
267	3					4	$header{'dbref'} = $dbref;
268	3					7	my ($db, $acc) = unpack "x26 a6 x1 a8", $_;
269	3					10	$db =~ s/\s*$//;
270	3					10	$acc =~ s/\s*$//;
271	3					10	my $link = Bio::Annotation::DBLink->new;
272	3					8	$link->database($db);
273	3					6	$link->primary_id($acc);
274	3					7	$struc->annotation->add_Annotation('dblink', $link);
275							} # DBREF
276
277							# SEQADV line(s)
278	290	50	33			424	if (/^SEQADV / && $all_headers) {
279	0					0	my ($rol) = unpack "x7 a63", $_;
280	0					0	$header{'seqadv'} .= $rol;
281							} # SEQADV
282
283							# SEQRES line(s)
284							# this is (I think) the sequence of macromolecule that was analysed
285							# this will be returned when doing $struc->seq
286	290	100	66			381	if (/^SEQRES / && $all_headers) {
287	8					19	my ($rol) = unpack "x8 a62", $_;
288	8					16	$header{'seqres'} .= $rol;
289							} # SEQRES
290
291							# MODRES line(s)
292	290	50	33			375	if (/^MODRES / && $all_headers) {
293	0					0	my ($rol) = unpack "x7 a63", $_;
294	0					0	$header{'modres'} .= $rol;
295							} # MODRES
296
297							# HET line(s)
298	290	100	66			360	if (/^HET / && $all_headers) {
299	3					7	my ($rol) = unpack "x7 a63", $_;
300	3					7	$header{'het'} .= $rol;
301							} # HET
302
303							# HETNAM line(s)
304	290	100	66			394	if (/^HETNAM / && $all_headers) {
305	1					3	my ($rol) = unpack "x8 a62", $_;
306	1					2	$header{'hetnam'} .= $rol;
307							} # HETNAM
308
309							# HETSYN line(s)
310	290	50	33			385	if (/^HETSYN / && $all_headers) {
311	0					0	my ($rol) = unpack "x8 a62", $_;
312	0					0	$header{'hetsyn'} .= $rol;
313							} # HETSYN
314
315							# FORMUL line(s)
316	290	100	66			369	if (/^FORMUL / && $all_headers) {
317	4					10	my ($rol) = unpack "x8 a62", $_;
318	4					14	$header{'formul'} .= $rol;
319							} # FORMUL
320
321							# HELIX line(s)
322							# store as specific object ??
323	290	100	66			443	if (/^HELIX / && $all_headers) {
324	2					4	my ($rol) = unpack "x7 a69", $_;
325	2					4	$header{'helix'} .= $rol;
326							} # HELIX
327
328							# SHEET line(s)
329							# store as specific object ??
330	290	100	66			400	if (/^SHEET / && $all_headers) {
331	3					7	my ($rol) = unpack "x7 a63", $_;
332	3					7	$header{'sheet'} .= $rol;
333							} # SHEET
334
335							# TURN line(s)
336							# store as specific object ??
337	290	50	33			361	if (/^TURN / && $all_headers) {
338	0					0	my ($rol) = unpack "x7 a63", $_;
339	0					0	$turn .= $rol;
340	0					0	$header{'turn'} = $turn;
341							} # TURN
342
343							# SSBOND line(s)
344							# store in connection-like object (see parsing of CONECT record)
345	290	100	66			392	if (/^SSBOND / && $all_headers) {
346	3					7	my ($rol) = unpack "x7 a65", $_;
347	3					6	$ssbond .= $rol;
348	3					4	$header{'ssbond'} = $ssbond;
349							} # SSBOND
350
351							# LINK
352							# store like SSBOND ?
353	290	100	66			403	if (/^LINK / && $all_headers) {
354	3					7	my ($rol) = unpack "x12 a60", $_;
355	3					5	$link .= $rol;
356	3					5	$header{'link'} = $link;
357							} # LINK
358
359							# HYDBND
360							# store like SSBOND
361	290	50	33			417	if (/^HYDBND / && $all_headers) {
362	0					0	my ($rol) = unpack "x12 a60", $_;
363	0					0	$hydbnd .= $rol;
364	0					0	$header{'hydbnd'} = $hydbnd;
365							} # HYDBND
366
367							# SLTBRG
368							# store like SSBOND ?
369	290	50	33			374	if (/^SLTBRG / && $all_headers) {
370	0					0	my ($rol) = unpack "x12 a60",$_;
371	0					0	$sltbrg .= $rol;
372	0					0	$header{'sltbrg'} = $sltbrg;
373							} # SLTBRG
374
375							# CISPEP
376							# store like SSBOND ?
377	290	50	33			404	if (/^CISPEP / && $all_headers) {
378	0					0	my ($rol) = unpack "x7 a52", $_;
379	0					0	$cispep .= $rol;
380	0					0	$header{'cispep'} = $cispep;
381							}
382
383							# SITE line(s)
384	290	50	33			360	if (/^SITE / && $all_headers) {
385	0					0	my ($rol) = unpack "x7 a54", $_;
386	0					0	$site .= $rol;
387	0					0	$header{'site'} = $site;
388							} # SITE
389
390							# CRYST1 line
391							# store in some crystallographic subobject ?
392	290	100	66			367	if (/^CRYST1/ && $all_headers) {
393	2					5	my ($rol) = unpack "x6 a64", $_;
394	2					3	$cryst1 .= $rol;
395	2					4	$header{'cryst1'} = $cryst1;
396							} # CRYST1
397
398							# ORIGXn line(s) (n=1,2,3)
399	290	100	66			388	if (/^(ORIGX\d) / && $all_headers) {
400	6					16	my $origxn = lc($1);
401	6					18	my ($rol) = unpack "x10 a45", $_;
402	6					19	$header{$origxn} .= $rol;
403							} # ORIGXn
404
405							# SCALEn line(s) (n=1,2,3)
406	290	100	66			382	if (/^(SCALE\d) / && $all_headers) {
407	6					11	my $scalen = lc($1);
408	6					13	my ($rol) = unpack "x10 a45", $_;
409	6					16	$header{$scalen} .= $rol;
410							} # SCALEn
411
412							# MTRIXn line(s) (n=1,2,3)
413	290	50	33			370	if (/^(MTRIX\d) / && $all_headers) {
414	0					0	my $mtrixn = lc($1);
415	0					0	my ($rol) = unpack "x7 a53", $_;
416	0					0	$header{$mtrixn} .= $rol;
417							} # MTRIXn
418
419							# TVECT line(s)
420	290	100	66			348	if (/^TVECT / && $all_headers) {
421	1					3	my ($rol) = unpack "x7 a63", $_;
422	1					1	$tvect .= $rol;
423	1					2	$header{'tvect'} = $tvect;
424							}
425
426							# Get next line.
427	290					456	$buffer = $self->_readline;
428							}
429
430							# store %header entries a annotations
431	2	50				4	if (%header) {
432	2					9	for my $record (keys %header) {
433	42					63	my $sim = Bio::Annotation::SimpleValue->new();
434	42					75	$sim->value($header{$record});
435	42					58	$struc->annotation->add_Annotation($record, $sim);
436							}
437							}
438							# store %remark entries as annotations
439	2	50				5	if (%remark) {
440	2					6	for my $remark_num (keys %remark) {
441	17					25	my $sim = Bio::Annotation::SimpleValue->new();
442	17					59	$sim->value($remark{$remark_num});
443	17					24	$struc->annotation->add_Annotation("remark_$remark_num", $sim);
444							}
445							}
446
447							# Coordinate section, the real meat
448							#
449							# $_ contains a line beginning with (ATOM\|MODEL)
450
451	2					5	$buffer = $_;
452
453
454	2	50	33			16	if (defined($buffer) && $buffer =~ /^(ATOM \|MODEL \|HETATM)/ ) { # can you have an entry without ATOM ?
455	2					4	while( defined ($buffer) ) { # (yes : 1a7z )
456							# read in one model at a time
457	2					5	my $model = $self->_read_PDB_coordinate_section(\$buffer, $struc);
458							# add this to $struc
459	2					8	$struc->add_model($struc, $model);
460
461	2	50	33			13	if ($buffer && $buffer !~ /^MODEL /) { # if we get here we have multiple MODELs
462	2					4	last;
463							}
464							}
465							}
466							else {
467	0					0	$self->throw("Could not find a coordinate section in this record\n");
468							}
469
470
471	2					6	until( !defined $buffer ) {
472	23					23	$_ = $buffer;
473
474							# CONNECT records
475	23	100				53	if (/^CONECT/) {
476							# do not differentiate between different type of connect (column dependant)
477	19					20	my $conect_unpack = "x6 a5 a5 a5 a5 a5 a5 a5 a5 a5 a5 a5";
478	19					62	my (@conect) = unpack $conect_unpack, $_;
479	19					34	for my $k (0 .. $#conect) {
480	209					578	$conect[$k] =~ s/\s//g;
481							}
482	19					23	my $source = shift @conect;
483	19					19	my $type;
484	19					22	for my $k (0 .. 9) {
485	190	100				284	next unless ($conect[$k] =~ /^\d+$/);
486							# 0..3 bond
487	32	50	0			48	if( $k <= 3 ) {
		0	0
		0	0
			0
488	32					26	$type = "bond";
489							}
490							# 4..5,7..8 hydrogen bonded
491							elsif( ($k >= 4 && $k <= 5) \|\| ($k >= 7 && $k <= 8) ) {
492	0					0	$type = "hydrogen";
493							}
494							# 6, 9 salt bridged
495							elsif( $k == 6 \|\| $k == 9 ) {
496	0					0	$type = "saltbridged";
497							} else {
498	0					0	$self->throw("k has impossible value ($k), check brain");
499							}
500	32					56	$struc->conect($source, $conect[$k], $type);
501							}
502							}
503
504							# MASTER record
505	23	100				39	if (/^MASTER /) {
506							# the numbers in here a checksums, we should use them :)
507	2					6	my ($rol) = unpack "x10 a60", $_;
508	2					7	$struc->master($rol);
509							}
510
511	23	100				32	if (/^END/) {
512							# this it the end ...
513							}
514
515	23					34	$buffer = $self->_readline;
516							}
517
518
519	2					40	return $struc;
520							}
521
522							=head2 write_structure
523
524							Title : write_structure
525							Usage : $stream->write_structure($struc)
526							Function: writes the $struc object (must be a Bio::Structure) to the stream
527							Returns : 1 for success and 0 for error
528							Args : Bio::Structure object
529
530							=cut
531
532							sub write_structure {
533	1			1	1	6	my ($self, $struc) = @_;
534	1	50				3	if( !defined $struc ) {
535	0					0	$self->throw("Attempting to write with no structure!");
536							}
537
538	1	50	33			8	if( ! ref $struc \|\| ! $struc->isa('Bio::Structure::StructureI') ) {
539	0					0	$self->throw(" $struc is not a StructureI compliant module.");
540							}
541	1					2	my ($ann, $string, $output_string, $key);
542							# HEADER
543	1					4	($ann) = $struc->annotation->get_Annotations("header");
544	1	50				4	if (defined $ann) {
545	1					3	$string = $ann->as_text;
546	1					4	$string =~ s/^Value: //;
547	1					5	$output_string = pack ("A10 A56", "HEADER", $string);
548							} else { # not read in via read_structure, create HEADER line
549	0					0	my $id = $struc->id;
550	0	0				0	if (!$id) {
551	0					0	$id = "UNK1";
552							}
553	0	0				0	if (length($id) > 4) {
554	0					0	$id = substr($id,0,4);
555							}
556	0					0	my $classification = "DEFAULT CLASSIFICATION";
557	0					0	my $dep_date = "24-JAN-70";
558	0					0	$output_string = pack ("A10 A40 A12 A4", "HEADER", $classification, $dep_date, $id);
559							}
560	1					5	$output_string .= " " x (80 - length($output_string) );
561	1					13	$self->_print("$output_string\n");
562
563	1					1	my (%header);
564	1					3	for $key ($struc->annotation->get_all_annotation_keys) {
565	38					43	$header{$key} = 1;;
566							}
567
568	1	50				4	exists $header{'obslte'} && $self->_write_PDB_simple_record(-name => "OBSLTE ", -cont => "9-10",
569							-annotation => $struc->annotation->get_Annotations("obslte"), -rol => "11-70");
570
571	1	50				4	exists $header{'title'} && $self->_write_PDB_simple_record(-name => "TITLE ", -cont => "9-10",
572							-annotation => $struc->annotation->get_Annotations("title"), -rol => "11-70");
573
574	1	50				4	exists $header{'caveat'} && $self->_write_PDB_simple_record(-name => "CAVEAT ", -cont => "9-10",
575							-annotation => $struc->annotation->get_Annotations("caveat"), -rol => "12-70");
576
577	1	50				4	exists $header{'compnd'} && $self->_write_PDB_simple_record(-name => "COMPND ", -cont => "9-10",
578							-annotation => $struc->annotation->get_Annotations("compnd"), -rol => "11-70");
579
580	1	50				5	exists $header{'source'} && $self->_write_PDB_simple_record(-name => "SOURCE ", -cont => "9-10",
581							-annotation => $struc->annotation->get_Annotations("source"), -rol => "11-70");
582
583	1	50				6	exists $header{'keywds'} && $self->_write_PDB_simple_record(-name => "KEYWDS ", -cont => "9-10",
584							-annotation => $struc->annotation->get_Annotations("keywds"), -rol => "11-70");
585
586	1	50				5	exists $header{'expdta'} && $self->_write_PDB_simple_record(-name => "EXPDTA ", -cont => "9-10",
587							-annotation => $struc->annotation->get_Annotations("expdta"), -rol => "11-70");
588
589	1	50				5	exists $header{'author'} && $self->_write_PDB_simple_record(-name => "AUTHOR ", -cont => "9-10",
590							-annotation => $struc->annotation->get_Annotations("author"), -rol => "11-70");
591
592	1	50				5	exists $header{'revdat'} && $self->_write_PDB_simple_record(-name => "REVDAT ",
593							-annotation => $struc->annotation->get_Annotations("revdat"), -rol => "8-66");
594
595	1	50				6	exists $header{'sprsde'} && $self->_write_PDB_simple_record(-name => "SPRSDE ", -cont => "9-10",
596							-annotation => $struc->annotation->get_Annotations("sprsde"), -rol => "12-70");
597
598							# JRNL en REMARK 1
599	1					2	my ($jrnl_done, $remark_1_counter);
600	1	50				4	if ( !exists $header{'jrnl'} ) {
601	0					0	$jrnl_done = 1;
602							}
603	1					4	foreach my $ref ($struc->annotation->get_Annotations('reference') ) {
604	1	50				3	if( !$jrnl_done ) { # JRNL record
605	1	50				4	$ref->authors && $self->_write_PDB_simple_record(-name => "JRNL AUTH",
606							-cont => "17-18", -rol => "20-70", -string => $ref->authors );
607	1	50				4	$ref->title && $self->_write_PDB_simple_record(-name => "JRNL TITL",
608							-cont => "17-18", -rol => "20-70", -string => $ref->title );
609	1	50				3	$ref->editors && $self->_write_PDB_simple_record(-name => "JRNL EDIT",
610							-cont => "17-18", -rol => "20-70", -string => $ref->editors );
611	1	50				3	$ref->location && $self->_write_PDB_simple_record(-name => "JRNL REF ",
612							-cont => "17-18", -rol => "20-70", -string => $ref->location );
613	1	50				3	$ref->editors && $self->_write_PDB_simple_record(-name => "JRNL EDIT",
614							-cont => "17-18", -rol => "20-70", -string => $ref->editors );
615	1	50				3	$ref->encoded_ref && $self->_write_PDB_simple_record(-name => "JRNL REFN",
616							-cont => "17-18", -rol => "20-70", -string => $ref->encoded_ref );
617	1					3	$jrnl_done = 1;
618							} else { # REMARK 1
619	0	0				0	if (!$remark_1_counter) { # header line
620	0					0	my $remark_1_header_line = "REMARK 1" . " " x 70;
621	0					0	$self->_print("$remark_1_header_line\n");
622	0					0	$remark_1_counter = 1;
623							}
624							# per reference header
625	0					0	my $rem_line = "REMARK 1 REFERENCE " . $remark_1_counter;
626	0					0	$rem_line .= " " x (80 - length($rem_line) );
627	0					0	$self->_print($rem_line,"\n");
628	0	0				0	$ref->authors && $self->_write_PDB_simple_record(-name => "REMARK 1 AUTH",
629							-cont => "17-18", -rol => "20-70", -string => $ref->authors );
630	0	0				0	$ref->title && $self->_write_PDB_simple_record(-name => "REMARK 1 TITL",
631							-cont => "17-18", -rol => "20-70", -string => $ref->title );
632	0	0				0	$ref->editors && $self->_write_PDB_simple_record(-name => "REMARK 1 EDIT",
633							-cont => "17-18", -rol => "20-70", -string => $ref->editors );
634	0	0				0	$ref->location && $self->_write_PDB_simple_record(-name => "REMARK 1 REF ",
635							-cont => "17-18", -rol => "20-70", -string => $ref->location );
636	0	0				0	$ref->editors && $self->_write_PDB_simple_record(-name => "REMARK 1 EDIT",
637							-cont => "17-18", -rol => "20-70", -string => $ref->editors );
638	0	0				0	$ref->encoded_ref && $self->_write_PDB_simple_record(-name => "REMARK 1 REFN",
639							-cont => "17-18", -rol => "20-70", -string => $ref->encoded_ref );
640	0					0	$remark_1_counter++;
641							}
642							}
643	1	50				3	if (! defined $remark_1_counter ) { # no remark 1 record written yet
644	1					2	my $remark_1_header_line = "REMARK 1" . " " x 70;
645	1					3	$self->_print("$remark_1_header_line\n"); # write dummy (we need this line)
646							}
647
648							# REMARK's (not 1 at the moment, references)
649	1					2	my (%remarks, $remark_num);
650	1					7	for $key (keys %header) {
651	38	100				50	next unless ($key =~ /^remark_(\d+)$/);
652	13	50				25	next if ($1 == 1);
653	13					19	$remarks{$1} = 1;
654							}
655	1					9	for $remark_num (sort {$a <=> $b} keys %remarks) {
	34					31
656	13					21	$self->_write_PDB_remark_record($struc, $remark_num);
657							}
658
659	1	50				6	exists $header{'dbref'} && $self->_write_PDB_simple_record(-name => "DBREF ",
660							-annotation => $struc->annotation->get_Annotations("dbref"), -rol => "8-68");
661	1	50				4	exists $header{'seqadv'} && $self->_write_PDB_simple_record(-name => "SEQADV ",
662							-annotation => $struc->annotation->get_Annotations("seqadv"), -rol => "8-70");
663	1	50				4	exists $header{'seqres'} && $self->_write_PDB_simple_record(-name => "SEQRES ",
664							-annotation => $struc->annotation->get_Annotations("seqres"), -rol => "9-70");
665	1	50				3	exists $header{'modres'} && $self->_write_PDB_simple_record(-name => "MODRES ",
666							-annotation => $struc->annotation->get_Annotations("modres"), -rol => "8-70");
667	1	50				5	exists $header{'het'} && $self->_write_PDB_simple_record(-name => "HET ",
668							-annotation => $struc->annotation->get_Annotations("het"), -rol => "8-70");
669	1	50				5	exists $header{'hetnam'} && $self->_write_PDB_simple_record(-name => "HETNAM ",
670							-annotation => $struc->annotation->get_Annotations("hetnam"), -rol => "9-70");
671	1	50				4	exists $header{'hetsyn'} && $self->_write_PDB_simple_record(-name => "HETSYN ",
672							-annotation => $struc->annotation->get_Annotations("hetsyn"), -rol => "9-70");
673	1	50				5	exists $header{'formul'} && $self->_write_PDB_simple_record(-name => "FORMUL ",
674							-annotation => $struc->annotation->get_Annotations("formul"), -rol => "9-70");
675	1	50				4	exists $header{'helix'} && $self->_write_PDB_simple_record(-name => "HELIX ",
676							-annotation => $struc->annotation->get_Annotations("helix"), -rol => "8-76");
677	1	50				3	exists $header{'sheet'} && $self->_write_PDB_simple_record(-name => "SHEET ",
678							-annotation => $struc->annotation->get_Annotations("sheet"), -rol => "8-70");
679	1	50				3	exists $header{'turn'} && $self->_write_PDB_simple_record(-name => "TURN ",
680							-annotation => $struc->annotation->get_Annotations("turn"), -rol => "8-70");
681	1	50				3	exists $header{'ssbond'} && $self->_write_PDB_simple_record(-name => "SSBOND ",
682							-annotation => $struc->annotation->get_Annotations("ssbond"), -rol => "8-72");
683	1	50				4	exists $header{'link'} && $self->_write_PDB_simple_record(-name => "LINK ",
684							-annotation => $struc->annotation->get_Annotations("link"), -rol => "13-72");
685	1	50				4	exists $header{'hydbnd'} && $self->_write_PDB_simple_record(-name => "HYDBND ",
686							-annotation => $struc->annotation->get_Annotations("hydbnd"), -rol => "13-72");
687	1	50				3	exists $header{'sltbrg'} && $self->_write_PDB_simple_record(-name => "SLTBRG ",
688							-annotation => $struc->annotation->get_Annotations("sltbrg"), -rol => "13-72");
689	1	50				2	exists $header{'cispep'} && $self->_write_PDB_simple_record(-name => "CISPEP ",
690							-annotation => $struc->annotation->get_Annotations("cispep"), -rol => "8-59");
691	1	50				4	exists $header{'site'} && $self->_write_PDB_simple_record(-name => "SITE ",
692							-annotation => $struc->annotation->get_Annotations("site"), -rol => "8-61");
693	1	50				5	exists $header{'cryst1'} && $self->_write_PDB_simple_record(-name => "CRYST1",
694							-annotation => $struc->annotation->get_Annotations("cryst1"), -rol => "7-70");
695	1					3	for my $k (1..3) {
696	3					5	my $origxn = "origx".$k;
697	3					6	my $ORIGXN = uc($origxn)." ";
698	3	50				9	exists $header{$origxn} && $self->_write_PDB_simple_record(-name => $ORIGXN,
699							-annotation => $struc->annotation->get_Annotations($origxn), -rol => "11-55");
700							}
701	1					3	for my $k (1..3) {
702	3					5	my $scalen = "scale".$k;
703	3					5	my $SCALEN = uc($scalen)." ";
704	3	50				11	exists $header{$scalen} && $self->_write_PDB_simple_record(-name => $SCALEN,
705							-annotation => $struc->annotation->get_Annotations($scalen), -rol => "11-55");
706							}
707	1					2	for my $k (1..3) {
708	3					6	my $mtrixn = "mtrix".$k;
709	3					4	my $MTRIXN = uc($mtrixn)." ";
710	3	50				6	exists $header{$mtrixn} && $self->_write_PDB_simple_record(-name => $MTRIXN,
711							-annotation => $struc->annotation->get_Annotations($mtrixn), -rol => "8-60");
712							}
713	1	50				4	exists $header{'tvect'} && $self->_write_PDB_simple_record(-name => "TVECT ",
714							-annotation => $struc->annotation->get_Annotations("tvect"), -rol => "8-70");
715
716							# write out coordinate section
717							#
718	1					3	my %het_res; # hetero residues
719	1					3	$het_res{'HOH'} = 1; # water is default
720	1	50				3	if (exists $header{'het'}) {
721	1					3	my ($het_line) = ($struc->annotation->get_Annotations("het"))[0]->as_text;
722	1					4	$het_line =~ s/^Value: //;
723	1					3	for ( my $k = 0; $k <= length $het_line ; $k += 63) {
724	3					5	my $l = substr $het_line, $k, 63;
725	3					9	$l =~ s/^\s(\S+)\s+.$/$1/;
726	3					7	$het_res{$l} = 1;
727							}
728							}
729	1					4	for my $model ($struc->get_models) {
730							# more then one model ?
731	1	50				3	if ($struc->get_models > 1) {
732	0					0	my $model_line = sprintf("MODEL %4d", $model->id);
733	0					0	$model_line .= " " x (80 - length($model_line) );
734	0					0	$self->_print($model_line, "\n");
735							}
736	1					3	for my $chain ($struc->get_chains($model)) {
737	4					9	my ($residue, $atom, $resname, $resnum, $atom_line, $atom_serial, $atom_icode, $chain_id);
738	4					0	my ($prev_resname, $prev_resnum, $prev_atomicode); # need these for TER record
739	4					4	my $last_record = ""; # Used to spot an ATOM -> HETATM change within a chain
740	4					129	$chain_id = $chain->id;
741	4	100				8	if ( $chain_id eq "default" ) {
742	1					1	$chain_id = " ";
743							}
744	4					17	$self->debug("model_id: $model->id chain_id: $chain_id\n");
745	4					10	for $residue ($struc->get_residues($chain)) {
746	60					103	($resname, $resnum) = split /-/, $residue->id;
747	60					125	for $atom ($struc->get_atoms($residue)) {
748	171	100				247	if ($het_res{$resname}) { # HETATM
749	45	50	66			87	if ( $resname ne "HOH" && $last_record eq "ATOM " ) {
750							# going from ATOM -> HETATM, we have to write TER
751	0					0	my $ter_line = "TER ";
752	0					0	$ter_line .= sprintf("%5d", $atom_serial + 1);
753	0					0	$ter_line .= " ";
754	0					0	$ter_line .= sprintf("%3s ", $prev_resname);
755	0					0	$ter_line .= $chain_id;
756	0					0	$ter_line .= sprintf("%4d", $prev_resnum);
757	0	0				0	$ter_line .= $atom_icode ? $prev_atomicode : " "; # 27
758	0					0	$ter_line .= " " x (80 - length $ter_line); # extend to 80 chars
759	0					0	$self->_print($ter_line,"\n");
760							}
761	45					47	$atom_line = "HETATM";
762							} else {
763	126					240	$atom_line = "ATOM ";
764							}
765	171					159	$last_record = $atom_line;
766	171					247	$atom_line .= sprintf("%5d ", $atom->serial);
767	171					281	$atom_serial = $atom->serial; # we need it for TER record
768	171					249	$atom_icode = $atom->icode;
769							# remember some stuff if next iteration needs writing TER
770	171					169	$prev_resname = $resname;
771	171					171	$prev_resnum = $resnum;
772	171					144	$prev_atomicode = $atom_icode;
773							# getting the name of the atom correct is subtrivial
774	171					217	my $atom_id = $atom->id;
775							# is pdb_atomname set, then use this (most probably set when
776							# reading in the PDB record)
777	171					246	my $pdb_atomname = $atom->pdb_atomname;
778	171	50				219	if( defined $pdb_atomname ) {
779	171					259	$atom_line .= sprintf("%-4s", $pdb_atomname);
780							} else {
781							# start (educated) guessing
782	0					0	my $element = $atom->element;
783	0	0	0			0	if( defined $element && $element ne "H") {
784							# element should be at first two positions (right justified)
785							# ie. Calcium should be "CA "
786							# C alpha should be " CA "
787	0	0				0	if( length($element) == 2 ) {
788	0					0	$atom_line .= sprintf("%-4s", $atom->id);
789							} else {
790	0					0	$atom_line .= sprintf(" %-3s", $atom->id);
791							}
792							} else { # old behaviour do a best guess
793	0	0				0	if ($atom->id =~ /^\dH/) { # H: four positions, left justified
		0
794	0					0	$atom_line .= sprintf("%-4s", $atom->id);
795							} elsif (length($atom_id) == 4) {
796	0	0				0	if ($atom_id =~ /^(H\d\d)(\d)$/) { # turn H123 into 3H12
797	0					0	$atom_line .= $2.$1;
798							} else { # no more guesses, no more alternatives
799	0					0	$atom_line .= $atom_id;
800							}
801							} else { # if we get here and it is not correct let me know
802	0					0	$atom_line .= sprintf(" %-3s", $atom->id);
803							}
804							}
805							}
806							# we don't do alternate location at this moment
807	171					167	$atom_line .= " "; # 17
808	171					190	$atom_line .= sprintf("%3s",$resname); # 18-20
809	171					178	$atom_line .= " ".$chain_id; # 21, 22
810	171					205	$atom_line .= sprintf("%4d", $resnum); # 23-26
811	171	50				230	$atom_line .= $atom->icode ? $atom->icode : " "; # 27
812	171					173	$atom_line .= " "; # 28-30
813	171					248	$atom_line .= sprintf("%8.3f", $atom->x); # 31-38
814	171					308	$atom_line .= sprintf("%8.3f", $atom->y); # 39-46
815	171					272	$atom_line .= sprintf("%8.3f", $atom->z); # 47-54
816	171					264	$atom_line .= sprintf("%6.2f", $atom->occupancy); # 55-60
817	171					276	$atom_line .= sprintf("%6.2f", $atom->tempfactor); # 61-66
818	171					205	$atom_line .= " "; # 67-72
819	171	50				222	$atom_line .= $atom->segID ? # segID 73-76
820							sprintf("%-4s", $atom->segID) :
821							" ";
822	171	50				260	$atom_line .= $atom->element ?
823							sprintf("%2s", $atom->element) :
824							" ";
825	171	50				289	$atom_line .= $atom->charge ?
826							sprintf("%2s", $atom->charge) :
827							" ";
828
829	171					284	$self->_print($atom_line,"\n");
830							}
831							}
832							# write out TER record
833	4	100				11	if ( $resname ne "HOH" ) {
834	3					4	my $ter_line = "TER ";
835	3					8	$ter_line .= sprintf("%5d", $atom_serial + 1);
836	3					4	$ter_line .= " ";
837	3					4	$ter_line .= sprintf("%3s ", $resname);
838	3					5	$ter_line .= $chain_id;
839	3					5	$ter_line .= sprintf("%4d", $resnum);
840	3	50				6	$ter_line .= $atom_icode ? $atom_icode : " "; # 27
841	3					6	$ter_line .= " " x (80 - length $ter_line); # extend to 80 chars
842	3					5	$self->_print($ter_line,"\n");
843							}
844							}
845	1	50				4	if ($struc->get_models > 1) { # we need ENDMDL
846	0					0	my $endmdl_line = "ENDMDL" . " " x 74;
847	0					0	$self->_print($endmdl_line, "\n");
848							}
849							} # for my $model
850
851							# CONECT
852	1					4	my @sources = $struc->get_all_conect_source;
853	1					3	my ($conect_line,@conect, @bond, @hydbond, @saltbridge, $to, $type);
854	1					2	for my $source (@sources) {
855							# get all conect's
856	8					13	my @conect = $struc->conect($source);
857							# classify
858	8					9	for my $con (@conect) {
859	12					19	($to, $type) = split /_/, $con;
860	12	50				19	if($type eq "bond") {
		0
		0
861	12					19	push @bond, $to;
862							} elsif($type eq "hydrogenbonded") {
863	0					0	push @hydbond, $to;
864							} elsif($type eq "saltbridged") {
865	0					0	push @saltbridge, $to;
866							} else {
867	0					0	$self->throw("type $type is unknown for conect");
868							}
869							}
870							# and write out CONECT lines as long as there is something
871							# in one of the arrays
872	8		66			12	while ( @bond \|\| @hydbond \|\| @saltbridge) {
			66
873	8					9	my ($b, $hb, $sb);
874	8					17	$conect_line = "CONECT". sprintf("%5d", $source);
875	8					10	for my $k (0..3) {
876	32					31	$b = shift @bond;
877	32	100				44	$conect_line .= $b ? sprintf("%5d", $b) : " ";
878							}
879	8					9	for my $k (4..5) {
880	16					14	$hb = shift @hydbond;
881	16	50				19	$conect_line .= $hb ? sprintf("%5d", $hb) : " ";
882							}
883	8					9	$sb = shift @saltbridge;
884	8	50				14	$conect_line .= $sb ? sprintf("%5d", $sb) : " ";
885	8					10	for my $k (7..8) {
886	16					16	$hb = shift @hydbond;
887	16	50				17	$conect_line .= $hb ? sprintf("%5d", $hb) : " ";
888							}
889	8					9	$sb = shift @saltbridge;
890	8	50				11	$conect_line .= $sb ? sprintf("%5d", $sb) : " ";
891
892	8					10	$conect_line .= " " x (80 - length($conect_line) );
893	8					13	$self->_print($conect_line, "\n");
894							}
895							}
896
897							# MASTER line contains checksums, we should calculate them of course :)
898	1					3	my $master_line = "MASTER " . $struc->master;
899	1					3	$master_line .= " " x (80 - length($master_line) );
900	1					3	$self->_print($master_line, "\n");
901
902	1					2	my $end_line = "END" . " " x 77;
903	1					2	$self->_print($end_line,"\n");
904
905							}
906
907							=head2 _filehandle
908
909							Title : _filehandle
910							Usage : $obj->_filehandle($newval)
911							Function:
912							Example :
913							Returns : value of _filehandle
914							Args : newvalue (optional)
915
916							=cut
917
918							sub _filehandle{
919	0			0		0	my ($obj,$value) = @_;
920	0	0				0	if( defined $value) {
921	0					0	$obj->{'_filehandle'} = $value;
922							}
923	0					0	return $obj->{'_filehandle'};
924
925							}
926
927							=head2 _noatom
928
929							Title : _noatom
930							Usage : $obj->_noatom($newval)
931							Function:
932							Example :
933							Returns : value of _noatom
934							Args : newvalue (optional)
935
936
937							=cut
938
939							sub _noatom{
940	2			2		4	my ($obj,$value) = @_;
941	2	50				4	if( defined $value) {
942	0					0	$obj->{'_noatom'} = $value;
943							}
944	2					3	return $obj->{'_noatom'};
945
946							}
947
948							=head2 _noheader
949
950							Title : _noheader
951							Usage : $obj->_noheader($newval)
952							Function:
953							Example :
954							Returns : value of _noheader
955							Args : newvalue (optional)
956
957
958							=cut
959
960							sub _noheader{
961	2			2		3	my ($obj,$value) = @_;
962	2	50				5	if( defined $value) {
963	0					0	$obj->{'_noheader'} = $value;
964							}
965	2					3	return $obj->{'_noheader'};
966
967							}
968
969							=head2 _read_PDB_singlecontline
970
971							Title : _read_PDB_singlecontline
972							Usage : $obj->_read_PDB_singlecontline($record, $fromto, $buffer))
973							Function: read single continued record from PDB
974							Returns : concatenated record entry (between $fromto columns)
975							Args : record, colunm delimiters, buffer
976
977							=cut
978
979							sub _read_PDB_singlecontline {
980	9			9		14	my ($self, $record, $fromto, $buffer) = @_;
981	9					9	my $concat_line;
982
983	9					18	my ($begin, $end) = (split (/-/, $fromto));
984	9					13	my $unpack_string = "x8 a2 ";
985	9	50				15	if($begin == 12) { # one additional space
986	0					0	$unpack_string .= "x1 a59";
987							} else {
988	9					12	$unpack_string .= "a60";
989							}
990	9					10	$_ = $$buffer;
991	9		66			15	while (defined( $_ \|\|= $self->_readline ) ) {
992	25	100				123	if ( /^$record/ ) {
993	16					43	my($cont, $rol) = unpack $unpack_string, $_;
994	16	100	66			41	if($cont =~ /\d$/ && $begin == 11) { # continuation line
995							# and text normally at pos 11
996	7					16	$rol =~ s/^\s//; # strip leading space
997							}
998							## no space (store litteraly) $concat_line .= $rol . " ";
999	16					27	$concat_line .= $rol;
1000							} else {
1001	9					13	last;
1002							}
1003
1004	16					44	$_ = undef;
1005							}
1006	9					22	$concat_line =~ s/\s$//; # remove trailing space
1007	9					14	$$buffer = $_;
1008
1009	9					14	return $concat_line;
1010							}
1011
1012
1013							=head2 _read_PDB_jrnl
1014
1015							Title : _read_PDB_jrnl
1016							Usage : $obj->_read_PDB_jrnl($\buffer))
1017							Function: read jrnl record from PDB
1018							Returns : Bio::Annotation::Reference object
1019							Args :
1020
1021							=cut
1022
1023							sub _read_PDB_jrnl {
1024	2			2		4	my ($self, $buffer) = @_;
1025
1026	2					2	$_ = $$buffer;
1027	2					2	my ($auth, $titl,$edit,$ref,$publ,$refn, $pmid, $doi);
1028	2		66			5	while (defined( $_ \|\|= $self->_readline )) {
1029	15	100				37	if (/^JRNL /) {
1030							# this code belgons in a seperate method (shared with
1031							# remark 1 parsing)
1032	13					38	my ($rec, $subr, $cont, $rol) = unpack "A6 x6 A4 A2 x1 A51", $_;
1033	13	100				30	$auth = $self->_concatenate_lines($auth,$rol) if ($subr eq "AUTH");
1034	13	100				20	$titl = $self->_concatenate_lines($titl,$rol) if ($subr eq "TITL");
1035	13	50				18	$edit = $self->_concatenate_lines($edit,$rol) if ($subr eq "EDIT");
1036	13	100				18	$ref = $self->_concatenate_lines($ref ,$rol) if ($subr eq "REF");
1037	13	50				20	$publ = $self->_concatenate_lines($publ,$rol) if ($subr eq "PUBL");
1038	13	100				16	$refn = $self->_concatenate_lines($refn,$rol) if ($subr eq "REFN");
1039	13	50				16	$pmid = $self->_concatenate_lines($pmid,$rol) if ($subr eq "PMID");
1040	13	50				16	$doi = $self->_concatenate_lines($doi,$rol) if ($subr eq "DOI");
1041							} else {
1042	2					2	last;
1043							}
1044
1045	13					38	$_ = undef; # trigger reading of next line
1046							} # while
1047
1048	2					4	$$buffer = $_;
1049	2					9	my $jrnl_ref = Bio::Annotation::Reference->new;
1050
1051	2					6	$jrnl_ref->authors($auth);
1052	2					5	$jrnl_ref->title($titl);
1053	2					5	$jrnl_ref->location($ref);
1054	2					4	$jrnl_ref->publisher($publ);
1055	2					3	$jrnl_ref->editors($edit);
1056	2					7	$jrnl_ref->encoded_ref($refn);
1057	2					4	$jrnl_ref->pubmed($pmid);
1058	2					5	$jrnl_ref->doi($doi);
1059
1060	2					4	return $jrnl_ref;
1061							} # sub _read_PDB_jrnl
1062
1063
1064							=head2 _read_PDB_remark_1
1065
1066							Title : _read_PDB_remark_1
1067							Usage : $obj->_read_PDB_remark_1($\buffer))
1068							Function: read "remark 1" record from PDB
1069							Returns : array of Bio::Annotation::Reference objects
1070							Args :
1071
1072							=cut
1073
1074							sub _read_PDB_remark_1 {
1075	2			2		3	my ($self, $buffer) = @_;
1076
1077	2					3	$_ = $$buffer;
1078	2					4	my ($auth, $titl,$edit,$ref,$publ,$refn,$refnum,$pmid, $doi);
1079	2					0	my @refs;
1080
1081	2		66			5	while (defined( $_ \|\|= $self->_readline )) {
1082	59	100				126	if (/^REMARK 1 /) {
1083	57	100				103	if (/^REMARK 1\s+REFERENCE\s+(\d+)\s*/) {
1084	8					17	$refnum = $1;
1085	8	100				11	if ($refnum != 1) { # this is first line of a reference
1086	7					16	my $rref = Bio::Annotation::Reference->new;
1087	7					10	$rref->authors($auth);
1088	7					12	$rref->title($titl);
1089	7					12	$rref->location($ref);
1090	7					13	$rref->publisher($publ);
1091	7					13	$rref->editors($edit);
1092	7					12	$rref->encoded_ref($refn);
1093	7					11	$rref->pubmed($pmid);
1094	7					12	$rref->doi($doi);
1095	7					8	$auth = $titl = $edit = $ref = $publ = $refn = undef;
1096	7					10	push @refs, $rref;
1097							}
1098							} else {
1099							# this code belgons in a seperate method (shared with
1100							# remark 1 parsing)
1101	49					137	my ($rec, $subr, $cont, $rol) = unpack "A6 x6 A4 A2 x1 A51", $_;
1102	49	100				83	$auth = $self->_concatenate_lines($auth,$rol) if ($subr eq "AUTH");
1103	49	100				74	$titl = $self->_concatenate_lines($titl,$rol) if ($subr eq "TITL");
1104	49	50				84	$edit = $self->_concatenate_lines($edit,$rol) if ($subr eq "EDIT");
1105	49	100				62	$ref = $self->_concatenate_lines($ref ,$rol) if ($subr eq "REF");
1106	49	50				58	$publ = $self->_concatenate_lines($publ,$rol) if ($subr eq "PUBL");
1107	49	100				64	$refn = $self->_concatenate_lines($refn,$rol) if ($subr eq "REFN");
1108	49	50				59	$pmid = $self->_concatenate_lines($pmid,$rol) if ($subr eq "PMID");
1109	49	50				61	$doi = $self->_concatenate_lines($doi,$rol) if ($subr eq "DOI");
1110							}
1111							} else {
1112							# have we seen any reference at all (could be single REMARK 1 line
1113	2	100				5	if ( ! defined ($refnum) ) {
1114	1					2	last; # get out of while()
1115							}
1116
1117							# create last reference
1118	1					3	my $rref = Bio::Annotation::Reference->new;
1119	1					3	$rref->authors($auth);
1120	1					2	$rref->title($titl);
1121	1					1	$rref->location($ref);
1122	1					2	$rref->publisher($publ);
1123	1					2	$rref->editors($edit);
1124	1					2	$rref->encoded_ref($refn);
1125	1					2	$rref->pubmed($pmid);
1126	1					2	$rref->doi($doi);
1127	1					1	push @refs, $rref;
1128	1					1	last;
1129							}
1130
1131	57					111	$_ = undef; # trigger reading of next line
1132							} # while
1133
1134	2					4	$$buffer = $_;
1135
1136	2					5	return @refs;
1137							} # sub _read_PDB_jrnl
1138
1139
1140							=head2 _read_PDB_coordinate_section
1141
1142							Title : _read_PDB_coordinate_section
1143							Usage : $obj->_read_PDB_coordinate_section($\buffer))
1144							Function: read one model from a PDB
1145							Returns : Bio::Structure::Model object
1146							Args :
1147
1148							=cut
1149
1150							sub _read_PDB_coordinate_section {
1151	2			2		4	my ($self, $buffer, $struc) = @_;
1152	2					2	my ($model_num, $chain_name, $residue_name, $atom_name); # to keep track of state
1153	2					4	$model_num = "";
1154	2					2	$chain_name = "";
1155	2					1	$residue_name = "";
1156	2					2	$atom_name = "";
1157
1158	2					2	my $atom_unpack = "x6 a5 x1 a4 a1 a3 x1 a1 a4 a1 x3 a8 a8 a8 a6 a6 x6 a4 a2 a2";
1159	2					2	my $anisou_unpack = "x6 a5 x1 a4 a1 a3 x1 a1 a4 a1 x1 a7 a7 a7 a7 a7 a7 a4 a2 a2";
1160
1161	2					14	my $model = Bio::Structure::Model->new;
1162	2					6	$model->id('default');
1163	2					5	my $noatom = $self->_noatom;
1164	2					7	my ($chain, $residue, $atom, $old);
1165	2					0	my (%_ch_in_model); # which chains are already in this model
1166
1167	2					4	$_ = $$buffer;
1168	2		66			7	while (defined( $_ \|\|= $self->_readline )) {
1169							# start of a new model
1170	659	50				1078	if (/^MODEL\s+(\d+)/) {
1171	0					0	$model_num = $1;
1172	0					0	$self->debug("_read_PDB_coor: parsing model $model_num\n");
1173	0					0	$model->id($model_num);
1174	0	0				0	if (/^MODEL\s+\d+\s+\S+/) { # old format (pre 2.1)
1175	0					0	$old = 1;
1176							}
1177							}
1178							# old hier ook setten XXX
1179							# ATOM lines, if first set chain
1180	659	100				1713	if (/^(ATOM \|HETATM\|SIGATM)/) {
1181	653					3150	my @line_elements = unpack $atom_unpack, $_;
1182	653					808	my $pdb_atomname = $line_elements[1]; # need to get this before removing spaces
1183	653					1109	for my $k (0 .. $#line_elements) {
1184	9795					15741	$line_elements[$k] =~ s/^\s+//; # remove leading space
1185	9795					12033	$line_elements[$k] =~ s/\s+$//; # remove trailing space
1186	9795	100				16256	$line_elements[$k] = undef if ($line_elements[$k] =~ /^\s*$/);
1187							}
1188	653					1485	my ($serial, $atomname, $altloc, $resname, $chainID, $resseq, $icode, $x, $y, $z,
1189							$occupancy, $tempfactor, $segID, $element, $charge) = @line_elements;
1190	653	100				961	$chainID = 'default' if ( !defined $chainID );
1191	653	100				821	if ($chainID ne $chain_name) { # possibly a new chain
1192							# fix for bug #1187
1193							# we can have ATOM/HETATM of an already defined chain (A B A B)
1194							# e.g. 1abm
1195
1196	5	50				18	if (exists $_ch_in_model{$chainID} ) { # we have already seen this chain in this model
1197	0					0	$chain = $_ch_in_model{$chainID};
1198							} else { # we create a new chain
1199	5					20	$chain = Bio::Structure::Chain->new;
1200	5					16	$struc->add_chain($model,$chain);
1201	5					12	$chain->id($chainID);
1202	5					7	$_ch_in_model{$chainID} = $chain;
1203							}
1204	5					10	$chain_name = $chain->id;
1205							}
1206							#my $res_name_num = $resname."-".$resseq;
1207	653					816	my $res_name_num = $resname."-".$resseq;
1208	653	50				792	$res_name_num .= '.'.$icode if $icode;
1209	653	100				763	if ($res_name_num ne $residue_name) { # new residue
1210	157					301	$residue = Bio::Structure::Residue->new;
1211	157					378	$struc->add_residue($chain,$residue);
1212	157					272	$residue->id($res_name_num);
1213	157					176	$residue_name = $res_name_num;
1214	157					161	$atom_name = ""; # only needed inside a residue
1215							}
1216							# get out of here if we don't want the atom objects
1217	653	50				813	if ($noatom) {
1218	0					0	$_ = undef;
1219	0					0	next;
1220							}
1221							# alternative location: only take first one
1222	653	0	33			850	if ( $altloc && ($altloc =~ /\S+/) && ($atomname eq $atom_name) ) {
			33
1223	0					0	$_ = undef; # trigger reading next line
1224	0					0	next;
1225							}
1226	653	50				1384	if (/^(ATOM \|HETATM)/) { # ATOM / HETATM
1227	653					746	$atom_name = $atomname;
1228	653					1195	$atom = Bio::Structure::Atom->new;
1229	653					1388	$struc->add_atom($residue,$atom);
1230	653					1304	$atom->id($atomname);
1231	653					1168	$atom->pdb_atomname($pdb_atomname); # store away PDB atomname for writing out
1232	653					1088	$atom->serial($serial);
1233	653					1008	$atom->icode($icode);
1234	653					1032	$atom->x($x);
1235	653					1292	$atom->y($y);
1236	653					1059	$atom->z($z);
1237	653					1365	$atom->occupancy($occupancy);
1238	653					1015	$atom->tempfactor($tempfactor);
1239	653					1083	$atom->segID($segID); # deprecated but used by people
1240	653	50				917	if (! $old ) {
1241	653					1073	$atom->element($element);
1242	653					913	$atom->charge($charge);
1243							}
1244							}
1245							else { # SIGATM
1246	0					0	my $sigx = $x;
1247	0					0	my $sigy = $y;
1248	0					0	my $sigz = $z;
1249	0					0	my $sigocc = $occupancy;
1250	0					0	my $sigtemp = $tempfactor;
1251	0	0				0	if ($atom_name ne $atomname) { # something wrong with PDB file
1252	0					0	$self->throw("A SIGATM record should have the same $atomname as the previous record $atom_name\n");
1253							}
1254	0					0	$atom->sigx($sigx);
1255	0					0	$atom->sigy($sigy);
1256	0					0	$atom->sigz($sigz);
1257	0					0	$atom->sigocc($sigocc);
1258	0					0	$atom->sigtemp($sigtemp);
1259
1260							}
1261							} # ATOM\|HETARM\|SIGATM
1262
1263							# ANISOU \| SIGUIJ lines
1264	659	50				1502	if (/^(ANISOU\|SIGUIJ)/) {
1265	0	0				0	if ($noatom) {
1266	0					0	$_ = undef;
1267	0					0	next;
1268							}
1269	0					0	my @line_elements = unpack $anisou_unpack, $_;
1270	0					0	for my $k (0 .. $#line_elements) {
1271	0					0	$line_elements[$k] =~ s/^\s+//; # remove leading space
1272	0					0	$line_elements[$k] =~ s/\s+$//; # remove trailing space
1273	0	0				0	$line_elements[$k] = undef if ($line_elements[$k] =~ /^\s*$/);
1274							}
1275	0					0	my ($serial, $atomname, $altloc, $resname, $chainID, $resseq, $icode,
1276							$u11,$u22, $u33, $u12, $u13, $u23, $segID, $element, $charge) = @line_elements;
1277	0					0	$self->debug("read_PDB_coor: parsing ANISOU record: $serial $atomname\n");
1278	0	0	0			0	if ( $altloc && ($altloc =~ /\S+/) && ($atomname eq $atom_name) ) {
			0
1279	0					0	$_ = undef;
1280	0					0	next;
1281							}
1282	0	0				0	if (/^ANISOU/) {
1283	0	0				0	if ($atom_name ne $atomname) { # something wrong with PDB file
1284	0					0	$self->throw("A ANISOU record should have the same $atomname as the previous record $atom_name\n");
1285							}
1286	0					0	$atom->aniso("u11",$u11);
1287	0					0	$atom->aniso("u22",$u22);
1288	0					0	$atom->aniso("u33",$u33);
1289	0					0	$atom->aniso("u12",$u12);
1290	0					0	$atom->aniso("u13",$u13);
1291	0					0	$atom->aniso("u23",$u23);
1292							}
1293							else { # SIGUIJ
1294	0	0				0	if ($atom_name ne $atomname) { # something wrong with PDB file
1295	0					0	$self->throw("A SIGUIJ record should have the same $atomname as the previous record $atom_name\n");
1296							}
1297							# could use different variable names, but hey ...
1298	0					0	$atom->aniso("sigu11",$u11);
1299	0					0	$atom->aniso("sigu22",$u22);
1300	0					0	$atom->aniso("sigu33",$u33);
1301	0					0	$atom->aniso("sigu12",$u12);
1302	0					0	$atom->aniso("sigu13",$u13);
1303	0					0	$atom->aniso("sigu23",$u23);
1304							}
1305							} # ANISOU \| SIGUIJ
1306
1307	659	100				926	if (/^TER /) {
1308	4					8	$_ = undef;
1309	4					12	next;
1310							}
1311
1312	655	50				831	if (/^ENDMDL/) {
1313	0					0	$_ = $self->_readline;
1314	0					0	last;
1315							}
1316
1317	655	100				912	if (/^(CONECT\|MASTER)/) { # get out of here
1318							# current line is OK
1319	2					5	last;
1320							}
1321	653					1805	$_ = undef;
1322
1323							} # while
1324
1325	2					5	$$buffer = $_;
1326
1327	2					7	return $model;
1328							} # _read_PDB_coordinate_section
1329
1330
1331							sub _write_PDB_simple_record {
1332	38			38		73	my ($self, @args) = @_;
1333	38					96	my ($name, $cont , $annotation, $rol, $string) =
1334							$self->_rearrange([qw(
1335							NAME
1336							CONT
1337							ANNOTATION
1338							ROL
1339							STRING
1340							)],
1341							@args);
1342	38	50	66			95	if (defined $string && defined $annotation) {
1343	0					0	$self->throw("you can only supply one of -annoation or -string");
1344							}
1345	38					41	my ($output_string, $ann_string, $t_string);
1346	38					133	my ($rol_begin, $rol_end) = $rol =~ /^(\d+)-(\d+)$/;
1347	38					72	my $rol_length = $rol_end - $rol_begin +1;
1348	38	100				51	if ($string) {
1349	4	100				9	if (length $string > $rol_length) {
1350							# we might need to split $string in multiple lines
1351	2					4	while (length $string > $rol_length) {
1352							# other option might be to go for a bunch of substr's
1353	3					29	my @c = split//,$string;
1354	3					3	my $t = $rol_length; # index into @c
1355	3					8	while ($c[$t] ne " ") { # find first space, going backwards
1356	21					53	$self->debug("c[t]: $c[$t] $t\n");
1357	21					21	$t--;
1358	21	50				36	if ($t == 0) { $self->throw("Found no space for $string\n"); }
	0					0
1359							}
1360	3					9	$self->debug("t: $t rol_length: $rol_length\n");
1361	3					6	$ann_string .= substr($string, 0, $t);
1362	3					7	$self->debug("ann_string: $ann_string\n");
1363	3					6	$ann_string .= " " x ($rol_length - $t );
1364	3					5	$string = substr($string, $t+1);
1365	3					5	$string =~ s/^\s+//;
1366	3					10	$self->debug("ann_string: $ann_string~~\nstring: $string~~\n");
1367							}
1368	2					3	$ann_string .= $string;
1369							} else {
1370	2					3	$ann_string = $string;
1371							}
1372							} else {
1373	34					64	$ann_string = $annotation->as_text;
1374	34					103	$ann_string =~ s/^Value: //;
1375							}
1376							# ann_string contains the thing to write out, writing out happens below
1377	38					50	my $ann_length = length $ann_string;
1378
1379	38					116	$self->debug("ann_string: $ann_string\n");
1380	38	100				58	if ($cont) {
1381	10					32	my ($c_begin, $c_end) = $cont =~ /^(\d+)-(\d+)$/;
1382	10	100				16	if ( $ann_length > $rol_length ) { # we need to continuation lines
1383	6					7	my $first_line = 1;
1384	6					6	my $cont_number = 2;
1385	6					7	my $out_line;
1386	6					6	my $num_pos = $rol_length;
1387	6					8	my $i = 0;
1388	6					8	while( $i < $ann_length ) {
1389	15					20	$t_string = substr($ann_string, $i, $num_pos);
1390	15					36	$self->debug("t_string: $t_string~~$i $num_pos\n");
1391	15	100				21	if ($first_line) {
1392	6					15	$out_line = $name . " " x ($rol_begin - $c_begin) . $t_string;
1393	6					10	$out_line .= " " x (80 - length($out_line) ) . "\n";
1394	6					7	$first_line = 0;
1395	6					9	$output_string = $out_line;
1396	6					4	$i += $num_pos; # first do counter
1397	6	100				14	if ($rol_begin - $c_end == 1) { # next line one character less
1398	4					9	$num_pos--;
1399							}
1400							} else {
1401	9					20	$out_line = $name . sprintf("%2d",$cont_number);
1402							# a space after continuation number
1403	9	100				13	if ($rol_begin - $c_end == 1) { # one space after cont number
1404	6					6	$out_line .= " ";
1405	6					6	$out_line .= $t_string;
1406							} else {
1407	3					6	$out_line .= " " x ($rol_begin - $c_end - 1) . $t_string;
1408							}
1409	9					17	$out_line .= " " x (80 -length($out_line) ) . "\n";
1410	9					8	$cont_number++;
1411	9					11	$output_string .= $out_line;
1412	9					14	$i += $num_pos;
1413							}
1414							}
1415							} else { # no continuation
1416	4					7	my $spaces = $rol_begin - $c_begin; # number of spaces need to insert
1417	4					7	$output_string = $name . " " x $spaces . $ann_string;
1418	4					8	$output_string .= " " x (80 - length($output_string) );
1419							}
1420							} else { # no contintuation lines
1421	28	50				42	if ($ann_length < $rol_length) {
1422	0					0	$output_string = $name . $ann_string;
1423	0					0	$output_string .= " " x (80 - length($output_string) );
1424							} else {
1425	28					42	for (my $i = 0; $i < $ann_length; $i += $rol_length) {
1426	249					199	my $out_line;
1427	249					260	$t_string = substr($ann_string, $i, $rol_length);
1428	249					219	$out_line = $name . $t_string;
1429	249					291	$out_line .= " " x (80 -length($out_line) ) . "\n";
1430	249					389	$output_string .= $out_line;
1431							}
1432							}
1433							}
1434	38					115	$output_string =~ s/\n$//; # remove trailing newline
1435	38					104	$self->_print("$output_string\n");
1436
1437							}
1438
1439							sub _write_PDB_remark_record {
1440	13			13		19	my ($self, $struc, $remark_num) = @_;
1441	13					25	my ($ann) = $struc->annotation->get_Annotations("remark_$remark_num");
1442	13					36	my $name = sprintf("REMARK %3d ",$remark_num);
1443	13					23	$self->_write_PDB_simple_record(-name => $name, -annotation => $ann, -rol => "12-70");
1444							}
1445
1446							1;