File Coverage

blib/lib/HackaMol/Roles/ReadPdbRole.pm
Criterion Covered Total %
statement 95 99 95.9
branch 34 36 94.4
condition 5 6 83.3
subroutine 8 8 100.0
pod 1 3 33.3
total 143 152 94.0


line stmt bran cond sub pod time code
1             $HackaMol::Roles::ReadPdbRole::VERSION = '0.052';
2             # ABSTRACT: Read files with molecular information
3             use Moose::Role;
4 11     11   5960 use HackaMol::PeriodicTable qw(_element_name _trim _qstring_num);
  11         36  
  11         104  
5 11     11   51217 use Math::Vector::Real;
  11         28  
  11         668  
6 11     11   60 use Carp;
  11         28  
  11         410  
7 11     11   61  
  11         37  
  11         9071  
8             requires 'readline_func';
9              
10             my $self = shift;
11             my $fh = shift;
12 1     1 0 3  
13 1         2 my $header = $self->read_pdb_header( $fh );
14             my ($atoms,$model_ids) = $self->read_pdb_atoms( $fh );
15 1         82  
16 1         9 return ($header,$atoms,$model_ids);
17              
18 1         27 }
19              
20             my $self = shift;
21             my $fh = shift;
22             my $header;
23 1     1 0 3 my $line;
24 1         3 while($line = <$fh>){
25 1         2 if ($line =~ m/^(?:MODEL|ATOM|HETATM)\s/){
26             seek($fh, -length($line),1); # rewind back and end
27 1         49 last;
28 3 100       15 }
29 1         17 else{
30 1         5 $header .= $line;
31             }
32             }
33 2         8 return ($header);
34             }
35              
36 1         4  
37             #read pdb file and generate list of Atom objects
38             my $self = shift;
39              
40             my $fh = shift;
41             #my $file = shift;
42 15     15 1 36 #my $fh = FileHandle->new("<$file") or croak "unable to open $file";
43              
44 15         33 my @atoms;
45             my @model_ids;
46             my ( $n, $t ) = ( 0, 0 );
47             my $q_tbad = 0;
48 15         40 my $something_dirty = 0;
49             my $t0_atom_count = 0;
50 15         58 my $t_atom_count = 0;
51 15         29 my $pdb_model_id;
52 15         42  
53 15         33 while (<$fh>) {
54 15         32 if($self->has_readline_func){
55 15         28 next if $self->readline_func->($_) eq 'PDB_SKIP';
56             }
57 15         1421 if (/^(?:MODEL\s+(\d+))/) {
58 8525 100       219760  
59 921 100       16040 $n = 0;
60             $q_tbad = 0; # flag a bad model and never read again!
61 7670 100       33112 $t_atom_count = 0 ;
    100          
    100          
62             $pdb_model_id = $1;
63 19         53 $model_ids[$t] = $pdb_model_id;
64 19         45 }
65 19         29 elsif (/^(?:ENDMDL)/) {
66 19         47 # delete coords if the number of atoms on t is != to that at start
67 19         145 if ($t){
68             if ($t_atom_count != $t0_atom_count){
69             my $carp_message =
70             "BAD t->$t PDB atom list length changed from $t0_atom_count to $t_atom_count: ignoring model $t";
71 19 100       70 carp $carp_message;
72 9 50       40 $_->delete_coords($t) foreach @atoms;
73 0         0 $t--;
74             }
75 0         0 }
76 0         0 $t++;
77 0         0 }
78             elsif (/^(?:HETATM|ATOM)/) {
79             next if $q_tbad;
80 19         186 my (
81             $record_name, $serial, $name, $altloc,
82             $resName, $chainID, $resSeq, $icod,
83 7326 100       12617 $x, $y, $z, $occ,
84             $B, $segID, $element, $charge
85 7257         47702 ) = unpack "A6A5x1A4A1A3A2A4A1x3A8A8A8A6A6x6A4A2A2", $_ . (" " x 12); # padded out to accommodate truncated pdbs
86              
87             if ( $charge =~ m/\d/ ) { $charge = _qstring_num($charge) }
88             else { $charge = 0 }
89              
90             if ( $chainID =~ m/\w/ ) { $chainID = _trim($chainID) }
91 7257 100       14484 else { $chainID = ' ' }
  7         42  
92 7250         9320  
93             $name = _trim($name);
94 7257 100       16333 $resName = _trim($resName);
  7242         17934  
95 15         29 $resSeq = _trim($resSeq);
96              
97 7257         12982 #$resSeq = 0 if ( $resSeq < 0 );
98 7257         11450 $serial = _trim($serial);
99 7257         11170 $segID = _trim($segID);
100              
101             $element = ucfirst( lc( _trim($element) ) );
102 7257         12474 my $qdirt = 0;
103 7257         11846 ( $element, $qdirt ) = _element_name($name)
104             unless ( $element =~ /\w+/ );
105 7257         11218 $something_dirty++ if ($qdirt);
106 7257         10438 my $xyz = V( $x, $y, $z );
107 7257 100       17024  
108             if ( $t == 0 ) {
109 7257 100       11529 $atoms[$n] = HackaMol::Atom->new(
110 7257         31444 name => $name,
111             record_name => $record_name,
112 7257 100       13738 serial => $serial,
113 4863         118271 chain => $chainID,
114             symbol => $element,
115             charges => [$charge],
116             coords => [$xyz],
117             occ => $occ * 1,
118             bfact => $B * 1,
119             resname => $resName,
120             resid => $resSeq,
121             icode => $icod,
122             segid => $segID,
123             altloc => $altloc,
124             );
125             $atoms[$n]->is_dirty($qdirt) unless $atoms[$n]->is_dirty;
126             $t0_atom_count++;
127             }
128             else {
129 4863 50       101617 # croak condition if atom changes between models
130 4863         6638 # does not catch case were number of atoms shrinks!
131             if ( $n > $#atoms or $name ne $atoms[$n]->name
132             or $element ne $atoms[$n]->symbol )
133             {
134             my $carp_message =
135 2394 100 66     56857 "BAD t->$t PDB Atom $n "
      100        
136             . "serial $serial resname $resName "
137             . "has changed";
138 2         17 carp $carp_message;
139             $q_tbad = $t; # this is a bad model!
140             # wipe out all the coords prior
141             $atoms[$_]->delete_coords($t) foreach 0 .. $n - 1;
142 2         56 $t--;
143 2         1011 next;
144             }
145 2         47 $atoms[$n]->set_coords( $t, $xyz );
146 2         5 $t_atom_count++;
147 2         18 }
148             $n++;
149 2392         65402 }
150 2392         2689 }
151              
152 7255         50446 # set iatom to track the array. diff from serial which refers to pdb
153             $atoms[$_]->iatom($_) foreach ( 0 .. $#atoms );
154             if ($something_dirty) {
155             if ( $self->hush_read <= 0 ) {
156             my $message = "MolReadRole> found $something_dirty dirty atoms. ";
157 15         591 $message .= "Check symbols and lookup names PeriodicTable.pm:";
158 15 100       53 my @sprintf;
159 2 100       46 foreach my $atom (grep {$_->is_dirty} @atoms){
160 1         4 push @sprintf, sprintf(" DIRTY: index %s name %s element %s %10.3f %10.3f %10.3f;", $atom->iatom, $atom->name, $atom->symbol, @{$atom->xyz});
161 1         3 }
162 1         2 $message .= $_ foreach @sprintf;
163 1         4 carp $message;
  36         643  
164 2         46 }
  2         9  
165             }
166 1         4 return (\@atoms,\@model_ids);
167 1         44 }
168              
169             no Moose::Role;
170 15         553  
171             1;
172              
173 11     11   101  
  11         43  
  11         66  
174             =pod
175              
176             =head1 NAME
177              
178             HackaMol::Roles::ReadPdbRole - Read files with molecular information
179              
180             =head1 VERSION
181              
182             version 0.052
183              
184             =head1 SYNOPSIS
185              
186             my @atoms = HackaMol->new
187             ->read_pdb_atoms("some.pdb");
188              
189             =head1 DESCRIPTION
190              
191             The HackaMol::Roles::ReadPdbRole provides read_pdb_atoms reading protein database files.
192              
193             =head1 METHODS
194              
195             =head2 read_pdb_atoms
196              
197             One argument: the filename
198             Returns a list of HackaMol::Atom objects.
199              
200             =head1 Additional information
201              
202             According to the PDB specification, the element symbol should be present in columns
203             77-78. The element is often ommitted by programs, such as charmm, that can write
204             pdbs because it makes the file larger, and the information is accessible somewhere
205             else (protein structure file). Unfortunately, other programs require the information.
206             HackaMol::MolReadRole, uses HackaMol::PeriodicTable to map common names to the element
207             (e.g. POT => 'K'). read_pdb_atoms will carp if the name is not in the hash, and then
208             set the element to the first letter of the name. If you see one of these messages for
209             a common atom, please submit an issue or pull request so the atom can be added!
210              
211             =head1 SEE ALSO
212              
213             =over 4
214              
215             =item *
216              
217             L<HackaMol>
218              
219             =item *
220              
221             L<HackaMol::Atom>
222              
223             =item *
224              
225             L<HackaMol::Roles::MolReadRole>
226              
227             =item *
228              
229             L<Protein Data Bank|http://pdb.org>
230              
231             =back
232              
233             =head1 AUTHOR
234              
235             Demian Riccardi <demianriccardi@gmail.com>
236              
237             =head1 COPYRIGHT AND LICENSE
238              
239             This software is copyright (c) 2017 by Demian Riccardi.
240              
241             This is free software; you can redistribute it and/or modify it under
242             the same terms as the Perl 5 programming language system itself.
243              
244             =cut