File Coverage

blib/lib/HackaMol/Roles/ReadPdbRole.pm
Criterion Covered Total %
statement 95 99 95.9
branch 34 36 94.4
condition 5 6 83.3
subroutine 8 8 100.0
pod 1 3 33.3
total 143 152 94.0


line stmt bran cond sub pod time code
1             $HackaMol::Roles::ReadPdbRole::VERSION = '0.053';
2             # ABSTRACT: Read files with molecular information
3             use Moose::Role;
4 11     11   5452 use HackaMol::PeriodicTable qw(_element_name _trim _qstring_num);
  11         26  
  11         78  
5 11     11   48231 use Math::Vector::Real;
  11         28  
  11         626  
6 11     11   61 use Carp;
  11         24  
  11         389  
7 11     11   58  
  11         24  
  11         8348  
8             requires 'readline_func';
9              
10             my $self = shift;
11             my $fh = shift;
12 1     1 0 3  
13 1         2 my $header = $self->read_pdb_header( $fh );
14             my ($atoms,$model_ids) = $self->read_pdb_atoms( $fh );
15 1         98  
16 1         6 return ($header,$atoms,$model_ids);
17              
18 1         32 }
19              
20             my $self = shift;
21             my $fh = shift;
22             my $header;
23 1     1 0 5 my $line;
24 1         3 while($line = <$fh>){
25 1         3 if ($line =~ m/^(?:MODEL|ATOM|HETATM)\s/){
26             seek($fh, -length($line),1); # rewind back and end
27 1         48 last;
28 3 100       14 }
29 1         14 else{
30 1         4 $header .= $line;
31             }
32             }
33 2         7 return ($header);
34             }
35              
36 1         6  
37             #read pdb file and generate list of Atom objects
38             my $self = shift;
39              
40             my $fh = shift;
41             #my $file = shift;
42 15     15 1 43 #my $fh = FileHandle->new("<$file") or croak "unable to open $file";
43              
44 15         29 my @atoms;
45             my @model_ids;
46             my ( $n, $t ) = ( 0, 0 );
47             my $q_tbad = 0;
48 15         42 my $something_dirty = 0;
49             my $t0_atom_count = 0;
50 15         48 my $t_atom_count = 0;
51 15         34 my $pdb_model_id;
52 15         29  
53 15         32 while (<$fh>) {
54 15         38 if($self->has_readline_func){
55 15         30 next if $self->readline_func->($_) eq 'PDB_SKIP';
56             }
57 15         2149 if (/^(?:MODEL\s+(\d+))/) {
58 8525 100       215209  
59 921 100       16230 $n = 0;
60             $q_tbad = 0; # flag a bad model and never read again!
61 7670 100       31958 $t_atom_count = 0 ;
    100          
    100          
62             $pdb_model_id = $1;
63 19         44 $model_ids[$t] = $pdb_model_id;
64 19         34 }
65 19         33 elsif (/^(?:ENDMDL)/) {
66 19         42 # delete coords if the number of atoms on t is != to that at start
67 19         155 if ($t){
68             if ($t_atom_count != $t0_atom_count){
69             my $carp_message =
70             "BAD t->$t PDB atom list length changed from $t0_atom_count to $t_atom_count: ignoring model $t";
71 19 100       62 carp $carp_message;
72 9 50       35 $_->delete_coords($t) foreach @atoms;
73 0         0 $t--;
74             }
75 0         0 }
76 0         0 $t++;
77 0         0 }
78             elsif (/^(?:HETATM|ATOM)/) {
79             next if $q_tbad;
80 19         193 my (
81             $record_name, $serial, $name, $altloc,
82             $resName, $chainID, $resSeq, $icod,
83 7326 100       13836 $x, $y, $z, $occ,
84             $B, $segID, $element, $charge
85 7257         46140 ) = unpack "A6A5x1A4A1A3A2A4A1x3A8A8A8A6A6x6A4A2A2", $_ . (" " x 12); # padded out to accommodate truncated pdbs
86              
87             if ( $charge =~ m/\d/ ) { $charge = _qstring_num($charge) }
88             else { $charge = 0 }
89              
90             if ( $chainID =~ m/\w/ ) { $chainID = _trim($chainID) }
91 7257 100       14416 else { $chainID = ' ' }
  7         44  
92 7250         10304  
93             $name = _trim($name);
94 7257 100       15939 $resName = _trim($resName);
  7242         17713  
95 15         30 $resSeq = _trim($resSeq);
96              
97 7257         13020 #$resSeq = 0 if ( $resSeq < 0 );
98 7257         12534 $serial = _trim($serial);
99 7257         11719 $segID = _trim($segID);
100              
101             $element = ucfirst( lc( _trim($element) ) );
102 7257         11600 my $qdirt = 0;
103 7257         12197 ( $element, $qdirt ) = _element_name($name)
104             unless ( $element =~ /\w+/ );
105 7257         11783 $something_dirty++ if ($qdirt);
106 7257         10325 my $xyz = V( $x, $y, $z );
107 7257 100       16732  
108             if ( $t == 0 ) {
109 7257 100       12388 $atoms[$n] = HackaMol::Atom->new(
110 7257         32425 name => $name,
111             record_name => $record_name,
112 7257 100       12847 serial => $serial,
113 4863         117553 chain => $chainID,
114             symbol => $element,
115             charges => [$charge],
116             coords => [$xyz],
117             occ => $occ * 1,
118             bfact => $B * 1,
119             resname => $resName,
120             resid => $resSeq,
121             icode => $icod,
122             segid => $segID,
123             altloc => $altloc,
124             );
125             $atoms[$n]->is_dirty($qdirt) unless $atoms[$n]->is_dirty;
126             $t0_atom_count++;
127             }
128             else {
129 4863 50       103351 # croak condition if atom changes between models
130 4863         7004 # does not catch case were number of atoms shrinks!
131             if ( $n > $#atoms or $name ne $atoms[$n]->name
132             or $element ne $atoms[$n]->symbol )
133             {
134             my $carp_message =
135 2394 100 66     55400 "BAD t->$t PDB Atom $n "
      100        
136             . "serial $serial resname $resName "
137             . "has changed";
138 2         15 carp $carp_message;
139             $q_tbad = $t; # this is a bad model!
140             # wipe out all the coords prior
141             $atoms[$_]->delete_coords($t) foreach 0 .. $n - 1;
142 2         67 $t--;
143 2         1085 next;
144             }
145 2         51 $atoms[$n]->set_coords( $t, $xyz );
146 2         7 $t_atom_count++;
147 2         16 }
148             $n++;
149 2392         63498 }
150 2392         2765 }
151              
152 7255         52225 # set iatom to track the array. diff from serial which refers to pdb
153             $atoms[$_]->iatom($_) foreach ( 0 .. $#atoms );
154             if ($something_dirty) {
155             if ( $self->hush_read <= 0 ) {
156             my $message = "MolReadRole> found $something_dirty dirty atoms. ";
157 15         541 $message .= "Check symbols and lookup names PeriodicTable.pm:";
158 15 100       58 my @sprintf;
159 2 100       56 foreach my $atom (grep {$_->is_dirty} @atoms){
160 1         6 push @sprintf, sprintf(" DIRTY: index %s name %s element %s %10.3f %10.3f %10.3f;", $atom->iatom, $atom->name, $atom->symbol, @{$atom->xyz});
161 1         4 }
162 1         3 $message .= $_ foreach @sprintf;
163 1         4 carp $message;
  36         653  
164 2         46 }
  2         12  
165             }
166 1         7 return (\@atoms,\@model_ids);
167 1         31 }
168              
169             no Moose::Role;
170 15         620  
171             1;
172              
173 11     11   93  
  11         26  
  11         70  
174             =pod
175              
176             =head1 NAME
177              
178             HackaMol::Roles::ReadPdbRole - Read files with molecular information
179              
180             =head1 VERSION
181              
182             version 0.053
183              
184             =head1 SYNOPSIS
185              
186             my @atoms = HackaMol->new
187             ->read_pdb_atoms("some.pdb");
188              
189             =head1 DESCRIPTION
190              
191             The HackaMol::Roles::ReadPdbRole provides read_pdb_atoms reading protein database files.
192              
193             =head1 METHODS
194              
195             =head2 read_pdb_atoms
196              
197             One argument: the filename
198             Returns a list of HackaMol::Atom objects.
199              
200             =head1 Additional information
201              
202             According to the PDB specification, the element symbol should be present in columns
203             77-78. The element is often ommitted by programs, such as charmm, that can write
204             pdbs because it makes the file larger, and the information is accessible somewhere
205             else (protein structure file). Unfortunately, other programs require the information.
206             HackaMol::MolReadRole, uses HackaMol::PeriodicTable to map common names to the element
207             (e.g. POT => 'K'). read_pdb_atoms will carp if the name is not in the hash, and then
208             set the element to the first letter of the name. If you see one of these messages for
209             a common atom, please submit an issue or pull request so the atom can be added!
210              
211             =head1 SEE ALSO
212              
213             =over 4
214              
215             =item *
216              
217             L<HackaMol>
218              
219             =item *
220              
221             L<HackaMol::Atom>
222              
223             =item *
224              
225             L<HackaMol::Roles::MolReadRole>
226              
227             =item *
228              
229             L<Protein Data Bank|http://pdb.org>
230              
231             =back
232              
233             =head1 AUTHOR
234              
235             Demian Riccardi <demianriccardi@gmail.com>
236              
237             =head1 COPYRIGHT AND LICENSE
238              
239             This software is copyright (c) 2017 by Demian Riccardi.
240              
241             This is free software; you can redistribute it and/or modify it under
242             the same terms as the Perl 5 programming language system itself.
243              
244             =cut