File Coverage

blib/lib/HackaMol/Roles/ReadPdbRole.pm
Criterion Covered Total %
statement 95 99 95.9
branch 34 36 94.4
condition 5 6 83.3
subroutine 8 8 100.0
pod 1 3 33.3
total 143 152 94.0


line stmt bran cond sub pod time code
1             package HackaMol::Roles::ReadPdbRole;
2             $HackaMol::Roles::ReadPdbRole::VERSION = '0.051';
3             # ABSTRACT: Read files with molecular information
4 11     11   5701 use Moose::Role;
  11         34  
  11         87  
5 11     11   57716 use HackaMol::PeriodicTable qw(_element_name _trim _qstring_num);
  11         60  
  11         677  
6 11     11   81 use Math::Vector::Real;
  11         41  
  11         438  
7 11     11   74 use Carp;
  11         35  
  11         10169  
8              
9             requires 'readline_func';
10              
11             sub read_pdb_parts{
12 1     1 0 4 my $self = shift;
13 1         2 my $fh = shift;
14              
15 1         83 my $header = $self->read_pdb_header( $fh );
16 1         15 my ($atoms,$model_ids) = $self->read_pdb_atoms( $fh );
17              
18 1         53 return ($header,$atoms,$model_ids);
19              
20             }
21              
22             sub read_pdb_header{
23 1     1 0 15 my $self = shift;
24 1         2 my $fh = shift;
25 1         3 my $header;
26             my $line;
27 1         59 while($line = <$fh>){
28 3 100       15 if ($line =~ m/^(?:MODEL|ATOM|HETATM)\s/){
29 1         16 seek($fh, -length($line),1); # rewind back and end
30 1         3 last;
31             }
32             else{
33 2         9 $header .= $line;
34             }
35             }
36 1         6 return ($header);
37             }
38              
39             sub read_pdb_atoms {
40              
41             #read pdb file and generate list of Atom objects
42 15     15 1 45 my $self = shift;
43              
44 15         43 my $fh = shift;
45             #my $file = shift;
46             #my $fh = FileHandle->new("<$file") or croak "unable to open $file";
47              
48 15         50 my @atoms;
49             my @model_ids;
50 15         61 my ( $n, $t ) = ( 0, 0 );
51 15         49 my $q_tbad = 0;
52 15         42 my $something_dirty = 0;
53 15         38 my $t0_atom_count = 0;
54 15         35 my $t_atom_count = 0;
55 15         41 my $pdb_model_id;
56              
57 15         2029 while (<$fh>) {
58 8525 100       268192 if($self->has_readline_func){
59 921 100       19969 next if $self->readline_func->($_) eq 'PDB_SKIP';
60             }
61 7670 100       39666 if (/^(?:MODEL\s+(\d+))/) {
    100          
    100          
62              
63 19         58 $n = 0;
64 19         45 $q_tbad = 0; # flag a bad model and never read again!
65 19         56 $t_atom_count = 0 ;
66 19         55 $pdb_model_id = $1;
67 19         139 $model_ids[$t] = $pdb_model_id;
68             }
69             elsif (/^(?:ENDMDL)/) {
70             # delete coords if the number of atoms on t is != to that at start
71 19 100       91 if ($t){
72 9 50       43 if ($t_atom_count != $t0_atom_count){
73 0         0 my $carp_message =
74             "BAD t->$t PDB atom list length changed from $t0_atom_count to $t_atom_count: ignoring model $t";
75 0         0 carp $carp_message;
76 0         0 $_->delete_coords($t) foreach @atoms;
77 0         0 $t--;
78             }
79             }
80 19         191 $t++;
81             }
82             elsif (/^(?:HETATM|ATOM)/) {
83 7326 100       16819 next if $q_tbad;
84             my (
85 7257         58289 $record_name, $serial, $name, $altloc,
86             $resName, $chainID, $resSeq, $icod,
87             $x, $y, $z, $occ,
88             $B, $segID, $element, $charge
89             ) = unpack "A6A5x1A4A1A3x1A1A4A1x3A8A8A8A6A6x6A4A2A2", $_ . (" " x 12); # padded out to accommodate truncated pdbs
90              
91 7257 100       19543 if ( $charge =~ m/\d/ ) { $charge = _qstring_num($charge) }
  7         49  
92 7250         11860 else { $charge = 0 }
93              
94 7257 100       20451 if ( $chainID =~ m/\w/ ) { $chainID = _trim($chainID) }
  7242         22164  
95 15         38 else { $chainID = ' ' }
96              
97 7257         15260 $name = _trim($name);
98 7257         14812 $resName = _trim($resName);
99 7257         14718 $resSeq = _trim($resSeq);
100              
101             #$resSeq = 0 if ( $resSeq < 0 );
102 7257         15387 $serial = _trim($serial);
103 7257         14366 $segID = _trim($segID);
104              
105 7257         14563 $element = ucfirst( lc( _trim($element) ) );
106 7257         12524 my $qdirt = 0;
107 7257 100       21054 ( $element, $qdirt ) = _element_name($name)
108             unless ( $element =~ /\w+/ );
109 7257 100       14759 $something_dirty++ if ($qdirt);
110 7257         37614 my $xyz = V( $x, $y, $z );
111              
112 7257 100       17974 if ( $t == 0 ) {
113 4863         147935 $atoms[$n] = HackaMol::Atom->new(
114             name => $name,
115             record_name => $record_name,
116             serial => $serial,
117             chain => $chainID,
118             symbol => $element,
119             charges => [$charge],
120             coords => [$xyz],
121             occ => $occ * 1,
122             bfact => $B * 1,
123             resname => $resName,
124             resid => $resSeq,
125             icode => $icod,
126             segid => $segID,
127             altloc => $altloc,
128             );
129 4863 50       126673 $atoms[$n]->is_dirty($qdirt) unless $atoms[$n]->is_dirty;
130 4863         8800 $t0_atom_count++;
131             }
132             else {
133             # croak condition if atom changes between models
134             # does not catch case were number of atoms shrinks!
135 2394 100 66     71248 if ( $n > $#atoms or $name ne $atoms[$n]->name
      100        
136             or $element ne $atoms[$n]->symbol )
137             {
138 2         22 my $carp_message =
139             "BAD t->$t PDB Atom $n "
140             . "serial $serial resname $resName "
141             . "has changed";
142 2         85 carp $carp_message;
143 2         1312 $q_tbad = $t; # this is a bad model!
144             # wipe out all the coords prior
145 2         85 $atoms[$_]->delete_coords($t) foreach 0 .. $n - 1;
146 2         7 $t--;
147 2         23 next;
148             }
149 2392         80285 $atoms[$n]->set_coords( $t, $xyz );
150 2392         3386 $t_atom_count++;
151             }
152 7255         63746 $n++;
153             }
154             }
155              
156             # set iatom to track the array. diff from serial which refers to pdb
157 15         715 $atoms[$_]->iatom($_) foreach ( 0 .. $#atoms );
158 15 100       80 if ($something_dirty) {
159 2 100       80 if ( $self->hush_read <= 0 ) {
160 1         6 my $message = "MolReadRole> found $something_dirty dirty atoms. ";
161 1         10 $message .= "Check symbols and lookup names PeriodicTable.pm:";
162 1         4 my @sprintf;
163 1         4 foreach my $atom (grep {$_->is_dirty} @atoms){
  36         837  
164 2         58 push @sprintf, sprintf(" DIRTY: index %s name %s element %s %10.3f %10.3f %10.3f;", $atom->iatom, $atom->name, $atom->symbol, @{$atom->xyz});
  2         27  
165             }
166 1         8 $message .= $_ foreach @sprintf;
167 1         31 carp $message;
168             }
169             }
170 15         853 return (\@atoms,\@model_ids);
171             }
172              
173 11     11   120 no Moose::Role;
  11         39  
  11         69  
174              
175             1;
176              
177             __END__
178              
179             =pod
180              
181             =head1 NAME
182              
183             HackaMol::Roles::ReadPdbRole - Read files with molecular information
184              
185             =head1 VERSION
186              
187             version 0.051
188              
189             =head1 SYNOPSIS
190              
191             my @atoms = HackaMol->new
192             ->read_pdb_atoms("some.pdb");
193              
194             =head1 DESCRIPTION
195              
196             The HackaMol::Roles::ReadPdbRole provides read_pdb_atoms reading protein database files.
197              
198             =head1 METHODS
199              
200             =head2 read_pdb_atoms
201              
202             One argument: the filename
203             Returns a list of HackaMol::Atom objects.
204              
205             =head1 Additional information
206              
207             According to the PDB specification, the element symbol should be present in columns
208             77-78. The element is often ommitted by programs, such as charmm, that can write
209             pdbs because it makes the file larger, and the information is accessible somewhere
210             else (protein structure file). Unfortunately, other programs require the information.
211             HackaMol::MolReadRole, uses HackaMol::PeriodicTable to map common names to the element
212             (e.g. POT => 'K'). read_pdb_atoms will carp if the name is not in the hash, and then
213             set the element to the first letter of the name. If you see one of these messages for
214             a common atom, please submit an issue or pull request so the atom can be added!
215              
216             =head1 SEE ALSO
217              
218             =over 4
219              
220             =item *
221              
222             L<HackaMol>
223              
224             =item *
225              
226             L<HackaMol::Atom>
227              
228             =item *
229              
230             L<HackaMol::Roles::MolReadRole>
231              
232             =item *
233              
234             L<Protein Data Bank|http://pdb.org>
235              
236             =back
237              
238             =head1 AUTHOR
239              
240             Demian Riccardi <demianriccardi@gmail.com>
241              
242             =head1 COPYRIGHT AND LICENSE
243              
244             This software is copyright (c) 2017 by Demian Riccardi.
245              
246             This is free software; you can redistribute it and/or modify it under
247             the same terms as the Perl 5 programming language system itself.
248              
249             =cut