File Coverage

blib/lib/Bio/Tools/Run/QCons.pm
Criterion Covered Total %
statement 59 59 100.0
branch 3 4 75.0
condition n/a
subroutine 13 13 100.0
pod n/a
total 75 76 98.6


line stmt bran cond sub pod time code
1             package Bio::Tools::Run::QCons;
2             {
3             $Bio::Tools::Run::QCons::VERSION = '0.112881';
4             }
5              
6             # ABSTRACT: Run Qcons to analyze protein-protein contacts
7              
8 2     2   36186 use Mouse;
  2         145745  
  2         11  
9 2     2   3249 use autodie;
  2         48559  
  2         14  
10 2     2   24048 use namespace::autoclean;
  2         120563  
  2         15  
11 2     2   5329 use Capture::Tiny 'capture_merged';
  2         169841  
  2         191  
12 2     2   1744 use Bio::Tools::Run::QCons::Types 'Executable';
  2         439  
  2         2744  
13              
14             has 'program_name' => (
15             is => 'ro',
16             isa => 'Executable',
17             default => 'Qcontacts',
18             );
19              
20             has file => (
21             is => 'ro',
22             isa => 'Str',
23             required => 1,
24             );
25              
26             has chains => (
27             is => 'ro',
28             isa => 'ArrayRef[Str]',
29             required => 1,
30             );
31              
32             has probe_radius => (
33             is => 'ro',
34             isa => 'Num',
35             default => 1.4,
36             );
37              
38             has _result => ( is => 'ro', lazy_build => 1 );
39              
40             has [qw(residue_contacts atom_contacts)] => (
41             is => 'ro',
42             lazy_build => 1,
43             );
44              
45             sub _build_residue_contacts {
46 1     1   17 return $_[0]->_result->{by_residue};
47             }
48              
49             sub _build_atom_contacts {
50 1     1   518 return $_[0]->_result->{by_atom};
51             }
52              
53             has verbose => (
54             is => 'rw',
55             isa => 'Bool',
56             default => 0,
57             );
58              
59             has _temp_dir => (
60             is => 'ro',
61             isa => 'File::Temp::Dir',
62             lazy_build => 1,
63             );
64              
65             sub _build__temp_dir {
66 1     1   12 require File::Temp;
67              
68 1         12 return File::Temp->newdir();
69             }
70              
71             sub _build__result {
72              
73             # Run Qcontacts with the set parameters, and return
74             # an array with the contact information.
75              
76 1     1   2 my $self = shift;
77 1         3 my $arguments;
78 1         5 my $executable = $self->program_name;
79              
80 1         7 $self->_arguments->{-prefOut} = $self->_temp_dir->dirname . '/';
81              
82             my $output = capture_merged {
83 1     1   1238 system( $executable, %{ $self->_arguments } )
  1         128364  
84 1         44 };
85              
86 1 50       1509 warn $output if $self->verbose;
87              
88 1         19 my @contacts_by_atom = $self->_parse_by_atom();
89 1         15 my @contacts_by_residue = $self->_parse_by_residue();
90              
91 1         52 return { by_atom => \@contacts_by_atom, by_residue => \@contacts_by_residue };
92             }
93              
94             has _arguments => (
95             is => 'ro',
96             init_arg => undef,
97             lazy_build => 1,
98             );
99              
100             # Private methods
101              
102             sub _parse_by_residue {
103              
104             # Qcontacts outputs two files. This subroutine parses
105             # the file that outputs residue-residue contacts.
106              
107 1     1   4 my $self = shift;
108 1         2 my @contacts;
109              
110             # Get the path to the output file.
111 1         16 my $filename = $self->_arguments->{-prefOut} . '/-by-res.vor';
112              
113 1         10 open( my $fh, '<', $filename );
114              
115             # Parse the file line by line, each line corresponds to a
116             # contact.
117 1         224 while ( my $line = <$fh> ) {
118 48         176 my @fields = split( /\s+/, $line );
119              
120 48         248 my %contact = (
121             res1 => {
122             number => $fields[1],
123             name => $fields[2],
124             },
125             res2 => {
126             number => $fields[5],
127             name => $fields[6],
128             },
129             area => $fields[8],
130             );
131 48         200 push @contacts, \%contact;
132             }
133              
134 1         25 return @contacts;
135             }
136              
137             sub _parse_by_atom {
138              
139             # Qcontacts outputs two files. This subroutine parses
140             # the file that outputs atom-atom contacts.
141              
142 1     1   11 my $self = shift;
143 1         12 my @contacts;
144              
145             # Get the path to the output file.
146 1         16 my $filename = $self->_arguments->{-prefOut} . '/-by-atom.vor';
147              
148              
149 1         28 open( my $fh, '<', $filename );
150             # Parse the file line by line, each line corresponds to a
151             # contact.
152              
153 1         4615 my $meaning_for = {
154              
155             # What each parsed field means, depending on the contact
156             # type (fields[1])
157              
158             # contact type => { field number => meaning }
159             V => { 13 => 'area' },
160             H => { 13 => 'area', 14 => 'angle', 15 => 'Rno' },
161             S => {
162             13 => 'area',
163             15 => 'dGhb',
164             17 => 'dGip',
165             18 => 'angle',
166             19 => 'Rno'
167             },
168             I => { 13 => 'area', 14 => 'Rno' },
169             };
170              
171 1         42 while ( my $line = <$fh> ) {
172 233         1253 my @fields = split( ' ', $line );
173 233         2258 my %contact = (
174             atom1 => {
175             number => $fields[5],
176             name => $fields[6],
177             res_name => $fields[3],
178             res_number => $fields[2],
179             },
180             atom2 => {
181             number => $fields[11],
182             name => $fields[12],
183             res_name => $fields[9],
184             res_number => $fields[8],
185             },
186             type => $fields[1],
187             area => $fields[8],
188             );
189              
190             # I can't wait for Perl 6's junctions.
191 233         574 foreach my $type ( keys %$meaning_for ) {
192 932 100       1774 if ( $type eq $fields[1] ) {
193 233         235 foreach my $field ( keys %{ $meaning_for->{$type} } ) {
  233         525  
194              
195             # I just realized that there's parameter in the 'S' type
196             # that has a ')' sticked to it, remove it.
197 239         454 $fields[$field] =~ s/\)//g;
198 239         1014 $contact{ $meaning_for->{$type}{$field} }
199             = $fields[$field];
200             }
201             }
202             }
203              
204 233         1256 push @contacts, \%contact;
205             }
206              
207 1         95 return @contacts;
208              
209             }
210              
211             sub _build__arguments {
212 1     1   1248 my $self = shift;
213             return {
214 1         6 -c1 => ${ $self->chains }[0],
  1         18  
215 1         4 -c2 => ${ $self->chains }[1],
216             -i => $self->file,
217             -probe => $self->probe_radius,
218             };
219             }
220              
221             1;
222              
223              
224             =pod
225              
226             =head1 NAME
227              
228             Bio::Tools::Run::QCons - Run Qcons to analyze protein-protein contacts
229              
230             =head1 VERSION
231              
232             version 0.112881
233              
234             =head1 SYNOPSIS
235              
236             my $q = Bio::Tools::Run::QCons->new(
237             file => $pdbfile,
238             chains => [$chain1, $chain2],
239             );
240              
241             my $contacts_by_atom = $q->atom_contacts;
242             my $contacts_by_residue = $q->residue_contacts;
243              
244             =head1 DESCRIPTION
245              
246             This module implements a wrapper for the QCons application. QCons
247             itself is an implementation of the Polyhedra algorithm for the
248             prediction of protein-protein contacts. From the program's web page
249             (L):
250              
251             "QContacts allows for a fast and accurate analysis of protein binding
252             interfaces. Given a PDB file [...] and the interacting chains of
253             interest, QContacts will provide a graphical representation of the
254             residues in contact. The contact map will not only indicate the
255             contacts present between the two proteins, but will also indicate
256             whether the contact is a packing interaction, hydrogen bond, ion pair
257             or salt bridge (hydrogen-bonded ion pair). Contact maps allow for easy
258             visualization and comparison of protein-protein interactions."
259              
260             For a thorough description of the algorithm, its limitations and a
261             comparison with several others, refer to Fischer, T. et. al: Assessing
262             methods for identifying pair-wise atomic contacts across binding
263             interfaces, J. Struc. Biol., vol 153, p. 103-112, 2006.
264              
265             =head1 ATTRIBUTES
266              
267             =head2 file
268              
269             Gets or sets the file with the protein structures to analyze. The file
270             format should be PDB.
271              
272             Required.
273              
274             =head2 chains
275              
276             chains => ['A', 'B'];
277              
278             Gets or sets the chain IDs of the subunits whose contacts the program
279             will calculate. It takes an array reference of two strings as
280             argument.
281              
282             Required.
283              
284             =head2 probe_radius($radius);
285              
286             Gets or sets the probe radius that the program uses to calculate the
287             exposed and buried surfaces. It defaults to 1.4 Angstroms, and unless
288             you have a really strong reason to change this, you should refrain
289             from doing it.
290              
291             =head2 verbose
292              
293             Output debugging information to C. Off by default.
294              
295             =head2 program_name
296              
297             The name of the executable. Defaults to 'Qcontacts', but it can be set
298             to anything at construction time:
299              
300             my $q = Bio::Tools::Run::QCons->new(
301             program_name => 'qcons',
302             file => $pdbfile,
303             chains => [$chain1, $chain2]
304             );
305              
306             Notice that if the binary is not on your PATH environment variable, you
307             should give C a full path to it.
308              
309             =head2 atom_contacts
310              
311             Return an array reference with the information of every atom-atom
312             contact found.
313              
314             The structure of the array reference is as follows:
315              
316             $by_atom = [
317             {
318             'area' => '0.400',
319             'type' => 'V',
320             'atom2' => {
321             'number' => '461',
322             'res_name' => 'SER',
323             'res_number' => '59',
324             'name' => 'OG'
325             },
326             'atom1' => {
327             'number' => '2226',
328             'res_name' => 'ASN',
329             'res_number' => '318',
330             'name' => 'CB'
331             }
332             },
333             ]
334              
335             This corresponds to the information of one contact. Here, 'atom1'
336             refers to the atom belonging to the first of the two polypeptides
337             given to the 'chains' method; 'atom2' refers to the second. The fields
338             'number' and 'name' refer to the atom's number and name, respectively.
339             The fields 'res_name' and 'res_number' indicate the atom's parent
340             residue name and residue id. 'type' indicates one of the five
341             non-covalent bonds that the program predicts:
342              
343             =head2 residue_contacts
344              
345             Returns an array reference with the information of every residue-residue
346             contact found.
347              
348             The structure of the array is organized as follows:
349              
350             $by_res = [
351             {
352             'area' => '20.033',
353             'res1' => {
354             'number' => '318',
355             'name' => 'ASN'
356             },
357             'res2' => {
358             'number' => '59',
359             'name' => 'SER'
360             }
361             },
362             ]
363              
364             Here, bond type is obviously not given since the contact can possibly
365             involve more than one atom-atom contact type. 'res1' and 'res2'
366             correspond to the residues of the first and second chain ID given,
367             respectively. 'area' is the sum of every atom-atom contact that the
368             residue pair has. Their names (as three-letter residue names) and
369             number are given as hashrefs.
370              
371             =begin :list
372              
373             * B Van der Waals (packing interaction)
374              
375             * B Ion pair
376              
377             * B Salt bridge (hydrogen-bonded ion pair)
378              
379             * B Hydrogen bond (hydrogen-bonded ion pair)
380              
381             =end :list
382              
383             Every bond type has the 'area' attribute, which indicates the surface
384             (in square Angstroms) of the interaction. In addition, all N-O
385             contacts (I, S and H) have a 'Rno' value that represents the N-O
386             distance. Directional contacts (S and H) also have an 'angle' feature
387             that indicates the contact angle. For salt bridges, estimations of the
388             free energy of hydrogen bond (dGhb) and free energy of ionic pair
389             (dGip) are also given.
390              
391             =head1 THANKS
392              
393             To Chad Davis for prodding me to dust off and release this module to the CPAN.
394              
395             =head1 AUTHOR
396              
397             Bruno Vecchi
398              
399             =head1 COPYRIGHT AND LICENSE
400              
401             This software is copyright (c) 2011 by Bruno Vecchi.
402              
403             This is free software; you can redistribute it and/or modify it under
404             the same terms as the Perl 5 programming language system itself.
405              
406             =cut
407              
408              
409             __END__