File Coverage

blib/lib/Bio/Tools/Run/QCons.pm
Criterion Covered Total %
statement 62 62 100.0
branch 3 4 75.0
condition n/a
subroutine 14 14 100.0
pod n/a
total 79 80 98.7


line stmt bran cond sub pod time code
1             package Bio::Tools::Run::QCons;
2             {
3             $Bio::Tools::Run::QCons::VERSION = '0.112990'; # TRIAL
4             }
5              
6             # ABSTRACT: Run Qcons to analyze protein-protein contacts
7              
8 2     2   42665 use Mouse;
  2         90629  
  2         12  
9 2     2   3315 use autodie;
  2         46333  
  2         14  
10 2     2   18532 use namespace::autoclean;
  2         69864  
  2         13  
11 2     2   2751 use Capture::Tiny 'capture_merged';
  2         97696  
  2         356  
12 2     2   1654 use Bio::Tools::Run::QCons::Types 'Executable';
  2         578  
  2         69  
13 2     2   19 use File::Spec;
  2         5  
  2         2571  
14              
15             has 'program_name' => (
16             is => 'ro',
17             isa => 'Executable',
18             default => 'Qcontacts',
19             );
20              
21             has file => (
22             is => 'ro',
23             isa => 'Str',
24             required => 1,
25             );
26              
27             has chains => (
28             is => 'ro',
29             isa => 'ArrayRef[Str]',
30             required => 1,
31             );
32              
33             has probe_radius => (
34             is => 'ro',
35             isa => 'Num',
36             default => 1.4,
37             );
38              
39             has _result => ( is => 'ro', lazy_build => 1 );
40              
41             has [qw(residue_contacts atom_contacts)] => (
42             is => 'ro',
43             lazy_build => 1,
44             );
45              
46             sub _build_residue_contacts {
47 1     1   14 return $_[0]->_result->{by_residue};
48             }
49              
50             sub _build_atom_contacts {
51 1     1   807 return $_[0]->_result->{by_atom};
52             }
53              
54             has verbose => (
55             is => 'rw',
56             isa => 'Bool',
57             default => 0,
58             );
59              
60             has _temp_dir => (
61             is => 'ro',
62             isa => 'File::Temp::Dir',
63             lazy_build => 1,
64             );
65              
66             sub _build__temp_dir {
67 1     1   10 require File::Temp;
68              
69 1         10 return File::Temp->newdir();
70             }
71              
72             sub _build__result {
73              
74             # Run Qcontacts with the set parameters, and return
75             # an array with the contact information.
76              
77 1     1   3 my $self = shift;
78 1         2 my $arguments;
79 1         6 my $executable = $self->program_name;
80              
81 1         130 $self->_arguments->{-prefOut} =
82             File::Spec->catfile( $self->_temp_dir->dirname, '' );
83              
84             my $output = capture_merged {
85 1     1   1388 system( $executable, %{ $self->_arguments } )
  1         122286  
86 1         70 };
87              
88 1 50       1509 warn $output if $self->verbose;
89              
90 1         15 my @contacts_by_atom = $self->_parse_by_atom();
91 1         18 my @contacts_by_residue = $self->_parse_by_residue();
92              
93 1         73 return { by_atom => \@contacts_by_atom, by_residue => \@contacts_by_residue };
94             }
95              
96             has _arguments => (
97             is => 'ro',
98             init_arg => undef,
99             lazy_build => 1,
100             );
101              
102             # Private methods
103              
104             sub _parse_by_residue {
105              
106             # Qcontacts outputs two files. This subroutine parses
107             # the file that outputs residue-residue contacts.
108              
109 1     1   5 my $self = shift;
110 1         3 my @contacts;
111              
112             # Get the path to the output file.
113 1         119 my $filename =
114             File::Spec->catfile( $self->_arguments->{-prefOut}, '-by-res.vor' );
115              
116 1         13 open( my $fh, '<', $filename );
117              
118             # Parse the file line by line, each line corresponds to a
119             # contact.
120 1         211 while ( my $line = <$fh> ) {
121 48         219 my @fields = split( /\s+/, $line );
122              
123 48         746 my %contact = (
124             res1 => {
125             number => $fields[1],
126             name => $fields[2],
127             },
128             res2 => {
129             number => $fields[5],
130             name => $fields[6],
131             },
132             area => $fields[8],
133             );
134 48         648 push @contacts, \%contact;
135             }
136              
137 1         30 return @contacts;
138             }
139              
140             sub _parse_by_atom {
141              
142             # Qcontacts outputs two files. This subroutine parses
143             # the file that outputs atom-atom contacts.
144              
145 1     1   6 my $self = shift;
146 1         5 my @contacts;
147              
148             # Get the path to the output file.
149 1         45 my $filename =
150             File::Spec->catfile( $self->_arguments->{-prefOut}, '-by-atom.vor' );
151              
152              
153 1         18 open( my $fh, '<', $filename );
154             # Parse the file line by line, each line corresponds to a
155             # contact.
156              
157 1         8926 my $meaning_for = {
158              
159             # What each parsed field means, depending on the contact
160             # type (fields[1])
161              
162             # contact type => { field number => meaning }
163             V => { 13 => 'area' },
164             H => { 13 => 'area', 14 => 'angle', 15 => 'Rno' },
165             S => {
166             13 => 'area',
167             15 => 'dGhb',
168             17 => 'dGip',
169             18 => 'angle',
170             19 => 'Rno'
171             },
172             I => { 13 => 'area', 14 => 'Rno' },
173             };
174              
175 1         44 while ( my $line = <$fh> ) {
176 233         1557 my @fields = split( ' ', $line );
177 233         4386 my %contact = (
178             atom1 => {
179             number => $fields[5],
180             name => $fields[6],
181             res_name => $fields[3],
182             res_number => $fields[2],
183             },
184             atom2 => {
185             number => $fields[11],
186             name => $fields[12],
187             res_name => $fields[9],
188             res_number => $fields[8],
189             },
190             type => $fields[1],
191             area => $fields[8],
192             );
193              
194             # I can't wait for Perl 6's junctions.
195 233         632 foreach my $type ( keys %$meaning_for ) {
196 932 100       2445 if ( $type eq $fields[1] ) {
197 233         350 foreach my $field ( keys %{ $meaning_for->{$type} } ) {
  233         555  
198              
199             # I just realized that there's parameter in the 'S' type
200             # that has a ')' sticked to it, remove it.
201 239         763 $fields[$field] =~ s/\)//g;
202 239         1816 $contact{ $meaning_for->{$type}{$field} }
203             = $fields[$field];
204             }
205             }
206             }
207              
208 233         4364 push @contacts, \%contact;
209             }
210              
211 1         96 return @contacts;
212              
213             }
214              
215             sub _build__arguments {
216 1     1   2908 my $self = shift;
217             return {
218 1         8 -c1 => ${ $self->chains }[0],
  1         21  
219 1         6 -c2 => ${ $self->chains }[1],
220             -i => $self->file,
221             -probe => $self->probe_radius,
222             };
223             }
224              
225             1;
226              
227              
228             =pod
229              
230             =head1 NAME
231              
232             Bio::Tools::Run::QCons - Run Qcons to analyze protein-protein contacts
233              
234             =head1 VERSION
235              
236             version 0.112990
237              
238             =head1 SYNOPSIS
239              
240             my $q = Bio::Tools::Run::QCons->new(
241             file => $pdbfile,
242             chains => [$chain1, $chain2],
243             );
244              
245             my $contacts_by_atom = $q->atom_contacts;
246             my $contacts_by_residue = $q->residue_contacts;
247              
248             =head1 DESCRIPTION
249              
250             This module implements a wrapper for the QCons application. QCons
251             itself is an implementation of the Polyhedra algorithm for the
252             prediction of protein-protein contacts. From the program's web page
253             (L):
254              
255             "QContacts allows for a fast and accurate analysis of protein binding
256             interfaces. Given a PDB file [...] and the interacting chains of
257             interest, QContacts will provide a graphical representation of the
258             residues in contact. The contact map will not only indicate the
259             contacts present between the two proteins, but will also indicate
260             whether the contact is a packing interaction, hydrogen bond, ion pair
261             or salt bridge (hydrogen-bonded ion pair). Contact maps allow for easy
262             visualization and comparison of protein-protein interactions."
263              
264             For a thorough description of the algorithm, its limitations and a
265             comparison with several others, refer to Fischer, T. et. al: Assessing
266             methods for identifying pair-wise atomic contacts across binding
267             interfaces, J. Struc. Biol., vol 153, p. 103-112, 2006.
268              
269             =head1 ATTRIBUTES
270              
271             =head2 file
272              
273             Gets or sets the file with the protein structures to analyze. The file
274             format should be PDB.
275              
276             Required.
277              
278             =head2 chains
279              
280             chains => ['A', 'B'];
281              
282             Gets or sets the chain IDs of the subunits whose contacts the program
283             will calculate. It takes an array reference of two strings as
284             argument.
285              
286             Required.
287              
288             =head2 probe_radius($radius);
289              
290             Gets or sets the probe radius that the program uses to calculate the
291             exposed and buried surfaces. It defaults to 1.4 Angstroms, and unless
292             you have a really strong reason to change this, you should refrain
293             from doing it.
294              
295             =head2 verbose
296              
297             Output debugging information to C. Off by default.
298              
299             =head2 program_name
300              
301             The name of the executable. Defaults to 'Qcontacts', but it can be set
302             to anything at construction time:
303              
304             my $q = Bio::Tools::Run::QCons->new(
305             program_name => 'qcons',
306             file => $pdbfile,
307             chains => [$chain1, $chain2]
308             );
309              
310             Notice that if the binary is not on your PATH environment variable, you
311             should give C a full path to it.
312              
313             =head2 atom_contacts
314              
315             Return an array reference with the information of every atom-atom
316             contact found.
317              
318             The structure of the array reference is as follows:
319              
320             $by_atom = [
321             {
322             'area' => '0.400',
323             'type' => 'V',
324             'atom2' => {
325             'number' => '461',
326             'res_name' => 'SER',
327             'res_number' => '59',
328             'name' => 'OG'
329             },
330             'atom1' => {
331             'number' => '2226',
332             'res_name' => 'ASN',
333             'res_number' => '318',
334             'name' => 'CB'
335             }
336             },
337             ]
338              
339             This corresponds to the information of one contact. Here, 'atom1'
340             refers to the atom belonging to the first of the two polypeptides
341             given to the 'chains' method; 'atom2' refers to the second. The fields
342             'number' and 'name' refer to the atom's number and name, respectively.
343             The fields 'res_name' and 'res_number' indicate the atom's parent
344             residue name and residue id. 'type' indicates one of the five
345             non-covalent bonds that the program predicts:
346              
347             =head2 residue_contacts
348              
349             Returns an array reference with the information of every residue-residue
350             contact found.
351              
352             The structure of the array is organized as follows:
353              
354             $by_res = [
355             {
356             'area' => '20.033',
357             'res1' => {
358             'number' => '318',
359             'name' => 'ASN'
360             },
361             'res2' => {
362             'number' => '59',
363             'name' => 'SER'
364             }
365             },
366             ]
367              
368             Here, bond type is obviously not given since the contact can possibly
369             involve more than one atom-atom contact type. 'res1' and 'res2'
370             correspond to the residues of the first and second chain ID given,
371             respectively. 'area' is the sum of every atom-atom contact that the
372             residue pair has. Their names (as three-letter residue names) and
373             number are given as hashrefs.
374              
375             =begin :list
376              
377             * B Van der Waals (packing interaction)
378              
379             * B Ion pair
380              
381             * B Salt bridge (hydrogen-bonded ion pair)
382              
383             * B Hydrogen bond (hydrogen-bonded ion pair)
384              
385             =end :list
386              
387             Every bond type has the 'area' attribute, which indicates the surface
388             (in square Angstroms) of the interaction. In addition, all N-O
389             contacts (I, S and H) have a 'Rno' value that represents the N-O
390             distance. Directional contacts (S and H) also have an 'angle' feature
391             that indicates the contact angle. For salt bridges, estimations of the
392             free energy of hydrogen bond (dGhb) and free energy of ionic pair
393             (dGip) are also given.
394              
395             =head1 THANKS
396              
397             To Chad Davis for prodding me to dust off and release this module to the CPAN.
398              
399             =head1 AUTHOR
400              
401             Bruno Vecchi
402              
403             =head1 COPYRIGHT AND LICENSE
404              
405             This software is copyright (c) 2011 by Bruno Vecchi.
406              
407             This is free software; you can redistribute it and/or modify it under
408             the same terms as the Perl 5 programming language system itself.
409              
410             =cut
411              
412              
413             __END__