File Coverage

blib/lib/HackaMol/Roles/SelectionRole.pm
Criterion Covered Total %
statement 41 41 100.0
branch 6 6 100.0
condition n/a
subroutine 6 6 100.0
pod 1 1 100.0
total 54 54 100.0


line stmt bran cond sub pod time code
1             $HackaMol::Roles::SelectionRole::VERSION = '0.053';
2             #ABSTRACT: Atom selections in molecules
3             use Moose::Role;
4 14     14   8276 use HackaMol::AtomGroup;
  14         30  
  14         94  
5 14     14   67108 use Carp;
  14         80  
  14         431  
6 14     14   72  
  14         27  
  14         9409  
7              
8             my %common_selection = ( # named selections: key => Perl expression held as a string; select_group evals it inside grep{} with $_ bound to each atom
9             'sidechain' => '$_->record_name eq "ATOM" and not $_->name =~ /^(N|CA|C|O|OXT)$/', # ATOM records whose name is not one of N, CA, C, O, OXT
10             'backbone' => '$_->record_name eq "ATOM" and $_->name =~ /^(N|CA|C|O)$/', # backbone restricted to ATOM to avoid HETATM weirdness, e.g. het cys in 1v1q
11             'water' => '$_->resname =~ m/HOH|TIP|H2O/ and $_->record_name eq "HETATM"', # resname pattern is unanchored, so it matches anywhere in the residue name
12             'protein' => '$_->record_name eq "ATOM"', # every ATOM record
13             'ligands' => '($_->resname !~ m/HOH|TIP|H2O/ ) and $_->record_name eq "HETATM"', # HETATM records that do not match the water pattern
14             'metals' => '$_->symbol =~ m/^(Li|Be|Na|Mg|K|Ca|Sc|Ti|V|Cr|Mn|Fe|Co|Ni|Cu|Zn|Rb|Sr|Y|Zr|Nb|Mo|Tc|Ru|Rh|Pd|Ag|Cd|Cs|Ba|La|Ce|Pr|Nd|Pm|Sm|Eu|Gd|Tb|Dy|Ho|Er|Tm|Yb|Lu|Hf|Ta|W|Re|Os|Ir|Pt|Au|Hg)$/', # anchored, case-sensitive match on the element symbol
15             );
16              
17             has 'selection' => ( # hash attribute of name => string; the select_group code visible in this file never reads it
18             traits => ['Hash'],
19             is => 'ro',
20             isa => 'HashRef[Str]',
21             lazy => 1,
22             default => sub { {} }, # fresh empty hash per object
23             handles => {
24             get_selection => 'get',
25             set_selection => 'set',
26             has_selection => 'count', # NOTE(review): key repeated below; in a hash literal the later pair wins, so this 'count' delegation is discarded (selections_cr uses has_selections_cr for 'count')
27             keys_selection => 'keys',
28             delete_selection => 'delete',
29             has_selection => 'exists', # this is the delegation has_selection actually gets
30             },
31             );
32              
33              
34             has 'selections_cr' => ( # hash attribute of name => coderef; select_group checks it before the common selections
35             traits => ['Hash'],
36             is => 'ro',
37             isa => 'HashRef[CodeRef]',
38             default => sub { {} }, # fresh empty hash per object
39             lazy => 1,
40             handles => {
41             get_selection_cr => 'get', # used by select_group to fetch the stored coderef
42             set_selection_cr => 'set',
43             has_selections_cr => 'count', # plural name: number of stored coderefs
44             keys_selection_cr => 'keys',
45             delete_selection_cr => 'delete',
46             has_selection_cr => 'exists', # singular name: is a coderef stored under this key? (used by select_group)
47             },
48             );
49              
50              
51             my $self = shift;
52             my $selection = shift;
53 37     37 1 588 my $method;
54 37         68  
55 37         51 if ($self->has_selection_cr($selection)){ #attr takes priority so user can change
56             $method = $self->get_selection_cr($selection);
57 37 100       1084 }
    100          
58 1         31 elsif ( exists( $common_selection{$selection} ) ) {
59             $method = eval("sub{ grep{ $common_selection{$selection} } \@_ }");
60             }
61 6         790 else {
62             $method = _regex_method($selection);
63             }
64 30         73  
65             #grep { &{ sub{ $_%2 } }($_)} 1..10
66              
67             my $group =
68             HackaMol::AtomGroup->new( atoms => [ &{$method}( $self->all_atoms ) ], );
69              
70 37         1054 return ($group);
  37         686  
71              
72 37         1999 }
73              
74             # $mol->select_group('(chain A .or. (resname TYR .and. chain B)) .and. occ .within. 1')
75             # becomes grep{($_->chain eq A or ($_->resname eq TYR and $_->chain eq 'B')) and $_->occ <= 1.0}
76              
77             my $str = shift;
78            
79             # allow and or .and. .or. ... does this cause other problems with names?
80 30     30   48 $str =~ s/\sand\s/ \.and\. /g;
81             $str =~ s/\sor\s/ \.or\. /g;
82             $str =~ s/\snot\s/ \.not\. /g;
83 30         97  
84 30         53 #print "$str not implemented yet"; return(sub{0});
85 30         41 #my @parenth = $str =~ /(\(([^()]|(?R))*\))/g
86              
87             # ranges resid 1+3-10+20 -> resid =~ /^(1|3|4|5|6|7|8|9|10|20)$/
88             my @ranges = $str =~ /(\w+\s+(?:\w+|\d+)(?:\+|\-)[^\s]+)/g;
89             foreach my $range (@ranges){
90             my ($attr,$sel) = split(/\s+/, $range);
91 30         140 #$range =~ s/\+/\\+/g;
92 30         64 #$range =~ s/\-/\\-/g;
93 6         22 my $gsel = join '|',map{/(.+)-(.+)/ ? ($1 .. $2) : $_ } split('\+', $sel );
94             $str =~ s/\Q$range\E/\$\_->$attr =~ \/^($gsel)\$\//g;
95             }
96 6 100       18  
  13         49  
97 6         67 $str =~ s/(\w+)\s+(\d*[A-Za-z]+\d*)/\$\_->$1 eq \'$2\'/g; # resnames must have at least 1 letter
98             $str =~ s/(\w+)\s+(-?\d+)/\$\_->$1 eq $2/g;
99             $str =~ s/(\w+)\s+\.within\.\s+(\d+)/\$\_->$1 <= $2/g;
100 30         190 $str =~ s/(\w+)\s+\.beyond\.\s+(\d+)/\$\_->$1 >= $2/g;
101 30         180 $str =~ s/$_/\($common_selection{$_}\)/g foreach keys %common_selection;
102 30         56 $str =~ s/\.and\./and/g;
103 30         44 $str =~ s/\.or\./or/g;
104 30         835 $str =~ s/\.not\./not/g;
105 30         95  
106 30         55 return ( eval("sub{ grep{ $str } \@_ }") );
107 30         50 }
108              
109 30         2588  
110              
111             no Moose::Role;
112              
113             1;
114 14     14   101  
  14         23  
  14         79  
115              
116             =pod
117              
118             =head1 NAME
119              
120             HackaMol::Roles::SelectionRole - Atom selections in molecules
121              
122             =head1 VERSION
123              
124             version 0.053
125              
126             =head1 DESCRIPTION
127              
128             The goal of HackaMol::Roles::SelectionRole is to simplify atom selections. This role is not loaded with the core; it
129             must be applied as done in the synopsis. The method commonly used is select_group, which uses regular expressions to convert
130             a string argument to construct a method for filtering; a HackaMol::AtomGroup is returned. The select_group method operates
131             on atoms contained within the object to which the role is applied (i.e. $self->all_atoms). The role is envisioned for
132             instances of the HackaMol::Molecule class.
133              
134             =head2 Common Selections: backbone, sidechain, protein, etc.
135              
136             Some common selections are included for convenience: backbone, sidechain, protein, water, ligands, and metals.
137              
138             my $bb = $mol->select_group('backbone');
139              
140             =head2 Novel selections using strings: e.g. 'chain E', 'Z 8', 'chain E .and. Z 6'
141              
142             Strings are used for novel selections, the simplest selection being the pair of one attribute with one value separated by a space.
143             For example, "chain E" will split the string and return all those that match (atom->chain eq 'E').
144              
145             my $enzyme = $mol->select_group('chain E');
146              
147             This will work for any attribute (e.g. atom->Z == 8). This approach requires less perl know-how than the equivalent,
148              
149             my @enzyme_atoms = grep{$_->chain eq 'E'} $mol->all_atoms;
150             my $enzyme = HackaMol::AtomGroup->new(atoms=>[@enzyme_atoms]);
151              
152             More complex selections are also straightforward using the following operators:
153              
154             .or. matches if an atom satisfies either selection (separated by .or.)
155             .and. matches if an atom satisfies both selections (separated by .and.)
156             .within. less than or equal to for numeric attributes
157             .beyond. greater than or equal to for numeric attributes
158             .not. everything but
159              
160             More operators, such as .around., will be added as needs arise. Let's take a couple of examples.
161              
162             1. To select all the tyrosines from chain E,
163              
164             my $TYR_E = $mol->select_group('chain E .and. resname TYR');
165              
166             2. To choose both chain E and chain I,
167              
168             my $two_chains = $mol->select_group('chain E .or. chain I');
169              
170             Parentheses are also supported to allow selection precedence.
171              
172             3. To select all the tyrosines from chain E along with all the tyrosines from chain I,
173              
174             my $TYR_EI = $mol->select_group('(resname TYR .and. chain E) .or. (resname TYR .and. chain I)');
175              
176             4. To select all atoms with occupancies between 0.5 and 0.95,
177              
178             my $occs = $mol->select_group('(occ .within. 0.95) .and. (occ .beyond. 0.5)');
179              
180             The common selections (protein, water, backbone, sidechain) can also be used in the selections. For example, select
181             chain I but not the chain I water molecules (sometimes the water molecules get the chain id),
182              
183             my $chain_I = $mol->select_group('chain I .and. .not. water');
184              
185             =head2 Extreme selections using code references.
186              
187             The role also provides an attribute with hash traits that can be used to create insanely flexible selections using code references.
188             As long as the code reference returns a list of atoms, you can do whatever you want. For example, let's define a sidechains selection; the
189             key will be a simple string ("my_sidechains") and the value will be an anonymous subroutine.
190             For example,
191              
192             $mol->set_selection_cr("my_sidechains" => sub {grep { $_->record_name eq 'ATOM' and not
193             ( $_->name eq 'N' or $_->name eq 'CA'
194             or $_->name eq 'C' or $_->name eq 'Flowers and sausages')
195             } @_ }
196             );
197              
198             Now $mol->select_group('my_sidechains') will return a group corresponding to the selection defined above. If you were to rename
199             "my_sidechains" to "sidechain", your "sidechain" would be loaded in place of the common selection "sidechain" because of the priority
200             described below in the select_group method.
201              
202             =head1 METHODS
203              
204             =head2 set_selection_cr
205              
206             two arguments: a string and a coderef
207              
208             =head2 select_group
209              
210             takes one argument (string) and returns a HackaMol::AtomGroup object containing the selected atoms. Priority: the select_group method looks at
211             selections_cr first, then the common selections, and finally, if there were no known selections, it passes the argument to be processed
212             using regular expressions.
213              
214             =head1 ATTRIBUTES
215              
216             =head2 selections_cr
217              
218             isa HashRef[CodeRef] that is lazy with public Hash traits. This attribute allows the user to use code references in the atom selections.
219             The list of atoms, contained in the role consuming object, will be passed to the code reference, and a list of atoms is the expected output
220             of the code reference, e.g.
221              
222             @new_atoms = &{$code_ref}(@atoms);
223              
224             =head1 SYNOPSIS
225              
226             # load 2SIC from RCSB.org and pull out two groups: the enzyme (chain E) and the inhibitor (chain I)
227              
228             use HackaMol;
229             use Moose::Util qw( ensure_all_roles ); # to apply the role to the molecule object
230              
231             my $mol = HackaMol->new->pdbid_mol("2sic"); #returns HackaMol::Molecule
232              
233             ensure_all_roles($mol, 'HackaMol::Roles::SelectionRole'); # now $mol has the select_group method
234              
235             my $enzyme = $mol->select_group("chain E");
236             my $inhib = $mol->select_group("chain I");
237              
238             =head1 WARNING
239              
240             This is still under active development and may change or just not work. I still need to add warnings to help with bad
241             selections. Let me know if you have problems or suggestions!
242              
243             =head1 AUTHOR
244              
245             Demian Riccardi <demianriccardi@gmail.com>
246              
247             =head1 COPYRIGHT AND LICENSE
248              
249             This software is copyright (c) 2017 by Demian Riccardi.
250              
251             This is free software; you can redistribute it and/or modify it under
252             the same terms as the Perl 5 programming language system itself.
253              
254             =cut