File Coverage

blib/lib/HackaMol/Roles/SelectionRole.pm
Criterion Covered Total %
statement 41 41 100.0
branch 6 6 100.0
condition n/a
subroutine 6 6 100.0
pod 1 1 100.0
total 54 54 100.0


line stmt bran cond sub pod time code
1             package HackaMol::Roles::SelectionRole;
2             $HackaMol::Roles::SelectionRole::VERSION = '0.051';
3             #ABSTRACT: Atom selections in molecules
4 14     14   8455 use Moose::Role;
  14         33  
  14         123  
5 14     14   79761 use HackaMol::AtomGroup;
  14         78  
  14         535  
6 14     14   95 use Carp;
  14         30  
  14         10712  
7              
8              
9             my %common_selection = (
10             'sidechain' => '$_->record_name eq "ATOM" and not $_->name =~ /^(N|CA|C|O|OXT)$/',
11             'backbone' => '$_->record_name eq "ATOM" and $_->name =~ /^(N|CA|C|O)$/', # backbone restricted to ATOM to avoid HETATM weirdness, e.g. het cys in 1v1q
12             'water' => '$_->resname =~ m/HOH|TIP|H2O/ and $_->record_name eq "HETATM"',
13             'protein' => '$_->record_name eq "ATOM"',
14             'ligands' => '($_->resname !~ m/HOH|TIP|H2O/ ) and $_->record_name eq "HETATM"',
15             'metals' => '$_->symbol =~ m/^(Li|Be|Na|Mg|K|Ca|Sc|Ti|V|Cr|Mn|Fe|Co|Ni|Cu|Zn|Rb|Sr|Y|Zr|Nb|Mo|Tc|Ru|Rh|Pd|Ag|Cd|Cs|Ba|La|Ce|Pr|Nd|Pm|Sm|Eu|Gd|Tb|Dy|Ho|Er|Tm|Yb|Lu|Hf|Ta|W|Re|Os|Ir|Pt|Au|Hg)$/',
16             );
17              
18             has 'selection' => (
19             traits => ['Hash'],
20             is => 'ro',
21             isa => 'HashRef[Str]',
22             lazy => 1,
23             default => sub { {} },
24             handles => {
25             get_selection => 'get',
26             set_selection => 'set',
27             has_selection => 'count',
28             keys_selection => 'keys',
29             delete_selection => 'delete',
30             has_selection => 'exists',
31             },
32             );
33              
34              
35             has 'selections_cr' => (
36             traits => ['Hash'],
37             is => 'ro',
38             isa => 'HashRef[CodeRef]',
39             default => sub { {} },
40             lazy => 1,
41             handles => {
42             get_selection_cr => 'get',
43             set_selection_cr => 'set',
44             has_selections_cr => 'count',
45             keys_selection_cr => 'keys',
46             delete_selection_cr => 'delete',
47             has_selection_cr => 'exists',
48             },
49             );
50              
51             sub select_group {
52              
53 37     37 1 845 my $self = shift;
54 37         109 my $selection = shift;
55 37         65 my $method;
56              
57 37 100       1551 if ($self->has_selection_cr($selection)){ #attr takes priority so user can change
    100          
58 1         42 $method = $self->get_selection_cr($selection);
59             }
60             elsif ( exists( $common_selection{$selection} ) ) {
61 6         1165 $method = eval("sub{ grep{ $common_selection{$selection} } \@_ }");
62             }
63             else {
64 30         124 $method = _regex_method($selection);
65             }
66              
67             #grep { &{ sub{ $_%2 } }($_)} 1..10
68              
69             my $group =
70 37         1539 HackaMol::AtomGroup->new( atoms => [ &{$method}( $self->all_atoms ) ], );
  37         969  
71              
72 37         2604 return ($group);
73              
74             }
75              
76             # $mol->select_group('(chain A .or. (resname TYR .and. chain B)) .and. occ .within. 1')
77             # becomes grep{($_->chain eq A or ($_->resname eq TYR and $_->chain eq 'B')) and $_->occ <= 1.0}
78              
79             sub _regex_method {
80 30     30   65 my $str = shift;
81            
82             # allow and or .and. .or. ... does this cause other problems with names?
83 30         101 $str =~ s/\sand\s/ \.and\. /g;
84 30         74 $str =~ s/\sor\s/ \.or\. /g;
85 30         77 $str =~ s/\snot\s/ \.not\. /g;
86              
87             #print "$str not implemented yet"; return(sub{0});
88             #my @parenth = $str =~ /(\(([^()]|(?R))*\))/g
89              
90             # ranges resid 1+3-10+20 -> resid =~ /^(1|3|4|5|6|7|8|9|10|20)$/
91 30         220 my @ranges = $str =~ /(\w+\s+(?:\w+|\d+)(?:\+|\-)[^\s]+)/g;
92 30         102 foreach my $range (@ranges){
93 6         36 my ($attr,$sel) = split(/\s+/, $range);
94             #$range =~ s/\+/\\+/g;
95             #$range =~ s/\-/\\-/g;
96 6 100       41 my $gsel = join '|',map{/(.+)-(.+)/ ? ($1 .. $2) : $_ } split('\+', $sel );
  13         110  
97 6         151 $str =~ s/\Q$range\E/\$\_->$attr =~ \/^($gsel)\$\//g;
98             }
99              
100 30         284 $str =~ s/(\w+)\s+(\d*[A-Za-z]+\d*)/\$\_->$1 eq \'$2\'/g; # resnames must have at least 1 letter
101 30         299 $str =~ s/(\w+)\s+(-?\d+)/\$\_->$1 eq $2/g;
102 30         94 $str =~ s/(\w+)\s+\.within\.\s+(\d+)/\$\_->$1 <= $2/g;
103 30         72 $str =~ s/(\w+)\s+\.beyond\.\s+(\d+)/\$\_->$1 >= $2/g;
104 30         1321 $str =~ s/$_/\($common_selection{$_}\)/g foreach keys %common_selection;
105 30         145 $str =~ s/\.and\./and/g;
106 30         79 $str =~ s/\.or\./or/g;
107 30         71 $str =~ s/\.not\./not/g;
108              
109 30         4311 return ( eval("sub{ grep{ $str } \@_ }") );
110             }
111              
112              
113              
114 14     14   139 no Moose::Role;
  14         39  
  14         76  
115              
116             1;
117              
118             __END__
119              
120             =pod
121              
122             =head1 NAME
123              
124             HackaMol::Roles::SelectionRole - Atom selections in molecules
125              
126             =head1 VERSION
127              
128             version 0.051
129              
130             =head1 DESCRIPTION
131              
132             The goal of HackaMol::Roles::SelectionRole is to simplify atom selections. This role is not loaded with the core; it
133             must be applied as done in the synopsis. The method commonly used is select_group, which uses regular expressions to convert
134             a string argument to construct a method for filtering; a HackaMol::AtomGroup is returned. The select_group method operates
135             on atoms contained within the object to which the role is applied (i.e. $self->all_atoms). The role is envisioned for
136             instances of the HackaMol::Molecule class.
137              
138             =head2 Common Selections: backbone, sidechains, protein, etc.
139              
140             Some common selections are included for convenience: backbone, sidechains, protein, water, ligands, and metals.
141              
142             my $bb = $mol->select_group('backbone');
143              
144             =head2 Novel selections using strings: e.g. 'chain E', 'Z 8', 'chain E .and. Z 6'
145              
146             Strings are used for novel selections, the simplest selection being the pair of one attribute with one value separated by a space.
147             For example, "chain E" will split the string and return all those that match (atom->chain eq 'E').
148              
149             my $enzyme = $mol->select_group('chain E');
150              
151             This will work for any attribute (e.g. atom->Z == 8). This approach requires less perl know-how than the equivalent,
152              
153             my @enzyme_atoms = grep{$_->chain eq 'E'} $mol->all_atoms;
154             my $enzyme = HackaMol::AtomGroup->new(atoms=>[@enzyme_atoms]);
155              
156             More complex selections are also straightforward using the following operators:
157              
158             .or. matches if an atom satisfies either selection (separated by .or.)
159             .and. matches if an atom satisfies both selections (separated by .and.)
160             .within. less than or equal to for numeric attributes
161             .beyond. greater than or equal to for numeric attributes
162             .not. everything but
163              
164             More, such as .around. will be added as needs arise. Let's take a couple of examples.
165              
166             1. To select all the tyrosines from chain E,
167              
168             my $TYR_E = $mol->select_group('chain E .and. resname TYR');
169              
170             2. To choose both chain E and chain I,
171              
172             my $two_chains = $mol->select_group('chain E .or. chain I');
173              
174             Parenthesis are also supported to allow selection precedence.
175              
176             3. To select all the tyrosines from chain E along with all the tyrosines from chain I,
177              
178             my $TYR_EI = $mol->select_group('(resname TYR .and. chain E) .or. (resname TYR .and. chain I)');
179              
180             4. To select all atoms with occupancies between 0.5 and 0.95,
181              
182             my $occs = $mol->select_group('(occ .within. 0.95) .and. (occ .beyond. 0.5)');
183              
184             The common selections (protein, water, backbone, sidechains) can also be used in the selections. For example, select
185             chain I but not the chain I water molecules (sometimes the water molecules get the chain id),
186              
187             my $chain_I = $mol->select_group('chain I .and. .not. water');
188              
189             =head2 Extreme selections using code references.
190              
191             The role also provides the an attribute with hash traits that can be used to create, insanely flexible, selections using code references.
192             As long as the code reference returns a list of atoms, you can do whatever you want. For example, let's define a sidechains selection; the
193             key will be a simple string ("sidechains") and the value will be an anonymous subroutine.
194             For example,
195              
196             $mol->set_selection_cr("my_sidechains" => sub {grep { $_->record_name eq 'ATOM' and not
197             ( $_->name eq 'N' or $_->name eq 'CA'
198             or $_->name eq 'C' or $_->name eq 'Flowers and sausages')
199             } @_ }
200             );
201              
202             Now $mol->select_group('my_sidechains') will return a group corresponding to the selection defined above. If you were to rename
203             "my_sidechains" to "sidechains", your "sidechains" would be loaded in place of the common selection "sidechains" because of the priority
204             described below in the select_group method.
205              
206             =head1 METHODS
207              
208             =head2 set_selections_cr
209              
210             two arguments: a string and a coderef
211              
212             =head2 select_group
213              
214             takes one argument (string) and returns a HackaMol::AtomGroup object containing the selected atoms. Priority: the select_group method looks at
215             selections_cr first, then the common selections, and finally, if there were no known selections, it passes the argument to be processed
216             using regular expressions.
217              
218             =head1 ATTRIBUTES
219              
220             =head2 selections_cr
221              
222             isa HashRef[CodeRef] that is lazy with public Hash traits. This attribute allows the user to use code references in the atom selections.
223             The list of atoms, contained in the role consuming object, will be passed to the code reference, and a list of atoms is the expected output
224             of the code reference, e.g.
225              
226             @new_atoms = &{$code_ref}(@atoms);
227              
228             =head1 SYNOPSIS
229              
230             # load 2SIC from the the RCSB.org and pull out two groups: the enzyme (chain E) and the inhibitor (chain I)
231              
232             use HackaMol;
233             use Moose::Util qw( ensure_all_roles ); # to apply the role to the molecule object
234              
235             my $mol = HackaMol->new->pdbid_mol("2sic"); #returns HackaMol::Molecule
236              
237             ensure_all_roles($mol, 'HackaMol::Roles::SelectionRole') # now $mol has the select_group method;
238              
239             my $enzyme = $mol->select_group("chain E");
240             my $inhib = $mol->select_group("chain I");
241              
242             =head1 WARNING
243              
244             This is still under active development and may change or just not work. I still need to add warnings to help with bad
245             selections. Let me know if you have problems or suggestions!
246              
247             =head1 AUTHOR
248              
249             Demian Riccardi <demianriccardi@gmail.com>
250              
251             =head1 COPYRIGHT AND LICENSE
252              
253             This software is copyright (c) 2017 by Demian Riccardi.
254              
255             This is free software; you can redistribute it and/or modify it under
256             the same terms as the Perl 5 programming language system itself.
257              
258             =cut