File Coverage

blib/lib/HackaMol/Roles/SelectionRole.pm
Criterion Covered Total %
statement 32 32 100.0
branch 4 4 100.0
condition n/a
subroutine 6 6 100.0
pod 1 1 100.0
total 43 43 100.0


line stmt bran cond sub pod time code
1             package HackaMol::Roles::SelectionRole;
2             $HackaMol::Roles::SelectionRole::VERSION = '0.002';
3             #ABSTRACT: Atom selections in molecules
4 1     1   1148653 use Moose::Role;
  1         2  
  1         9  
5 1     1   3918 use HackaMol::AtomGroup;
  1         2  
  1         28  
6 1     1   13 use Carp;
  1         1  
  1         378  
7              
# Built-in named selections. Each value is a string of Perl code, written
# against $_ (the atom under test), that is interpolated into a grep block and
# compiled with a string eval by select_group and _regex_method.
8             my %common_selections = (
9             'backbone' => '$_->record_name eq "ATOM" and ( $_->name eq "N" or $_->name eq "CA" or $_->name eq "C" )',
10             'water' => '$_->resname =~ m/HOH|TIP|H2O/ and $_->record_name eq "HETATM"',
11             'protein' => '$_->record_name eq "ATOM"',
12             'ligands' => '($_->resname !~ m/HOH|TIP|H2O/ ) and $_->record_name eq "HETATM"',
13             'sidechains' => '$_->record_name eq "ATOM"
14             and not( $_->name eq "N" or $_->name eq "CA" or $_->name eq "C" )',
15             'metals' => '$_->symbol =~ m/Li|Be|Na|Mg|K|Ca|Sc|Ti|V|Cr|Mn|Fe|Co|Ni|Cu|Zn|Rb|Sr|Y|Zr|Nb|Mo|Tc|Ru|Rh|Pd|Ag|Cd|Cs|Ba|La|Ce|Pr|Nd|Pm|Sm|Eu|Gd|Tb|Dy|Ho|Er|Tm|Yb|Lu|Hf|Ta|W|Re|Os|Ir|Pt|Au|Hg/',
16             );
17              
# User-defined selections: a hash of selection name => code reference,
# defaulting to an empty hash. The handles below delegate to the native Hash
# trait (get, set, count, keys, delete, exists). select_group consults this
# hash before the built-in common selections.
18             has 'selections_cr' => (
19             traits => ['Hash'],
20             is => 'ro',
21             isa => 'HashRef[CodeRef]',
22             default => sub { {} },
23             handles => {
24             get_selection_cr => 'get',
25             set_selection_cr => 'set',
26             has_selections_cr => 'count',
27             keys_selection_cr => 'keys',
28             delete_selection_cr => 'delete',
29             has_selection_cr => 'exists',
30             },
31             );
32              
# select_group($selection)
#
# Returns a HackaMol::AtomGroup holding the atoms of $self (as returned by
# $self->all_atoms) that pass the filter named or described by $selection.
#
# The filter is looked up in this order:
#   1. a code reference stored under $selection in selections_cr,
#   2. a built-in entry of %common_selections,
#   3. a filter compiled from the selection string by _regex_method.
#
# Croaks if $selection is undefined or if no filter could be built for it.
#
# NOTE: cases 2 and 3 compile Perl code with a string eval; selection strings
# must therefore never come from untrusted input.
sub select_group {
    my $self      = shift;
    my $selection = shift;

    croak "select_group requires a selection string"
      unless defined $selection;

    my $method;
    if ( $self->has_selection_cr($selection) ) {

        # attribute takes priority so the user can override built-ins
        $method = $self->get_selection_cr($selection);
    }
    elsif ( exists $common_selections{$selection} ) {
        $method = eval("sub{ grep{ $common_selections{$selection} } \@_ }");
    }
    else {
        $method = _regex_method($selection);
    }

    # A failed string eval yields undef; report the offending selection
    # instead of dying later with "undefined value as a subroutine reference".
    unless ( ref($method) eq 'CODE' ) {
        my $reason = $@ ? ": $@" : '';
        croak "select_group: unable to build a filter for selection "
          . "'$selection'$reason";
    }

    my @selected = $method->( $self->all_atoms );
    my $group    = HackaMol::AtomGroup->new( atoms => [@selected] );

    return ($group);
}
57              
58             # $mol->select_group('(chain A .or. (resname TYR .and. chain B)) .and. occ .within. 1')
59             # becomes grep{($_->chain eq A or ($_->resname eq TYR and $_->chain eq 'B')) and $_->occ <= 1.0}
60              
# _regex_method($str)
#
# Translates a selection string such as 'chain E .and. resname TYR' into Perl
# source and compiles it into a code reference that greps a list of atoms.
#
# Translation steps, applied in this order:
#   "attr WORD"             -> $_->attr eq 'WORD'
#   "attr NUMBER"           -> $_->attr == NUMBER
#   "attr .within. NUMBER"  -> $_->attr <= NUMBER
#   "attr .beyond. NUMBER"  -> $_->attr >= NUMBER
#   a common selection name -> its parenthesised expression
#   .and. / .or. / .not.    -> and / or / not
#
# Returns the compiled code reference. Croaks, reporting both the original
# selection and its translation, when the generated code does not compile.
#
# NOTE: the selection is compiled with a string eval; never pass untrusted
# input.
sub _regex_method {
    my $str       = shift;
    my $selection = $str;    # untouched copy, kept for diagnostics

    #print "$str not implemented yet"; return(sub{0});
    #my @parenth = $str =~ /(\(([^()]|(?R))*\))/g

    $str =~ s/(\w+)\s+([A-Za-z]+)/\$\_->$1 eq \'$2\'/g;
    $str =~ s/(\w+)\s+(\d+)/\$\_->$1 == $2/g;
    $str =~ s/(\w+)\s+\.within\.\s+(\d+)/\$\_->$1 <= $2/g;
    $str =~ s/(\w+)\s+\.beyond\.\s+(\d+)/\$\_->$1 >= $2/g;

    # Whole-word match only, so a name embedded in a longer word is left
    # alone; sorted keys keep the generated code deterministic.
    for my $name ( sort keys %common_selections ) {
        $str =~ s/\b\Q$name\E\b/\($common_selections{$name}\)/g;
    }

    $str =~ s/\.and\./and/g;
    $str =~ s/\.or\./or/g;
    $str =~ s/\.not\./not/g;

    # Capture the eval error before leaving the local($@) scope so that the
    # croak below is not clobbered on older perls.
    my ( $method, $error );
    {
        local $@;
        $method = eval("sub{ grep{ $str } \@_ }");
        $error  = $@;
    }

    unless ( ref($method) eq 'CODE' ) {
        $selection = '' unless defined $selection;
        croak "Unable to compile selection '$selection' "
          . "(translated to: $str): $error";
    }

    return $method;
}
78              
79              
80              
81 1     1   5 no Moose::Role;
  1         1  
  1         5  
82              
83             1;
84              
85             __END__
86              
87             =pod
88              
89             =head1 NAME
90              
91             HackaMol::Roles::SelectionRole - Atom selections in molecules
92              
93             =head1 VERSION
94              
95             version 0.002
96              
97             =head1 DESCRIPTION
98              
99             The goal of HackaMol::Roles::SelectionRole is to simplify atom selections. This role is not loaded with the core; it
100             must be applied as done in the synopsis. The method commonly used is select_group, which uses regular expressions to convert
101             a string argument to construct a method for filtering; a HackaMol::AtomGroup is returned. The select_group method operates
102             on atoms contained within the object to which the role is applied (i.e. $self->all_atoms). The role is envisioned for
103             instances of the HackaMol::Molecule class.
104              
105             =head2 Common Selections: backbone, sidechains, protein, etc.
106              
107             Some common selections are included for convenience: backbone, sidechains, protein, water, ligands, and metals.
108              
109             my $bb = $mol->select_group('backbone');
110              
111             =head2 Novel selections using strings: e.g. 'chain E', 'Z 8', 'chain E .and. Z 6'
112              
113             Strings are used for novel selections, the simplest selection being the pair of one attribute with one value separated by a space.
114             For example, "chain E" will split the string and return all those that match (atom->chain eq 'E').
115              
116             my $enzyme = $mol->select_group('chain E');
117              
118             This will work for any attribute (e.g. atom->Z == 8). This approach requires less perl know-how than the equivalent,
119              
120             my @enzyme_atoms = grep{$_->chain eq 'E'} $mol->all_atoms;
121             my $enzyme = HackaMol::AtomGroup->new(atoms=>[@enzyme_atoms]);
122              
123             More complex selections are also straightforward using the following operators:
124              
125             .or. matches if an atom satisfies either selection (separated by .or.)
126             .and. matches if an atom satisfies both selections (separated by .and.)
127             .within. less than or equal to for numeric attributes
128             .beyond. greater than or equal to for numeric attributes
129             .not. everything but
130              
131             More, such as .around. will be added as needs arise. Let's take a couple of examples.
132              
133             1. To select all the tyrosines from chain E,
134              
135             my $TYR_E = $mol->select_group('chain E .and. resname TYR');
136              
137             2. To choose both chain E and chain I,
138              
139             my $two_chains = $mol->select_group('chain E .or. chain I');
140              
141             Parentheses are also supported to allow selection precedence.
142              
143             3. To select all the tyrosines from chain E along with all the tyrosines from chain I,
144              
145             my $TYR_EI = $mol->select_group('(resname TYR .and. chain E) .or. (resname TYR .and. chain I)');
146              
147             4. To select all atoms with occupancies between 0.5 and 0.95,
148              
149             my $occs = $mol->select_group('(occ .within. 0.95) .and. (occ .beyond. 0.5)');
150              
151             The common selections (protein, water, backbone, sidechains) can also be used in the selections. For example, select
152             chain I but not the chain I water molecules (sometimes the water molecules get the chain id),
153              
154             my $chain_I = $mol->select_group('chain I .and. .not. water');
155              
156             =head2 Extreme selections using code references.
157              
158             The role also provides an attribute with hash traits that can be used to create insanely flexible selections using code references.
159             As long as the code reference returns a list of atoms, you can do whatever you want. For example, let's define a sidechains selection; the
160             key will be a simple string ("sidechains") and the value will be an anonymous subroutine.
161             For example,
162              
163             $mol->set_selection_cr("my_sidechains" => sub {grep { $_->record_name eq 'ATOM' and not
164             ( $_->name eq 'N' or $_->name eq 'CA'
165             or $_->name eq 'C' or $_->name eq 'Flowers and sausages')
166             } @_ }
167             );
168              
169             Now $mol->select_group('my_sidechains') will return a group corresponding to the selection defined above. If you were to rename
170             "my_sidechains" to "sidechains", your "sidechains" would be loaded in place of the common selection "sidechains" because of the priority
171             described below in the select_group method.
172              
173             =head1 METHODS
174              
175             =head2 set_selection_cr
176              
177             two arguments: a string and a coderef
178              
179             =head2 select_group
180              
181             takes one argument (string) and returns a HackaMol::AtomGroup object containing the selected atoms. Priority: the select_group method looks at
182             selections_cr first, then the common selections, and finally, if there were no known selections, it passes the argument to be processed
183             using regular expressions.
184              
185             =head1 ATTRIBUTES
186              
187             =head2 selections_cr
188              
189             isa HashRef[CodeRef] that defaults to an empty hash and has public Hash traits. This attribute allows the user to use code references in the atom selections.
190             The list of atoms, contained in the role consuming object, will be passed to the code reference, and a list of atoms is the expected output
191             of the code reference, e.g.
192              
193             @new_atoms = &{$code_ref}(@atoms);
194              
195             =head1 SYNOPSIS
196              
197             # load 2SIC from RCSB.org and pull out two groups: the enzyme (chain E) and the inhibitor (chain I)
198              
199             use HackaMol;
200             use Moose::Util qw( ensure_all_roles ); # to apply the role to the molecule object
201              
202             my $mol = HackaMol->new->pdbid_mol("2sic"); #returns HackaMol::Molecule
203              
204             ensure_all_roles($mol, 'HackaMol::Roles::SelectionRole'); # now $mol has the select_group method
205              
206             my $enzyme = $mol->select_group("chain E");
207             my $inhib = $mol->select_group("chain I");
208              
209             =head1 WARNING
210              
211             This is still under active development and may change or just not work. I still need to add warnings to help with bad
212             selections. Let me know if you have problems or suggestions!
213              
214             =head1 AUTHOR
215              
216             Demian Riccardi <demianriccardi@gmail.com>
217              
218             =head1 COPYRIGHT AND LICENSE
219              
220             This software is copyright (c) 2016 by Demian Riccardi.
221              
222             This is free software; you can redistribute it and/or modify it under
223             the same terms as the Perl 5 programming language system itself.
224              
225             =cut