| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Bio::MaxQuant::Evidence::Statistics; |
|
2
|
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
116425
|
use 5.006; |
|
|
2
|
|
|
|
|
10
|
|
|
|
2
|
|
|
|
|
78
|
|
|
4
|
2
|
|
|
2
|
|
11
|
use strict; |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
504
|
|
|
5
|
2
|
|
|
2
|
|
13
|
use warnings; |
|
|
2
|
|
|
|
|
34
|
|
|
|
2
|
|
|
|
|
61
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
2
|
|
|
2
|
|
25849
|
use Text::CSV; |
|
|
2
|
|
|
|
|
41290
|
|
|
|
2
|
|
|
|
|
71
|
|
|
8
|
2
|
|
|
2
|
|
76
|
use Carp; |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
174
|
|
|
9
|
2
|
|
|
2
|
|
2970
|
use Storable; |
|
|
2
|
|
|
|
|
7531
|
|
|
|
2
|
|
|
|
|
132
|
|
|
10
|
2
|
|
|
2
|
|
1769
|
use Statistics::Distributions; |
|
|
2
|
|
|
|
|
6904
|
|
|
|
2
|
|
|
|
|
8860
|
|
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 NAME |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
Bio::MaxQuant::Evidence::Statistics - Additional statistics on your SILAC evidence |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 VERSION |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
Version 0.01 |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=cut |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
our $VERSION = '0.01'; |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
Read/convert your evidence file to a more rapidly processable format, |
|
28
|
|
|
|
|
|
|
and perform various operations and statistics across/between multiple |
|
29
|
|
|
|
|
|
|
experiments. Supports multidimensional experiments with replicate |
|
30
|
|
|
|
|
|
|
analyses. |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
use Bio::MaxQuant::Evidence::Statistics; |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
my $foo = Bio::MaxQuant::Evidence::Statistics->new(); |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
# get the essential data from an evidence file |
|
38
|
|
|
|
|
|
|
$foo->parseEssentials(filename=>$evidencePath); |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
# store the essentials for later |
|
41
|
|
|
|
|
|
|
$foo->writeEssentials(filename=>$essentialsPath); |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
# laod previously stored essentials |
|
44
|
|
|
|
|
|
|
$foo->readEssentials(filename=>$essentialsPath); |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=head1 SUBROUTINES/METHODS |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=head2 new |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
Create a new object: |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
my $foo = Bio::MaxQuant::Evidence::Statistics->new(); |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=cut |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
sub new { |
|
58
|
8526
|
|
|
8526
|
1
|
244897
|
my $p = shift; |
|
59
|
8526
|
|
66
|
|
|
39854
|
my $c = ref($p) || $p; |
|
60
|
8526
|
|
|
|
|
212771
|
my %defaults = ( |
|
61
|
|
|
|
|
|
|
separator => "\t", |
|
62
|
|
|
|
|
|
|
essential_column_names => { |
|
63
|
|
|
|
|
|
|
'Protein group IDs' => 1, |
|
64
|
|
|
|
|
|
|
'Modified' => 1, |
|
65
|
|
|
|
|
|
|
'Leading Proteins' => 1, |
|
66
|
|
|
|
|
|
|
'PEP' => 1, |
|
67
|
|
|
|
|
|
|
'Ratio H/L' => 1, |
|
68
|
|
|
|
|
|
|
'Intensity H' => 1, |
|
69
|
|
|
|
|
|
|
'Intensity L' => 1, |
|
70
|
|
|
|
|
|
|
'Contaminant' => 1, |
|
71
|
|
|
|
|
|
|
'Reverse' => 1, |
|
72
|
|
|
|
|
|
|
}, |
|
73
|
|
|
|
|
|
|
list_column_names => { |
|
74
|
|
|
|
|
|
|
'Modified' => 1, |
|
75
|
|
|
|
|
|
|
'PEP' => 1, |
|
76
|
|
|
|
|
|
|
'Ratio H/L' => 1, |
|
77
|
|
|
|
|
|
|
'Intensity H' => 1, |
|
78
|
|
|
|
|
|
|
'Intensity L' => 1, |
|
79
|
|
|
|
|
|
|
}, |
|
80
|
|
|
|
|
|
|
key_column_name => 'id', |
|
81
|
|
|
|
|
|
|
experiment_column_name => 'Experiment', |
|
82
|
|
|
|
|
|
|
csv_options => {sep_char=>"\t"}, |
|
83
|
|
|
|
|
|
|
); |
|
84
|
8526
|
|
|
|
|
61038
|
my %options = (%defaults, @_); |
|
85
|
8526
|
|
|
|
|
38674
|
my $o = {defaults=>\%options}; |
|
86
|
8526
|
|
|
|
|
45526
|
bless $o, $c; |
|
87
|
8526
|
|
|
|
|
44095
|
return $o; |
|
88
|
|
|
|
|
|
|
} |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=head2 parseEssentials(%options) |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
Reads the essential data from an evidence file. Evidence files |
|
93
|
|
|
|
|
|
|
for large analyses can be very big and take a long time to process, |
|
94
|
|
|
|
|
|
|
to we only read what's necessary, and can save this for convenience |
|
95
|
|
|
|
|
|
|
and speed too, using writeEssentials(). |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
The data are stored by Protein group IDs, i.e. one entry per protein |
|
98
|
|
|
|
|
|
|
group. Other data stored here are: |
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=over |
|
101
|
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
=item id |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
=item Protein group IDs |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=item Modified -- is this actually the right name?? |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=item Leading Proteins |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=item Experiment |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=item PEP |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=item Ratio H/L |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=item Intensity H |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=item Intensity L |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=item Contaminant |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=item Reverse |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=back |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
The column names used for storage are defined in the default option |
|
127
|
|
|
|
|
|
|
essential_column_names, and can be changed when you call new, or when you call |
|
128
|
|
|
|
|
|
|
parseEssentials. The option is a hash of column names whose values |
|
129
|
|
|
|
|
|
|
detmerine whether the column is kept by their truthness... e.g. |
|
130
|
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
$o->parseEssentials(essential_column_names=>( |
|
132
|
|
|
|
|
|
|
'id' => 1, # kept |
|
133
|
|
|
|
|
|
|
'PEP' => 0, # discarded |
|
134
|
|
|
|
|
|
|
#foo => ?, # discarded |
|
135
|
|
|
|
|
|
|
)); |
|
136
|
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
If a column doesn't exist, it does not complain! |
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
The method takes a hash of options. |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
options: |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=over |
|
144
|
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
=item filename - path of the file to process |
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=item separator - passed to Text::CSV (default is tab) |
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
=item key_column_name - change the column keyed on (default is id) |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=item experiment_column_name - change the column the data are split on |
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=item list_column_names - change the columns stored as lists |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=back |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=head3 list_column_names |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
Some columns are the same across all the evidence in a protein group, |
|
160
|
|
|
|
|
|
|
eg, the id is obviously the same, Contaminant and Reverse, Protein IDs, |
|
161
|
|
|
|
|
|
|
and so on. The default, therefore, is to overwrite the column with |
|
162
|
|
|
|
|
|
|
the value seen in an evidence. BUT, some columns have a different value |
|
163
|
|
|
|
|
|
|
in each evidence, e.g. Ratio H/L or PEP. Whatever columns are given in |
|
164
|
|
|
|
|
|
|
list_column_names, which true values, will be appended as lists, so in the |
|
165
|
|
|
|
|
|
|
final data, there will be one row per protein and any column bearing multiple |
|
166
|
|
|
|
|
|
|
evidences for that protein will be a list. |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
If that makes no sense, write to me and I'll try to change it. |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=cut |
|
171
|
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
sub parseEssentials { |
|
173
|
1
|
|
|
1
|
1
|
32
|
my $o = shift; |
|
174
|
1
|
|
|
|
|
3
|
my %defaults = %{$o->{defaults}}; |
|
|
1
|
|
|
|
|
7
|
|
|
175
|
1
|
|
|
|
|
9
|
my %options = (%defaults, @_); |
|
176
|
1
|
|
|
|
|
109
|
my $io = IO::File->new($options{filename}, 'r'); |
|
177
|
1
|
|
|
|
|
10403
|
my $csv = Text::CSV->new($options{csv_options}); |
|
178
|
|
|
|
|
|
|
# head the column names, just like in the pod... |
|
179
|
1
|
|
|
|
|
104
|
$csv->column_names ($csv->getline ($io)); |
|
180
|
|
|
|
|
|
|
# now getline_hr will give us hashrefs :-) |
|
181
|
|
|
|
|
|
|
# we just need to know which to keep... |
|
182
|
1
|
|
|
|
|
2762
|
my %k = %{$options{essential_column_names}}; |
|
|
1
|
|
|
|
|
11
|
|
|
183
|
1
|
|
|
|
|
3
|
my $i = $options{key_column_name}; |
|
184
|
1
|
|
|
|
|
2
|
my $e = $options{experiment_column_name}; |
|
185
|
1
|
|
|
|
|
1
|
my %l = %{$options{list_column_names}}; |
|
|
1
|
|
|
|
|
5
|
|
|
186
|
1
|
|
|
|
|
3
|
my %data = (); |
|
187
|
1
|
|
|
|
|
3
|
my @ids = (); |
|
188
|
1
|
|
|
|
|
2
|
my @sharedids = (); |
|
189
|
1
|
|
|
|
|
2
|
my @uniqueids = (); |
|
190
|
|
|
|
|
|
|
|
|
191
|
1
|
|
|
|
|
6
|
while(! eof($io)){ |
|
192
|
2793
|
|
|
|
|
14698
|
my $hr = $csv->getline_hr($io); |
|
193
|
192717
|
100
|
66
|
|
|
733196
|
my %h = map { |
|
194
|
2793
|
|
|
|
|
10827691
|
exists $k{$_} && $k{$_} # exists and true |
|
195
|
|
|
|
|
|
|
? ($_=>$hr->{$_}) # key => value |
|
196
|
|
|
|
|
|
|
: () # empty |
|
197
|
|
|
|
|
|
|
} keys %$hr; |
|
198
|
2793
|
|
|
|
|
20018
|
my $key = $hr->{$i}; |
|
199
|
2793
|
|
|
|
|
6330
|
my $expt = $hr->{$e}; |
|
200
|
|
|
|
|
|
|
# store it... |
|
201
|
2793
|
|
|
|
|
5536
|
push @ids, $key; # keep track of what we've got |
|
202
|
|
|
|
|
|
|
# store stuff by expt, then id, then column |
|
203
|
2793
|
100
|
|
|
|
10747
|
$data{$expt} = {} unless exists $data{$expt}; |
|
204
|
22344
|
100
|
66
|
|
|
168005
|
$data{$expt}->{$key} = { # set up this expt/key... unless it exists |
|
205
|
|
|
|
|
|
|
map { |
|
206
|
2793
|
50
|
|
|
|
18909
|
exists $l{$_} && $l{$_} |
|
207
|
|
|
|
|
|
|
? ($_ => []) # it's an array |
|
208
|
|
|
|
|
|
|
: ($_ => '') # it's a scalar |
|
209
|
|
|
|
|
|
|
} keys %h |
|
210
|
|
|
|
|
|
|
} unless exists $data{$expt}->{$key}; |
|
211
|
|
|
|
|
|
|
# add the data... |
|
212
|
2793
|
|
|
|
|
35190
|
foreach (keys %h){ # each column |
|
213
|
22344
|
100
|
66
|
|
|
106263
|
if(exists $l{$_} && $l{$_}){ # is it a list column? |
|
214
|
11172
|
|
|
|
|
15756
|
push @{$data{$expt}->{$key}->{$_}}, $h{$_}; # push it |
|
|
11172
|
|
|
|
|
57249
|
|
|
215
|
|
|
|
|
|
|
} |
|
216
|
|
|
|
|
|
|
else { |
|
217
|
11172
|
|
|
|
|
43575
|
$data{$expt}->{$key}->{$_} = $h{$_}; # set it |
|
218
|
|
|
|
|
|
|
} |
|
219
|
|
|
|
|
|
|
} |
|
220
|
2793
|
100
|
|
|
|
24661
|
if($data{$expt}->{$key}->{'Protein group IDs'} =~ /;/){ |
|
221
|
400
|
|
|
|
|
19618
|
push @sharedids, $key; |
|
222
|
|
|
|
|
|
|
} |
|
223
|
|
|
|
|
|
|
else { |
|
224
|
2393
|
|
|
|
|
62966
|
push @uniqueids, $key; |
|
225
|
|
|
|
|
|
|
} |
|
226
|
|
|
|
|
|
|
} |
|
227
|
1
|
|
|
|
|
5
|
$o->{data} = \%data; |
|
228
|
1
|
|
|
|
|
176
|
$o->{ids} = [sort {$a <=> $b} @ids]; |
|
|
2792
|
|
|
|
|
5087
|
|
|
229
|
1
|
|
|
|
|
35
|
$o->{sharedids} = [sort {$a <=> $b} @sharedids]; |
|
|
399
|
|
|
|
|
615
|
|
|
230
|
1
|
|
|
|
|
104
|
$o->{uniqueids} = [sort {$a <=> $b} @uniqueids]; |
|
|
2392
|
|
|
|
|
4168
|
|
|
231
|
1
|
|
|
|
|
8937
|
$o->{cache} = {}; |
|
232
|
|
|
|
|
|
|
} |
|
233
|
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
=head2 experiments |
|
235
|
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
Returns a list of the experiments in the data. |
|
237
|
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
=cut |
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
sub experiments { |
|
241
|
876
|
|
|
876
|
1
|
4189
|
my $o = shift; |
|
242
|
876
|
|
|
|
|
3042
|
my $data = $o->{data}; |
|
243
|
876
|
|
|
|
|
10294
|
return keys %$data; |
|
244
|
|
|
|
|
|
|
} |
|
245
|
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
=head2 replicated |
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
Returns a list of the experiment names without the replicate portion. |
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
The names are assumed to be Cell.Condition.Replicate, i.e. full-stop (period) separated. |
|
251
|
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
=cut |
|
253
|
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
sub replicated { |
|
255
|
5
|
|
|
5
|
1
|
11
|
my $o = shift; |
|
256
|
5
|
100
|
|
|
|
26
|
return @{$o->{cache}->{replicated}} if exists $o->{cache}->{replicated}; |
|
|
3
|
|
|
|
|
20
|
|
|
257
|
2
|
|
|
|
|
5
|
my %repl = (); |
|
258
|
2
|
|
|
|
|
9
|
my @expts = $o->experiments(); |
|
259
|
2
|
|
|
|
|
12
|
foreach (@expts){ |
|
260
|
54
|
|
|
|
|
177
|
s/\.[^.]+$//; |
|
261
|
54
|
|
|
|
|
112
|
$repl{$_} = 1; |
|
262
|
|
|
|
|
|
|
} |
|
263
|
2
|
|
|
|
|
16
|
$o->{cache}->{replicated} = [keys %repl]; |
|
264
|
2
|
|
|
|
|
12
|
return @{$o->{cache}->{replicated}}; |
|
|
2
|
|
|
|
|
29
|
|
|
265
|
|
|
|
|
|
|
} |
|
266
|
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
=head2 orthogonals |
|
268
|
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
Returns a list of sets of orthogonal experiments, that is 3 experiments in which the first has |
|
270
|
|
|
|
|
|
|
one condition in common with the other two, but they have nothing in common with each other. |
|
271
|
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
e.g. A.X A.Y B.X |
|
273
|
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
The rationale behind this is that quantitative differences across this set indicate mechanistic |
|
275
|
|
|
|
|
|
|
links between, for example, cell line and drug treatment. If a reponse is seen to a drug, and |
|
276
|
|
|
|
|
|
|
a different repsonse is seen in a different cell-type, this system will pick that up. The |
|
277
|
|
|
|
|
|
|
fourth member of the comparison (in the example that would be B.Y) could be anything... and the |
|
278
|
|
|
|
|
|
|
interpretation would still be that there is a differential response. |
|
279
|
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
=cut |
|
281
|
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
sub orthogonals { |
|
283
|
9
|
|
|
9
|
1
|
26
|
my $o = shift; |
|
284
|
9
|
100
|
|
|
|
49
|
return @{$o->{cache}->{orthogonals}} if exists $o->{cache}->{orthogonals}; |
|
|
7
|
|
|
|
|
167
|
|
|
285
|
2
|
|
|
|
|
6
|
my @repls = $o->replicated(); |
|
286
|
2
|
|
|
|
|
7
|
my @orths = (); |
|
287
|
2
|
|
|
|
|
5
|
foreach my $c1(@repls){ |
|
288
|
18
|
|
|
|
|
47
|
my ($p,$x) = split(/\./, $c1, 2); |
|
289
|
18
|
|
|
|
|
31
|
foreach my $c2(@repls){ |
|
290
|
162
|
100
|
|
|
|
298
|
next if $c2 eq $c1; |
|
291
|
144
|
|
|
|
|
312
|
my ($q,$y) = split(/\./, $c2, 2); |
|
292
|
144
|
100
|
|
|
|
344
|
next unless $p eq $q; |
|
293
|
36
|
|
|
|
|
56
|
foreach my $c3(@repls){ |
|
294
|
324
|
100
|
100
|
|
|
1218
|
next if $c3 eq $c1 || $c3 eq $c2; |
|
295
|
252
|
|
|
|
|
513
|
my ($r,$z) = split(/\./, $c3, 2); |
|
296
|
252
|
100
|
|
|
|
631
|
next unless $x eq $z; |
|
297
|
72
|
|
|
|
|
212
|
push @orths, "$c1 $c2 $c3"; |
|
298
|
|
|
|
|
|
|
} |
|
299
|
|
|
|
|
|
|
} |
|
300
|
|
|
|
|
|
|
} |
|
301
|
2
|
|
|
|
|
9
|
$o->{cache}->{orthogonals} = \@orths; |
|
302
|
2
|
|
|
|
|
60
|
return @orths; |
|
303
|
|
|
|
|
|
|
} |
|
304
|
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
=head2 pairs |
|
306
|
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
Returns a list of pairs of replicated experiments (e.g. A.X A.Y, A.X B.X ...) |
|
308
|
|
|
|
|
|
|
that represents all possible comparisons. |
|
309
|
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
=cut |
|
311
|
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
sub pairs { |
|
313
|
9
|
|
|
9
|
1
|
32
|
my $o = shift; |
|
314
|
9
|
100
|
|
|
|
67
|
return @{$o->{cache}->{pairs}} if exists $o->{cache}->{pairs}; |
|
|
7
|
|
|
|
|
180
|
|
|
315
|
2
|
|
|
|
|
12
|
my @r = $o->replicated(); |
|
316
|
2
|
|
|
|
|
7
|
my @pairs = (); |
|
317
|
2
|
|
|
|
|
17
|
foreach my $r1(sort @r){ |
|
318
|
18
|
|
|
|
|
68
|
foreach my $r2(sort @r){ |
|
319
|
162
|
100
|
|
|
|
322
|
next unless $r1 lt $r2; |
|
320
|
72
|
|
|
|
|
191
|
push @pairs, "$r1 $r2"; |
|
321
|
|
|
|
|
|
|
} |
|
322
|
|
|
|
|
|
|
} |
|
323
|
2
|
|
|
|
|
9
|
$o->{cache}->{pairs} = \@pairs; |
|
324
|
2
|
|
|
|
|
38
|
return @pairs; |
|
325
|
|
|
|
|
|
|
} |
|
326
|
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
=head2 ids |
|
328
|
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
Returns a list of evidence ids in the data. |
|
330
|
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=cut |
|
332
|
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
sub ids { |
|
334
|
2
|
|
|
2
|
1
|
5
|
return @{shift()->{ids}}; |
|
|
2
|
|
|
|
|
18
|
|
|
335
|
|
|
|
|
|
|
} |
|
336
|
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
=head2 sharedIds |
|
338
|
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
Returns a list containing the ids of those evidences shared between protein groups. |
|
340
|
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
=cut |
|
342
|
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
sub sharedIds { |
|
344
|
2
|
|
|
2
|
1
|
826
|
return @{shift()->{sharedids}}; |
|
|
2
|
|
|
|
|
13
|
|
|
345
|
|
|
|
|
|
|
} |
|
346
|
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
=head2 uniqueIds |
|
348
|
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
Returns a list containing the ids of those evidences unique to one protein group. |
|
350
|
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
=cut |
|
352
|
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
sub uniqueIds { |
|
354
|
2
|
|
|
2
|
1
|
6
|
return @{shift()->{uniqueids}}; |
|
|
2
|
|
|
|
|
14
|
|
|
355
|
|
|
|
|
|
|
} |
|
356
|
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
=head2 saveEssentials(%options) |
|
358
|
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
Save the essential data (quicker to read again in future) |
|
360
|
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
=cut |
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
sub saveEssentials { |
|
364
|
1
|
|
|
1
|
1
|
811
|
my $o = shift; |
|
365
|
1
|
|
|
|
|
2
|
my %defaults = %{$o->{defaults}}; |
|
|
1
|
|
|
|
|
14
|
|
|
366
|
1
|
|
|
|
|
8
|
my %options = (%defaults, @_); |
|
367
|
|
|
|
|
|
|
# here we want to save everything |
|
368
|
1
|
|
|
|
|
10
|
store $o, $options{filename}; |
|
369
|
|
|
|
|
|
|
} |
|
370
|
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
=head2 loadEssentials |
|
372
|
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
Load up previously saved essentials |
|
374
|
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
=cut |
|
376
|
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
sub loadEssentials { |
|
378
|
12
|
|
|
12
|
1
|
488851
|
my $o = shift; |
|
379
|
12
|
|
|
|
|
46
|
my %defaults = %{$o->{defaults}}; |
|
|
12
|
|
|
|
|
112
|
|
|
380
|
12
|
|
|
|
|
247
|
my %options = (%defaults, @_); |
|
381
|
12
|
|
|
|
|
98
|
my $p = retrieve($options{filename}); |
|
382
|
12
|
|
|
|
|
517213
|
%$o = %$p; |
|
383
|
12
|
|
|
|
|
257
|
return $o; |
|
384
|
|
|
|
|
|
|
} |
|
385
|
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
=head2 extractColumnValues |
|
388
|
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
=cut |
|
390
|
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
sub extractColumnValues { |
|
392
|
8379
|
|
|
8379
|
1
|
31340
|
my ($o, %options) = @_; |
|
393
|
|
|
|
|
|
|
# options: |
|
394
|
8379
|
|
|
|
|
43524
|
my %defaults = ( |
|
395
|
|
|
|
|
|
|
column => 'id', # which column to collect |
|
396
|
|
|
|
|
|
|
experiment => '', # only extract this expt (all if false) |
|
397
|
|
|
|
|
|
|
'split' => 1, # true = split cell on ; before adding to results |
|
398
|
|
|
|
|
|
|
'nodup' => 1, # true = remove duplicates |
|
399
|
|
|
|
|
|
|
'emptiesok' => 0, # true = include empty values in output |
|
400
|
|
|
|
|
|
|
); |
|
401
|
8379
|
|
|
|
|
72925
|
%options = (%defaults, %options); |
|
402
|
8379
|
|
|
|
|
29336
|
my $data = $o->{data}; |
|
403
|
8379
|
100
|
|
|
|
35181
|
my $results = $options{nodup} ? {} : []; |
|
404
|
8379
|
50
|
|
|
|
65973
|
my @expts = $options{experiment} ? ($options{experiment}) : (keys %$data); |
|
405
|
8379
|
|
|
|
|
21929
|
foreach my $e(@expts){ |
|
406
|
9009
|
|
|
|
|
15520
|
foreach my $k(keys %{$data->{$e}}){ |
|
|
9009
|
|
|
|
|
143376
|
|
|
407
|
457848
|
|
|
|
|
1621126
|
my $value = $data->{$e}->{$k}->{$options{column}}; |
|
408
|
457848
|
100
|
|
|
|
1460648
|
if(ref($value) eq ''){ |
|
409
|
8379
|
|
|
|
|
41310
|
$value = [split /;/, $value]; |
|
410
|
|
|
|
|
|
|
} |
|
411
|
457848
|
50
|
|
|
|
1687922
|
my @values = $options{'split'} ? (@$value) : (join(';',@$value)); |
|
412
|
457848
|
|
|
|
|
873372
|
foreach (@values){ |
|
413
|
459048
|
100
|
66
|
|
|
2430962
|
next unless $_ ne '' || $options{'emptiesok'}; |
|
414
|
437416
|
100
|
|
|
|
1012527
|
if($options{nodup}){ |
|
415
|
51727
|
|
|
|
|
377677
|
$results->{$_} = 1; |
|
416
|
|
|
|
|
|
|
} |
|
417
|
|
|
|
|
|
|
else { |
|
418
|
385689
|
|
|
|
|
1858907
|
push @$results, $_; |
|
419
|
|
|
|
|
|
|
} |
|
420
|
|
|
|
|
|
|
} |
|
421
|
|
|
|
|
|
|
} |
|
422
|
|
|
|
|
|
|
} |
|
423
|
|
|
|
|
|
|
# use Data::Dumper; |
|
424
|
|
|
|
|
|
|
# print STDERR Dumper $results; |
|
425
|
8379
|
100
|
|
|
|
179184
|
return $options{nodup} ? (keys %$results) : (@$results); |
|
426
|
|
|
|
|
|
|
} |
|
427
|
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
=head2 proteinCount |
|
429
|
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
=cut |
|
431
|
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
sub proteinCount { |
|
433
|
2
|
|
|
2
|
1
|
4689705
|
my $o = shift; |
|
434
|
2
|
100
|
|
|
|
22
|
return $o->{cache}->{proteinCount} if exists $o->{cache}->{proteinCount}; |
|
435
|
1
|
|
|
|
|
8
|
my @proteins = $o->getLeadingProteins(); |
|
436
|
1
|
|
|
|
|
6
|
$o->{cache}->{proteinCount} = scalar @proteins; |
|
437
|
1
|
|
|
|
|
14
|
return $o->{cache}->{proteinCount}; |
|
438
|
|
|
|
|
|
|
} |
|
439
|
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
=head2 getProteinGroupIds |
|
441
|
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
=cut |
|
443
|
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
sub getProteinGroupIds { |
|
445
|
2
|
|
|
2
|
1
|
8
|
my $o = shift; |
|
446
|
2
|
100
|
|
|
|
16
|
$o->{cache}->{proteinGroupIds} = [sort $o->extractColumnValues(column=>'Protein group IDs')] unless exists $o->{cache}->{proteinGroupIds}; |
|
447
|
2
|
|
|
|
|
7
|
return @{$o->{cache}->{proteinGroupIds}} |
|
|
2
|
|
|
|
|
32
|
|
|
448
|
|
|
|
|
|
|
} |
|
449
|
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
=head2 getLeadingProteins |
|
451
|
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
=cut |
|
453
|
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
sub getLeadingProteins { |
|
455
|
4
|
|
|
4
|
1
|
1343
|
my $o = shift; |
|
456
|
4
|
100
|
|
|
|
37
|
$o->{cache}->{leadingProteins} = [sort $o->extractColumnValues(column=>'Leading Proteins')] unless exists $o->{cache}->{proteinGroupIds}; |
|
457
|
4
|
|
|
|
|
13
|
return @{$o->{cache}->{leadingProteins}}; |
|
|
4
|
|
|
|
|
54
|
|
|
458
|
|
|
|
|
|
|
} |
|
459
|
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
=head2 logRatios |
|
461
|
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
Logs ratios (base 2) throughout the dataset, and sets a flag so it can't get logged again. |
|
463
|
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
Treatment of "special values": empty string, <= 0, NaN, and any other non-number are removed |
|
465
|
|
|
|
|
|
|
from the data! |
|
466
|
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
=cut |
|
468
|
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
sub logRatios { |
|
470
|
17
|
|
|
17
|
1
|
1120
|
my $o = shift; |
|
471
|
17
|
100
|
|
|
|
120
|
return 0 if $o->{logged}; |
|
472
|
11
|
|
|
|
|
41
|
$o->{logged} = 1; |
|
473
|
11
|
|
|
|
|
31
|
my $data = $o->{data}; |
|
474
|
11
|
|
|
|
|
350
|
foreach my $exptname(keys %$data){ |
|
475
|
297
|
|
|
|
|
1436
|
my $experiment = $data->{$exptname}; |
|
476
|
297
|
|
|
|
|
17597
|
foreach my $proteinGroupId(keys %$experiment){ |
|
477
|
30724
|
|
|
|
|
73697
|
my $proteinGroup = $experiment->{$proteinGroupId}; |
|
478
|
30724
|
|
|
|
|
94740
|
my $ratios = $proteinGroup->{'Ratio H/L'}; |
|
479
|
30724
|
|
|
|
|
53972
|
my @newRatios = (); |
|
480
|
30724
|
|
|
|
|
82672
|
foreach (0..$#$ratios){ |
|
481
|
30724
|
100
|
|
|
|
265522
|
$ratios->[$_] = $ratios->[$_] =~ /^\d+\.?\d*$/ |
|
482
|
|
|
|
|
|
|
? log($ratios->[$_])/log(2) |
|
483
|
|
|
|
|
|
|
: ''; |
|
484
|
|
|
|
|
|
|
} |
|
485
|
|
|
|
|
|
|
} |
|
486
|
|
|
|
|
|
|
} |
|
487
|
11
|
|
|
|
|
166
|
return 1; |
|
488
|
|
|
|
|
|
|
} |
|
489
|
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
=head2 filter |
|
491
|
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
returns a set of protein records based on filter parameters... |
|
493
|
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
=head3 options |
|
495
|
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
=over |
|
497
|
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
=item experiment - regular expression to match experiment name |
|
499
|
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
=item proteinGroupId - regular expression to match protein group id |
|
501
|
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
=item leadingProteins - regular expression to match leading protein ids |
|
503
|
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
=item notLeadingProteins - regular expression to not match leading protein ids |
|
505
|
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
=back |
|
507
|
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
Returns a filtered object of the same type, with relevant flags set (e.g. whether |
|
509
|
|
|
|
|
|
|
data has been logged, etc). |
|
510
|
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
Warning, intentionally does not perform a deep clone! |
|
512
|
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
=cut |
|
514
|
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
sub filter { |
|
516
|
8511
|
|
|
8511
|
1
|
47300
|
my ($o,%opts) = @_; |
|
517
|
|
|
|
|
|
|
# options : |
|
518
|
|
|
|
|
|
|
# use Data::Dumper; |
|
519
|
|
|
|
|
|
|
# print STDERR 'OPTS: ', Dumper \%opts; |
|
520
|
8511
|
|
|
|
|
24747
|
my $data = $o->{data}; |
|
521
|
8511
|
|
|
|
|
29499
|
my $result = {}; |
|
522
|
8511
|
|
|
|
|
122390
|
foreach my $experiment(keys %$data){ |
|
523
|
229797
|
100
|
66
|
|
|
1723510
|
if(! exists $opts{experiment} || $experiment =~ /$opts{experiment}/){ |
|
524
|
9063
|
|
|
|
|
34023
|
$result->{$experiment} = {}; |
|
525
|
9063
|
|
|
|
|
24743
|
my $exptdata = $data->{$experiment}; |
|
526
|
9063
|
|
|
|
|
578225
|
foreach my $pgid(keys %$exptdata){ |
|
527
|
944201
|
50
|
33
|
|
|
2852151
|
if(! exists $opts{proteinGroupId} || $pgid =~ /$opts{proteinGroupId}/){ |
|
528
|
944201
|
|
|
|
|
1904095
|
my $pgdata = $exptdata->{$pgid}; |
|
529
|
944201
|
100
|
100
|
|
|
5914064
|
if(! exists $opts{leadingProteins} || $pgdata->{'Leading Proteins'} =~ /$opts{leadingProteins}/){ |
|
530
|
466181
|
100
|
66
|
|
|
1402744
|
if(! exists $opts{notLeadingProteins} || $pgdata->{'Leading Proteins'} !~ /$opts{notLeadingProteins}/){ |
|
531
|
463434
|
|
|
|
|
1914190
|
$result->{$experiment}->{$pgid} = $pgdata; |
|
532
|
|
|
|
|
|
|
} |
|
533
|
|
|
|
|
|
|
} |
|
534
|
|
|
|
|
|
|
} |
|
535
|
|
|
|
|
|
|
} |
|
536
|
|
|
|
|
|
|
} |
|
537
|
|
|
|
|
|
|
} |
|
538
|
|
|
|
|
|
|
# print STDERR Dumper $result if $opts{experiment} eq qr/^LCC1.nE.r2$/; |
|
539
|
8511
|
|
|
|
|
80978
|
my $p = $o->new; |
|
540
|
8511
|
|
|
|
|
143809
|
%$p = %$o; |
|
541
|
8511
|
|
|
|
|
36483
|
$p->{data} = $result; |
|
542
|
8511
|
|
|
|
|
20120
|
$o->{lastfiltered} = $p; |
|
543
|
8511
|
|
|
|
|
37450
|
return $p; |
|
544
|
|
|
|
|
|
|
} |
|
545
|
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
=head2 replicateMedian |
|
547
|
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
options are passed to filter. |
|
549
|
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
=cut |
|
551
|
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
sub replicateMedian { |
|
553
|
355
|
|
|
355
|
1
|
174821
|
my ($o,%opts) = @_; |
|
554
|
355
|
|
|
|
|
1563
|
my $f = $o->filter(%opts); |
|
555
|
355
|
|
|
|
|
1707
|
return $f->median( |
|
556
|
|
|
|
|
|
|
$f->extractColumnValues( |
|
557
|
|
|
|
|
|
|
column => 'Ratio H/L', |
|
558
|
|
|
|
|
|
|
nodup => 0, |
|
559
|
|
|
|
|
|
|
) |
|
560
|
|
|
|
|
|
|
); |
|
561
|
|
|
|
|
|
|
} |
|
562
|
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
=head2 deviations |
|
564
|
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
returns an hashref with the following keys |
|
566
|
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
=over |
|
568
|
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
=item n - the number of items |
|
570
|
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
=item sd - the standard deviation (from the mean) |
|
572
|
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
=item mad - the median absolute deviation (from the median) |
|
574
|
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
=item sd_via_mad - the standard deviation estimated from the median absolute deviation |
|
576
|
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
=back |
|
578
|
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
=cut |
|
580
|
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
sub deviations { |
|
582
|
2826
|
|
|
2826
|
1
|
79414
|
my ($o,%opts) = @_; |
|
583
|
|
|
|
|
|
|
# cache |
|
584
|
2826
|
100
|
|
|
|
38227
|
$o->{cache}->{deviations} = {} unless exists $o->{cache}->{deviations}; |
|
585
|
2826
|
|
|
|
|
22879
|
my $cachekey = join('::', map {"$_=$opts{$_}"} sort keys %opts); |
|
|
16780
|
|
|
|
|
59712
|
|
|
586
|
|
|
|
|
|
|
# print STDERR "$cachekey\n"; |
|
587
|
2826
|
100
|
|
|
|
21707
|
return $o->{cache}->{deviations}->{$cachekey} if exists $o->{cache}->{deviations}->{$cachekey}; |
|
588
|
|
|
|
|
|
|
## |
|
589
|
2825
|
|
|
|
|
18148
|
my $f = $o->filter(%opts); |
|
590
|
2825
|
|
|
|
|
17091
|
my @values = $f->extractColumnValues( |
|
591
|
|
|
|
|
|
|
column => 'Ratio H/L', |
|
592
|
|
|
|
|
|
|
nodup => 0, |
|
593
|
|
|
|
|
|
|
); |
|
594
|
2825
|
|
|
|
|
24307
|
my $n = scalar @values; |
|
595
|
2825
|
50
|
|
|
|
30591
|
my $d = $n > 1 ? $o->sd(@values) : ''; |
|
596
|
2825
|
|
|
|
|
12770
|
$d->{'values'} = \@values; |
|
597
|
2825
|
50
|
|
|
|
22829
|
$d->{mad} = $n ? $o->mad(@values) : ''; |
|
598
|
|
|
|
|
|
|
|
|
599
|
2825
|
50
|
|
|
|
25093
|
$d->{sd_from_mad} = $d->{sd_via_mad} = $n ? $d->{mad} * 1.4826016694 : ''; |
|
600
|
2825
|
50
|
|
|
|
20046
|
$d->{usv_mad} = $n ? $d->{sd_from_mad} ** 2 : ''; |
|
601
|
2825
|
50
|
|
|
|
23226
|
$d->{median} = $n ? $o->median(@values) : ''; |
|
602
|
|
|
|
|
|
|
# I should think about caching here!!! DONE! |
|
603
|
2825
|
|
|
|
|
28047
|
$o->{cache}->{deviations}->{$cachekey} = $d; |
|
604
|
2825
|
|
|
|
|
36098
|
return $d; |
|
605
|
|
|
|
|
|
|
} |
|
606
|
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
=head2 mean |
|
608
|
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
given a list of values, returns the mean |
|
610
|
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
=cut |
|
612
|
|
|
|
|
|
|
|
|
613
|
|
|
|
|
|
|
sub mean { |
|
614
|
2827
|
|
|
2827
|
1
|
68611
|
my ($o,@values) = @_; |
|
615
|
2827
|
50
|
|
|
|
19824
|
if(scalar(@values) < 1){ return ''; } |
|
|
0
|
|
|
|
|
0
|
|
|
616
|
2827
|
|
|
|
|
26156
|
return $o->sum(@values) / scalar @values; |
|
617
|
|
|
|
|
|
|
} |
|
618
|
|
|
|
|
|
|
|
|
619
|
|
|
|
|
|
|
=head2 sd (unbiased standard deviation) |
|
620
|
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
given a list of values, returns a hash with keys mean and sd (standard deviation). |
|
622
|
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
=cut |
|
624
|
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
sub sd { |
|
626
|
2827
|
|
|
2827
|
1
|
37100
|
my ($o,@values) = @_; |
|
627
|
2827
|
|
|
|
|
6132
|
my $n = scalar(@values); |
|
628
|
2827
|
50
|
|
|
|
19764
|
my $mean = $n ? $o->mean(@values) : ''; |
|
629
|
2827
|
|
|
|
|
12553
|
my $sos = $o->sum(map {($_ - $mean)**2} @values); |
|
|
330832
|
|
|
|
|
586274
|
|
|
630
|
2827
|
50
|
|
|
|
21744
|
if($n > 1){ |
|
631
|
2827
|
|
|
|
|
10641
|
$sos /= ($n-1); |
|
632
|
|
|
|
|
|
|
} |
|
633
|
|
|
|
|
|
|
else { |
|
634
|
0
|
|
|
|
|
0
|
$sos = ''; |
|
635
|
|
|
|
|
|
|
} |
|
636
|
|
|
|
|
|
|
return { |
|
637
|
2827
|
|
|
|
|
37144
|
sd => sqrt($sos), |
|
638
|
|
|
|
|
|
|
usv => $sos, |
|
639
|
|
|
|
|
|
|
mean => $mean, |
|
640
|
|
|
|
|
|
|
n => $n, |
|
641
|
|
|
|
|
|
|
}; |
|
642
|
|
|
|
|
|
|
} |
|
643
|
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
=head2 sum |
|
645
|
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
given a list of values, returns the sum |
|
647
|
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
=cut |
|
649
|
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
sub sum { |
|
651
|
5654
|
|
|
5654
|
1
|
114118
|
my ($o,@values) = @_; |
|
652
|
5654
|
|
|
|
|
12944
|
my $t = 0; |
|
653
|
5654
|
|
|
|
|
326071
|
$t += $_ foreach @values; |
|
654
|
5654
|
|
|
|
|
58407
|
return $t; |
|
655
|
|
|
|
|
|
|
} |
|
656
|
|
|
|
|
|
|
|
|
657
|
|
|
|
|
|
|
=head2 mad |
|
658
|
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
given a list of values, returns the median absolute deviation |
|
660
|
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
=cut |
|
662
|
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
sub mad { |
|
664
|
2825
|
|
|
2825
|
1
|
43932
|
my ($o,@values) = @_; |
|
665
|
2825
|
50
|
|
|
|
13610
|
if(scalar(@values) < 1){ |
|
666
|
0
|
|
|
|
|
0
|
return ''; |
|
667
|
|
|
|
|
|
|
} |
|
668
|
2825
|
|
|
|
|
17105
|
my $median = $o->median(@values); |
|
669
|
2825
|
|
|
|
|
9318
|
my @ads = map {abs ($_ - $median)} @values; |
|
|
330822
|
|
|
|
|
613525
|
|
|
670
|
2825
|
|
|
|
|
37064
|
return $o->median(@ads); |
|
671
|
|
|
|
|
|
|
} |
|
672
|
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
=head2 ttest |
|
674
|
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
Given options, experiment1, experiment2 and optional filters, |
|
676
|
|
|
|
|
|
|
returns a hash of statistics... |
|
677
|
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
stats1 and stats2 are hashes of deviations: sd, mad, sd_via_mad, usv, n, values |
|
679
|
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
ttest is hash of Welch's ttest results: t, df, p |
|
681
|
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
ttest_mad is like ttest but based on median and median absolute deviateions. |
|
683
|
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
The p-values are derived using Welch's Ttest and the t-distribution function from |
|
685
|
|
|
|
|
|
|
Statistics::Distributions. |
|
686
|
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
MAD and medians are much more robust to outliers, which are significant in peptide ratios. |
|
688
|
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
=cut |
|
691
|
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
sub ttest { |
|
693
|
5202
|
|
|
5202
|
1
|
52181
|
my ($o,%opts) = @_; |
|
694
|
|
|
|
|
|
|
# cache |
|
695
|
5202
|
100
|
|
|
|
19464
|
if($opts{experiment1} gt $opts{experiment2}){ # sort requested expts |
|
696
|
1723
|
|
|
|
|
5586
|
($opts{experiment1}, $opts{experiment2}) = ($opts{experiment2}, $opts{experiment1}); |
|
697
|
|
|
|
|
|
|
} |
|
698
|
5202
|
100
|
|
|
|
24355
|
$o->{cache}->{ttests} = {} unless exists $o->{cache}->{ttests}; |
|
699
|
5202
|
|
|
|
|
34198
|
my $cachekey = join('::', map {"$_=$opts{$_}"} sort keys %opts); |
|
|
25966
|
|
|
|
|
90980
|
|
|
700
|
5202
|
100
|
|
|
|
57857
|
return $o->{cache}->{ttests}->{$cachekey} if exists $o->{cache}->{ttests}->{$cachekey}; |
|
701
|
|
|
|
|
|
|
## |
|
702
|
1402
|
|
|
|
|
5744
|
$opts{experiment} = $opts{experiment1}; |
|
703
|
1402
|
|
|
|
|
8534
|
my $d1 = $o->deviations(%opts); |
|
704
|
1402
|
|
|
|
|
12589
|
$opts{experiment} = $opts{experiment2}; |
|
705
|
1402
|
|
|
|
|
11812
|
my $d2 = $o->deviations(%opts); |
|
706
|
1402
|
|
|
|
|
15876
|
my $tt = $o->welchs_ttest( |
|
707
|
|
|
|
|
|
|
mean1 => $d1->{mean}, |
|
708
|
|
|
|
|
|
|
mean2 => $d2->{mean}, |
|
709
|
|
|
|
|
|
|
usv1 => $d1->{usv}, |
|
710
|
|
|
|
|
|
|
usv2 => $d2->{usv}, |
|
711
|
|
|
|
|
|
|
n1 => $d1->{n}, |
|
712
|
|
|
|
|
|
|
n2 => $d2->{n}, |
|
713
|
|
|
|
|
|
|
); |
|
714
|
1402
|
50
|
33
|
|
|
27797
|
$tt->{p} = ($d1->{n} && $d2->{n}) ? Statistics::Distributions::tprob(int ($tt->{df}), $tt->{t}) : ''; |
|
715
|
1402
|
|
|
|
|
371766
|
my $tm = $o->welchs_ttest( |
|
716
|
|
|
|
|
|
|
mean1 => $d1->{median}, |
|
717
|
|
|
|
|
|
|
mean2 => $d2->{median}, |
|
718
|
|
|
|
|
|
|
usv1 => $d1->{usv_mad}, |
|
719
|
|
|
|
|
|
|
usv2 => $d2->{usv_mad}, |
|
720
|
|
|
|
|
|
|
n1 => $d1->{n}, |
|
721
|
|
|
|
|
|
|
n2 => $d2->{n}, |
|
722
|
|
|
|
|
|
|
); |
|
723
|
1402
|
50
|
33
|
|
|
18817
|
$tm->{p} = ($d1->{n} && $d2->{n}) ? Statistics::Distributions::tprob(int ($tm->{df}), $tm->{t}) : ''; |
|
724
|
|
|
|
|
|
|
|
|
725
|
1402
|
|
|
|
|
307460
|
my $r = { |
|
726
|
|
|
|
|
|
|
stats1 => $d1, stats2 => $d2, ttest => $tt, ttest_mad => $tm |
|
727
|
|
|
|
|
|
|
}; |
|
728
|
1402
|
|
|
|
|
12334
|
$o->{cache}->{ttests}->{$cachekey} = $r; |
|
729
|
1402
|
|
|
|
|
13433
|
return $r; |
|
730
|
|
|
|
|
|
|
} |
|
731
|
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
=head2 welchs_ttest |
|
733
|
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
performs Welch's ttest, given mean1, mean2, usv1, usv2, n1 and n2 in a hash. |
|
735
|
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
e.g. |
|
737
|
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
$o->welchs_ttest( mean1 => 4, mean2 => 3, # sample mean |
|
739
|
|
|
|
|
|
|
usv1 => 1, usv2 => 1.1, # unbiased sample variance (returned as usv from $o->sd |
|
740
|
|
|
|
|
|
|
n1 => 4, n2=> 7 # number of observations |
|
741
|
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
also performs Welch-Satterthwaite to calculate degrees of freedom (to look up in t-statistic table) |
|
743
|
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
Returns hashref containing t and df. |
|
745
|
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
=cut |
|
747
|
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
sub welchs_ttest { |
|
749
|
2805
|
|
|
2805
|
1
|
27807
|
my ($o, %t) = @_; |
|
750
|
2805
|
|
|
|
|
9249
|
my ($x1,$x2,$v1,$v2,$n1,$n2) = map {$t{$_}} qw/mean1 mean2 usv1 usv2 n1 n2/; |
|
|
16830
|
|
|
|
|
37434
|
|
|
751
|
2805
|
|
|
|
|
9893
|
my ($vn1,$vn2) = ($v1/$n1, $v2/$n2); |
|
752
|
2805
|
|
|
|
|
8904
|
my $t = abs($x1 - $x2) / sqrt( $vn1 + $vn2 ); |
|
753
|
2805
|
|
|
|
|
12687
|
my $df = ($vn1 + $vn2)**2 / ( $vn1**2/($n1-1) + $vn2**2/($n2-1) ); |
|
754
|
2805
|
|
|
|
|
21831
|
return {t => $t, df => $df}; |
|
755
|
|
|
|
|
|
|
} |
|
756
|
|
|
|
|
|
|
|
|
757
|
|
|
|
|
|
|
=head2 replicateMedianSubtractions |
|
758
|
|
|
|
|
|
|
|
|
759
|
|
|
|
|
|
|
Logs data, if not already done, calculates median for each replicate, and subtracts median from each evidence in that replicate. |
|
760
|
|
|
|
|
|
|
|
|
761
|
|
|
|
|
|
|
=cut |
|
762
|
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
sub replicateMedianSubtractions { |
|
764
|
5
|
|
|
5
|
1
|
369
|
my ($o, %opts) = @_; # can set filter here |
|
765
|
5
|
|
|
|
|
28
|
$o->logRatios(); |
|
766
|
5
|
|
|
|
|
34
|
foreach my $replicate($o->experiments()){ |
|
767
|
135
|
|
|
|
|
941
|
my $median = $o->replicateMedian(%opts, experiment=>$replicate); |
|
768
|
135
|
|
|
|
|
868
|
my $p = $o->filter(experiment=>$replicate); |
|
769
|
135
|
|
|
|
|
280
|
foreach my $pgid(keys %{$p->{data}->{$replicate}}){ |
|
|
135
|
|
|
|
|
2847
|
|
|
770
|
13965
|
|
|
|
|
18686
|
foreach my $i(0.. $#{$p->{data}->{$replicate}->{$pgid}->{'Ratio H/L'}}){ |
|
|
13965
|
|
|
|
|
69328
|
|
|
771
|
13965
|
100
|
|
|
|
100753
|
if($p->{data}->{$replicate}->{$pgid}->{'Ratio H/L'}->[$i] =~ /\d/){ |
|
772
|
13280
|
|
|
|
|
78911
|
$p->{data}->{$replicate}->{$pgid}->{'Ratio H/L'}->[$i] -= $median; |
|
773
|
|
|
|
|
|
|
} |
|
774
|
|
|
|
|
|
|
} |
|
775
|
|
|
|
|
|
|
} |
|
776
|
|
|
|
|
|
|
} |
|
777
|
|
|
|
|
|
|
# i guess we should do something better with generating this status: |
|
778
|
5
|
|
|
|
|
80
|
return 1; |
|
779
|
|
|
|
|
|
|
} |
|
780
|
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
=head2 median |
|
782
|
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
given a list of numbers, returns the median... assumes all items are numbers! |
|
784
|
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
=cut |
|
786
|
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
sub median { |
|
788
|
8834
|
|
|
8834
|
1
|
18817
|
my $o = shift; |
|
789
|
8834
|
|
|
|
|
55317
|
my @list = sort {$a <=> $b} @_; |
|
|
5892786
|
|
|
|
|
7771332
|
|
|
790
|
8834
|
|
|
|
|
17954
|
my $n = scalar @list; |
|
791
|
8834
|
100
|
|
|
|
49007
|
if($n % 2){ # remainder on division by two -> it's odd! |
|
792
|
3940
|
|
|
|
|
50834
|
return $list[($n-1)/2]; # index of last over 2, e.g. 21 items, last index 20, return 10. |
|
793
|
|
|
|
|
|
|
} |
|
794
|
|
|
|
|
|
|
else { # it's not odd... so it's even |
|
795
|
4894
|
|
|
|
|
66267
|
return ($list[$n/2 - 1] + $list[$n/2]) / 2; # length over 2 and the same minus 1, e.g. 20 items, we want 9 and 10. |
|
796
|
|
|
|
|
|
|
} |
|
797
|
|
|
|
|
|
|
} |
|
798
|
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
=head2 experimentMaximumPvalue |
|
801
|
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
=cut |
|
803
|
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
sub experimentMaximumPvalue { |
|
805
|
866
|
|
|
866
|
1
|
13973
|
my ($o,%opts) = @_; |
|
806
|
|
|
|
|
|
|
# run through experiments and collect replicate names for comparisons... |
|
807
|
|
|
|
|
|
|
# this should be filtered for individual proteins using the leadingProteins option. |
|
808
|
866
|
|
|
|
|
3287
|
my @reps1 = (); |
|
809
|
866
|
|
|
|
|
2720
|
my @reps2 = (); |
|
810
|
866
|
|
|
|
|
4963
|
foreach my $rep($o->experiments){ |
|
811
|
23382
|
100
|
|
|
|
105896
|
if($rep =~ /^$opts{experiment1}/){ |
|
812
|
2598
|
|
|
|
|
5303
|
push @reps1, $rep; |
|
813
|
|
|
|
|
|
|
} |
|
814
|
23382
|
100
|
|
|
|
127597
|
if($rep =~ /^$opts{experiment2}/){ |
|
815
|
2598
|
|
|
|
|
8756
|
push @reps2, $rep; |
|
816
|
|
|
|
|
|
|
} |
|
817
|
|
|
|
|
|
|
} |
|
818
|
|
|
|
|
|
|
# now there must be enough replicates with enough observations in each... |
|
819
|
866
|
50
|
|
|
|
7896
|
$opts{minimum_observations} = 2 unless exists $opts{minimum_observations}; |
|
820
|
866
|
50
|
|
|
|
4606
|
$opts{minimum_replicates} = 2 unless exists $opts{minimum_replicates}; |
|
821
|
866
|
|
|
|
|
12660
|
my $reps1 = 0; |
|
822
|
866
|
|
|
|
|
1966
|
my $reps2 = 0; |
|
823
|
866
|
|
|
|
|
2609
|
foreach my $rep(@reps1){ |
|
824
|
2598
|
|
|
|
|
11161
|
my $f = $o->filter(experiment=>$rep, leadingProteins=>$opts{filter}); |
|
825
|
2598
|
|
|
|
|
11830
|
my @values = $f->extractColumnValues(column => 'Ratio H/L'); |
|
826
|
2598
|
100
|
|
|
|
23886
|
$reps1 ++ if scalar(@values) > $opts{minimum_observations}; |
|
827
|
|
|
|
|
|
|
} |
|
828
|
866
|
|
|
|
|
3446
|
foreach my $rep(@reps2){ |
|
829
|
2598
|
|
|
|
|
11475
|
my $f = $o->filter(experiment=>$rep, leadingProteins=>$opts{filter}); |
|
830
|
2598
|
|
|
|
|
11934
|
my @values = $f->extractColumnValues(column => 'Ratio H/L', nodup => 0); |
|
831
|
2598
|
100
|
|
|
|
18547
|
$reps2 ++ if scalar(@values) > $opts{minimum_observations}; |
|
832
|
|
|
|
|
|
|
} |
|
833
|
866
|
100
|
100
|
|
|
12666
|
return {p_max => -1, p_mad_max => -1} if $reps1 < $opts{minimum_replicates} || $reps2 < $opts{minimum_replicates}; |
|
834
|
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
# compare each combination of replicates |
|
836
|
518
|
|
|
|
|
1623
|
my $p_max = 0; |
|
837
|
518
|
|
|
|
|
1734
|
my $p_mad_max = 0; |
|
838
|
518
|
|
|
|
|
1947
|
foreach my $r1(@reps1){ |
|
839
|
1554
|
|
|
|
|
3874
|
foreach my $r2(@reps2){ |
|
840
|
4662
|
|
|
|
|
27775
|
my $tt = $o->ttest(%opts, experiment1=>$r1, experiment2=>$r2); |
|
841
|
4662
|
100
|
|
|
|
48833
|
$p_max = $tt->{ttest}->{p} if $tt->{ttest}->{p} > $p_max; |
|
842
|
4662
|
100
|
|
|
|
30063
|
$p_mad_max = $tt->{ttest_mad}->{p} if $tt->{ttest_mad}->{p} > $p_mad_max; |
|
843
|
|
|
|
|
|
|
} |
|
844
|
|
|
|
|
|
|
} |
|
845
|
|
|
|
|
|
|
# compare experiments overall |
|
846
|
518
|
|
|
|
|
3385
|
my $tt = $o->ttest(%opts); |
|
847
|
518
|
100
|
|
|
|
5314
|
$p_max = $tt->{ttest}->{p} if $tt->{ttest}->{p} > $p_max; |
|
848
|
518
|
50
|
|
|
|
3888
|
$p_mad_max = $tt->{ttest_mad}->{p} if $tt->{ttest_mad}->{p} > $p_mad_max; |
|
849
|
|
|
|
|
|
|
|
|
850
|
|
|
|
|
|
|
# report the maxima |
|
851
|
518
|
|
|
|
|
7798
|
return {p_max=>$p_max, p_mad_max=>$p_mad_max}; |
|
852
|
|
|
|
|
|
|
} |
|
853
|
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
=head2 fullProteinComparison |
|
855
|
|
|
|
|
|
|
|
|
856
|
|
|
|
|
|
|
Does a full comparison on a particular protein, i.e. compares all pairs of conditions, also does |
|
857
|
|
|
|
|
|
|
differential response analysis. Allows limitation of analysis to proteotypic peptides. |
|
858
|
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
=cut |
|
860
|
|
|
|
|
|
|
|
|
861
|
|
|
|
|
|
|
sub fullProteinComparison { |
|
862
|
8
|
|
|
8
|
1
|
126
|
my ($o, %opts) = @_; |
|
863
|
|
|
|
|
|
|
# %opts should have our protein listed as "filter" |
|
864
|
8
|
|
|
|
|
47
|
my @pairs = $o->pairs(); |
|
865
|
8
|
|
|
|
|
59
|
my @orths = $o->orthogonals(); |
|
866
|
8
|
|
|
|
|
43
|
my %results = (); |
|
867
|
8
|
|
|
|
|
24
|
foreach my $p(@pairs){ |
|
868
|
288
|
|
|
|
|
2410
|
my ($e1,$e2) = split(/\s+/, $p); |
|
869
|
288
|
|
|
|
|
2154
|
$results{$p} = $o->experimentMaximumPvalue(%opts, experiment1=>$e1, experiment2=>$e2); |
|
870
|
|
|
|
|
|
|
} |
|
871
|
8
|
|
|
|
|
40
|
foreach my $p(@orths){ |
|
872
|
288
|
|
|
|
|
2239
|
my ($e1,$e2,$e3) = split(/\s+/, $p); |
|
873
|
288
|
|
|
|
|
2014
|
my $r1 = $o->experimentMaximumPvalue(%opts, experiment1=>$e1, experiment2=>$e2); |
|
874
|
288
|
|
|
|
|
2414
|
my $r2 = $o->experimentMaximumPvalue(%opts, experiment1=>$e1, experiment2=>$e3); |
|
875
|
288
|
100
|
100
|
|
|
4190
|
if($r1->{p_max} < 0 || $r2->{p_max} < 0){ |
|
|
|
100
|
|
|
|
|
|
|
876
|
120
|
|
|
|
|
2579
|
$r1->{p_max} = -1; |
|
877
|
|
|
|
|
|
|
} |
|
878
|
|
|
|
|
|
|
elsif($r2->{p_max} > $r1->{p_max}) { |
|
879
|
54
|
|
|
|
|
174
|
$r1->{p_max} = $r2->{p_max}; |
|
880
|
|
|
|
|
|
|
} |
|
881
|
288
|
100
|
100
|
|
|
2818
|
if($r1->{p_mad_max} < 0 || $r2->{p_mad_max} < 0){ |
|
|
|
100
|
|
|
|
|
|
|
882
|
120
|
|
|
|
|
548
|
$r1->{p_mad_max} = -1; |
|
883
|
|
|
|
|
|
|
} |
|
884
|
|
|
|
|
|
|
elsif($r2->{p_mad_max} > $r1->{p_mad_max}) { |
|
885
|
52
|
|
|
|
|
272
|
$r1->{p_mad_max} = $r2->{p_mad_max}; |
|
886
|
|
|
|
|
|
|
} |
|
887
|
288
|
|
|
|
|
2604
|
$results{$p} = $r1; |
|
888
|
|
|
|
|
|
|
} |
|
889
|
8
|
|
|
|
|
274
|
return \%results; |
|
890
|
|
|
|
|
|
|
} |
|
891
|
|
|
|
|
|
|
|
|
892
|
|
|
|
|
|
|
=head2 fullComparison |
|
893
|
|
|
|
|
|
|
|
|
894
|
|
|
|
|
|
|
Does a full comparison for each protein. Returns hash of hashes. |
|
895
|
|
|
|
|
|
|
|
|
896
|
|
|
|
|
|
|
=cut |
|
897
|
|
|
|
|
|
|
|
|
898
|
|
|
|
|
|
|
sub fullComparison { |
|
899
|
1
|
|
|
1
|
1
|
2005
|
my $o = shift; |
|
900
|
1
|
|
|
|
|
7
|
my @leadingProteins = $o->getLeadingProteins(); |
|
901
|
1
|
|
|
|
|
5
|
my %results = (); |
|
902
|
1
|
|
|
|
|
4
|
foreach my $lp(@leadingProteins){ |
|
903
|
7
|
|
|
|
|
39
|
$results{$lp} = $o->fullProteinComparison(filter=>$lp); |
|
904
|
|
|
|
|
|
|
} |
|
905
|
1
|
|
|
|
|
15
|
return \%results; |
|
906
|
|
|
|
|
|
|
} |
|
907
|
|
|
|
|
|
|
|
|
908
|
|
|
|
|
|
|
=head2 direction |
|
909
|
|
|
|
|
|
|
|
|
910
|
|
|
|
|
|
|
given two values, returns whether the different between first and second is positive or negative |
|
911
|
|
|
|
|
|
|
|
|
912
|
|
|
|
|
|
|
returns '+' or '-' |
|
913
|
|
|
|
|
|
|
|
|
914
|
|
|
|
|
|
|
=cut |
|
915
|
|
|
|
|
|
|
|
|
916
|
|
|
|
|
|
|
sub direction { |
|
917
|
0
|
0
|
|
0
|
1
|
|
return $_[1] > $_[2] ? '-' : '+'; |
|
918
|
|
|
|
|
|
|
} |
|
919
|
|
|
|
|
|
|
|
|
920
|
|
|
|
|
|
|
=head2 directionsDisagree |
|
921
|
|
|
|
|
|
|
|
|
922
|
|
|
|
|
|
|
given two directions, which could be '+', '-' or '', returns true if one is '+' and the other is '-' |
|
923
|
|
|
|
|
|
|
|
|
924
|
|
|
|
|
|
|
=cut |
|
925
|
|
|
|
|
|
|
|
|
926
|
|
|
|
|
|
|
sub directionsDisagree { |
|
927
|
0
|
0
|
0
|
0
|
1
|
|
return if $_[1] eq '-' && $_[2] eq '+'; |
|
928
|
0
|
0
|
0
|
|
|
|
return if $_[1] eq '+' && $_[2] eq '-'; |
|
929
|
0
|
|
|
|
|
|
return 1; # must be the same or one is blank. |
|
930
|
|
|
|
|
|
|
} |
|
931
|
|
|
|
|
|
|
|
|
932
|
|
|
|
|
|
|
|
|
933
|
|
|
|
|
|
|
|
|
934
|
|
|
|
|
|
|
|
|
935
|
|
|
|
|
|
|
=head1 AUTHOR |
|
936
|
|
|
|
|
|
|
|
|
937
|
|
|
|
|
|
|
jimi, C<< >> |
|
938
|
|
|
|
|
|
|
|
|
939
|
|
|
|
|
|
|
=head1 BUGS |
|
940
|
|
|
|
|
|
|
|
|
941
|
|
|
|
|
|
|
Please report any bugs or feature requests to C, or through |
|
942
|
|
|
|
|
|
|
the web interface at L. I will be notified, and then you'll |
|
943
|
|
|
|
|
|
|
automatically be notified of progress on your bug as I make changes. |
|
944
|
|
|
|
|
|
|
|
|
945
|
|
|
|
|
|
|
|
|
946
|
|
|
|
|
|
|
|
|
947
|
|
|
|
|
|
|
|
|
948
|
|
|
|
|
|
|
=head1 SUPPORT |
|
949
|
|
|
|
|
|
|
|
|
950
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
|
951
|
|
|
|
|
|
|
|
|
952
|
|
|
|
|
|
|
perldoc Bio::MaxQuant::Evidence::Statistics |
|
953
|
|
|
|
|
|
|
|
|
954
|
|
|
|
|
|
|
|
|
955
|
|
|
|
|
|
|
You can also look for information at: |
|
956
|
|
|
|
|
|
|
|
|
957
|
|
|
|
|
|
|
=over 4 |
|
958
|
|
|
|
|
|
|
|
|
959
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker (report bugs here) |
|
960
|
|
|
|
|
|
|
|
|
961
|
|
|
|
|
|
|
L |
|
962
|
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
|
964
|
|
|
|
|
|
|
|
|
965
|
|
|
|
|
|
|
L |
|
966
|
|
|
|
|
|
|
|
|
967
|
|
|
|
|
|
|
=item * CPAN Ratings |
|
968
|
|
|
|
|
|
|
|
|
969
|
|
|
|
|
|
|
L |
|
970
|
|
|
|
|
|
|
|
|
971
|
|
|
|
|
|
|
=item * Search CPAN |
|
972
|
|
|
|
|
|
|
|
|
973
|
|
|
|
|
|
|
L |
|
974
|
|
|
|
|
|
|
|
|
975
|
|
|
|
|
|
|
=back |
|
976
|
|
|
|
|
|
|
|
|
977
|
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
|
979
|
|
|
|
|
|
|
|
|
980
|
|
|
|
|
|
|
|
|
981
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
|
982
|
|
|
|
|
|
|
|
|
983
|
|
|
|
|
|
|
Copyright 2014 jimi. |
|
984
|
|
|
|
|
|
|
|
|
985
|
|
|
|
|
|
|
This program is released under the following license: artistic2 |
|
986
|
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
|
|
988
|
|
|
|
|
|
|
=cut |
|
989
|
|
|
|
|
|
|
|
|
990
|
|
|
|
|
|
|
1; # End of Bio::MaxQuant::Evidence::Statistics |