| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# |
|
2
|
|
|
|
|
|
|
# BioPerl module for Bio::PopGen::Population |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
|
5
|
|
|
|
|
|
|
# |
|
6
|
|
|
|
|
|
|
# Cared for by Jason Stajich |
|
7
|
|
|
|
|
|
|
# |
|
8
|
|
|
|
|
|
|
# Copyright Jason Stajich |
|
9
|
|
|
|
|
|
|
# |
|
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::PopGen::Population - A population of individuals |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
use Bio::PopGen::Population; |
|
21
|
|
|
|
|
|
|
use Bio::PopGen::Individual; |
|
22
|
|
|
|
|
|
|
my $population = Bio::PopGen::Population->new(); |
|
23
|
|
|
|
|
|
|
my $ind = Bio::PopGen::Individual->new(-unique_id => 'id'); |
|
24
|
|
|
|
|
|
|
$population->add_Individual($ind); |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
for my $ind ( $population->get_Individuals ) { |
|
27
|
|
|
|
|
|
|
# iterate through the individuals |
|
28
|
|
|
|
|
|
|
} |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
for my $name ( $population->get_marker_names ) { |
|
31
|
|
|
|
|
|
|
my $marker = $population->get_Marker($name); |
|
32
|
|
|
|
|
|
|
} |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
my $num_inds = $population->get_number_individuals; |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
my $homozygote_f = $population->get_Frequency_Homozygotes; |
|
37
|
|
|
|
|
|
|
my $heterozygote_f = $population->get_Frequency_Heterozygotes; |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# make a population haploid by making fake chromosomes through |
|
40
|
|
|
|
|
|
|
# haplotypes -- ala allele 1 is on chrom 1 and allele 2 is on chrom 2 |
|
41
|
|
|
|
|
|
|
# the number of individuals created will thus be 2 x number in |
|
42
|
|
|
|
|
|
|
# population |
|
43
|
|
|
|
|
|
|
my $happop = $population->haploid_population; |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
This is a collection of individuals. We'll have ways of generating |
|
49
|
|
|
|
|
|
|
L objects out so we can calculate allele_frequencies |
|
50
|
|
|
|
|
|
|
for implementing the various statistical tests. |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=head1 FEEDBACK |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
=head2 Mailing Lists |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
|
57
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to |
|
58
|
|
|
|
|
|
|
the Bioperl mailing list. Your participation is much appreciated. |
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
|
61
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=head2 Support |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
I |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
|
70
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
|
71
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
|
72
|
|
|
|
|
|
|
with code and data examples if at all possible. |
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=head2 Reporting Bugs |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
|
77
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via |
|
78
|
|
|
|
|
|
|
email or the web: |
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=head1 AUTHOR - Jason Stajich |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
Email jason-at-bioperl.org |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
Matthew Hahn, matthew.hahn-at-duke.edu |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=head1 APPENDIX |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
The rest of the documentation details each of the object methods. |
|
93
|
|
|
|
|
|
|
Internal methods are usually preceded with a _ |
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=cut |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
# Let the code begin... |
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
package Bio::PopGen::Population; |
|
102
|
3
|
|
|
3
|
|
713
|
use strict; |
|
|
3
|
|
|
|
|
4
|
|
|
|
3
|
|
|
|
|
79
|
|
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
# Object preamble - inherits from Bio::Root::Root |
|
105
|
|
|
|
|
|
|
|
|
106
|
3
|
|
|
3
|
|
705
|
use Bio::PopGen::Marker; |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
70
|
|
|
107
|
3
|
|
|
3
|
|
437
|
use Bio::PopGen::Genotype; |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
125
|
|
|
108
|
|
|
|
|
|
|
our $CheckISA = 1; |
|
109
|
3
|
|
|
3
|
|
18
|
use base qw(Bio::Root::Root Bio::PopGen::PopulationI); |
|
|
3
|
|
|
|
|
4
|
|
|
|
3
|
|
|
|
|
879
|
|
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=head2 new |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
Title : new |
|
114
|
|
|
|
|
|
|
Usage : my $obj = Bio::PopGen::Population->new(); |
|
115
|
|
|
|
|
|
|
Function: Builds a new Bio::PopGen::Population object |
|
116
|
|
|
|
|
|
|
Returns : an instance of Bio::PopGen::Population |
|
117
|
|
|
|
|
|
|
Args : -individuals => array ref of individuals (optional) |
|
118
|
|
|
|
|
|
|
-name => population name (optional) |
|
119
|
|
|
|
|
|
|
-source => a source tag (optional) |
|
120
|
|
|
|
|
|
|
-description => a short description string of the population (optional) |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=cut |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
sub new { |
|
125
|
52
|
|
|
52
|
1
|
4498
|
my($class,@args) = @_; |
|
126
|
|
|
|
|
|
|
|
|
127
|
52
|
|
|
|
|
148
|
my $self = $class->SUPER::new(@args); |
|
128
|
52
|
|
|
|
|
91
|
$self->{'_individuals'} = []; |
|
129
|
52
|
|
|
|
|
184
|
my ($name,$source,$description, |
|
130
|
|
|
|
|
|
|
$inds,$checkisa) = $self->_rearrange([qw(NAME |
|
131
|
|
|
|
|
|
|
SOURCE |
|
132
|
|
|
|
|
|
|
DESCRIPTION |
|
133
|
|
|
|
|
|
|
INDIVIDUALS |
|
134
|
|
|
|
|
|
|
CHECKISA)], @args); |
|
135
|
52
|
100
|
|
|
|
137
|
if( defined $inds ) { |
|
136
|
50
|
50
|
|
|
|
177
|
if( ref($inds) !~ /ARRAY/i ) { |
|
137
|
0
|
|
|
|
|
0
|
$self->warn("Need to provide a value array ref for the -individuals initialization flag"); |
|
138
|
|
|
|
|
|
|
} else { |
|
139
|
50
|
|
|
|
|
251
|
$self->add_Individual(@$inds); |
|
140
|
|
|
|
|
|
|
} |
|
141
|
|
|
|
|
|
|
} |
|
142
|
|
|
|
|
|
|
|
|
143
|
52
|
100
|
|
|
|
108
|
defined $name && $self->name($name); |
|
144
|
52
|
100
|
|
|
|
117
|
defined $source && $self->source($source); |
|
145
|
52
|
100
|
|
|
|
118
|
defined $description && $self->description($description); |
|
146
|
52
|
100
|
|
|
|
103
|
$self->{'_checkisa'} = defined $checkisa ? $checkisa : $CheckISA; |
|
147
|
52
|
|
|
|
|
279
|
return $self; |
|
148
|
|
|
|
|
|
|
} |
|
149
|
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=head2 name |
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
Title : name |
|
154
|
|
|
|
|
|
|
Usage : my $name = $pop->name |
|
155
|
|
|
|
|
|
|
Function: Get the population name |
|
156
|
|
|
|
|
|
|
Returns : string representing population name |
|
157
|
|
|
|
|
|
|
Args : [optional] string representing population name |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=cut |
|
161
|
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
sub name{ |
|
163
|
43
|
|
|
43
|
1
|
56
|
my $self = shift; |
|
164
|
43
|
100
|
|
|
|
85
|
return $self->{'_name'} = shift if @_; |
|
165
|
34
|
|
|
|
|
95
|
return $self->{'_name'}; |
|
166
|
|
|
|
|
|
|
} |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=head2 description |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
Title : description |
|
171
|
|
|
|
|
|
|
Usage : my $description = $pop->description |
|
172
|
|
|
|
|
|
|
Function: Get the population description |
|
173
|
|
|
|
|
|
|
Returns : string representing population description |
|
174
|
|
|
|
|
|
|
Args : [optional] string representing population description |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=cut |
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
sub description{ |
|
180
|
41
|
|
|
41
|
1
|
49
|
my $self = shift; |
|
181
|
41
|
100
|
|
|
|
73
|
return $self->{'_description'} = shift if @_; |
|
182
|
34
|
|
|
|
|
125
|
return $self->{'_description'}; |
|
183
|
|
|
|
|
|
|
} |
|
184
|
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
=head2 source |
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
Title : source |
|
188
|
|
|
|
|
|
|
Usage : my $source = $pop->source |
|
189
|
|
|
|
|
|
|
Function: Get the population source |
|
190
|
|
|
|
|
|
|
Returns : string representing population source |
|
191
|
|
|
|
|
|
|
Args : [optional] string representing population source |
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
=cut |
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
sub source{ |
|
197
|
38
|
|
|
38
|
1
|
50
|
my $self = shift; |
|
198
|
38
|
100
|
|
|
|
126
|
return $self->{'_source'} = shift if @_; |
|
199
|
34
|
|
|
|
|
74
|
return $self->{'_source'}; |
|
200
|
|
|
|
|
|
|
} |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
=head2 annotation |
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
Title : annotation |
|
205
|
|
|
|
|
|
|
Usage : my $annotation_collection = $pop->annotation; |
|
206
|
|
|
|
|
|
|
Function: Get/set a Bio::AnnotationCollectionI for this population |
|
207
|
|
|
|
|
|
|
Returns : Bio::AnnotationCollectionI object |
|
208
|
|
|
|
|
|
|
Args : [optional set] Bio::AnnotationCollectionI object |
|
209
|
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
=cut |
|
211
|
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
sub annotation{ |
|
213
|
0
|
|
|
0
|
1
|
0
|
my ($self, $arg) = @_; |
|
214
|
0
|
0
|
|
|
|
0
|
return $self->{_annotation} unless $arg; |
|
215
|
0
|
0
|
0
|
|
|
0
|
$self->throw("Bio::AnnotationCollectionI required for argument") unless |
|
216
|
|
|
|
|
|
|
ref($arg) && $arg->isa('Bio::AnnotationCollectionI'); |
|
217
|
0
|
|
|
|
|
0
|
return $self->{_annotation} = $arg; |
|
218
|
|
|
|
|
|
|
} |
|
219
|
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
=head2 set_Allele_Frequency |
|
221
|
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
Title : set_Allele_Frequency |
|
223
|
|
|
|
|
|
|
Usage : $population->set_Allele_Frequency('marker' => { 'allele1' => 0.1}); |
|
224
|
|
|
|
|
|
|
Function: Sets an allele frequency for a Marker for this Population |
|
225
|
|
|
|
|
|
|
This allows the Population to not have individual individual |
|
226
|
|
|
|
|
|
|
genotypes but rather a set of overall allele frequencies |
|
227
|
|
|
|
|
|
|
Returns : Count of the number of markers |
|
228
|
|
|
|
|
|
|
Args : -name => (string) marker name |
|
229
|
|
|
|
|
|
|
-allele => (string) allele name |
|
230
|
|
|
|
|
|
|
-frequency => (double) allele frequency - must be between 0 and 1 |
|
231
|
|
|
|
|
|
|
OR |
|
232
|
|
|
|
|
|
|
-frequencies => { 'marker1' => { 'allele1' => 0.01, |
|
233
|
|
|
|
|
|
|
'allele2' => 0.99}, |
|
234
|
|
|
|
|
|
|
'marker2' => ... |
|
235
|
|
|
|
|
|
|
} |
|
236
|
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
=cut |
|
238
|
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
sub set_Allele_Frequency { |
|
240
|
3
|
|
|
3
|
1
|
432
|
my ($self,@args) = @_; |
|
241
|
3
|
|
|
|
|
10
|
my ($name,$allele, $frequency, |
|
242
|
|
|
|
|
|
|
$frequencies) = $self->_rearrange([qw(NAME |
|
243
|
|
|
|
|
|
|
ALLELE |
|
244
|
|
|
|
|
|
|
FREQUENCY |
|
245
|
|
|
|
|
|
|
FREQUENCIES |
|
246
|
|
|
|
|
|
|
)], @args); |
|
247
|
3
|
100
|
|
|
|
9
|
if( defined $frequencies ) { # this supersedes the res |
|
248
|
1
|
50
|
|
|
|
5
|
if( ref($frequencies) =~ /HASH/i ) { |
|
249
|
1
|
|
|
|
|
4
|
my ($markername,$alleles); |
|
250
|
1
|
|
|
|
|
5
|
while( ($markername,$alleles) = each %$frequencies ) { |
|
251
|
2
|
|
|
|
|
9
|
$self->{'_allele_freqs'}->{$markername} = |
|
252
|
|
|
|
|
|
|
Bio::PopGen::Marker->new(-name => $markername, |
|
253
|
|
|
|
|
|
|
-allele_freq => $alleles); |
|
254
|
|
|
|
|
|
|
} |
|
255
|
|
|
|
|
|
|
} else { |
|
256
|
0
|
|
|
|
|
0
|
$self->throw("Must provide a valid hashref for the -frequencies option"); |
|
257
|
|
|
|
|
|
|
} |
|
258
|
|
|
|
|
|
|
} else { |
|
259
|
2
|
100
|
|
|
|
4
|
unless( defined $self->{'_allele_freqs'}->{$name} ) { |
|
260
|
1
|
|
|
|
|
4
|
$self->{'_allele_freqs'}->{$name} = |
|
261
|
|
|
|
|
|
|
Bio::PopGen::Marker->new(-name => $name); |
|
262
|
|
|
|
|
|
|
} |
|
263
|
2
|
|
|
|
|
6
|
$self->{'_allele_freqs'}->{$name}->add_Allele_Frequency($allele,$frequency); |
|
264
|
|
|
|
|
|
|
} |
|
265
|
3
|
|
|
|
|
4
|
return scalar keys %{$self->{'_allele_freqs'}}; |
|
|
3
|
|
|
|
|
7
|
|
|
266
|
|
|
|
|
|
|
} |
|
267
|
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
=head2 add_Individual |
|
270
|
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
Title : add_Individual |
|
272
|
|
|
|
|
|
|
Usage : $population->add_Individual(@individuals); |
|
273
|
|
|
|
|
|
|
Function: Add individuals to a population |
|
274
|
|
|
|
|
|
|
Returns : count of the current number in the object |
|
275
|
|
|
|
|
|
|
Args : Array of Individuals |
|
276
|
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
=cut |
|
279
|
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
sub add_Individual{ |
|
281
|
51
|
|
|
51
|
1
|
218
|
my ($self,@inds) = @_; |
|
282
|
51
|
|
|
|
|
80
|
foreach my $i ( @inds ) { |
|
283
|
1404
|
50
|
|
|
|
1641
|
next if ! defined $i; |
|
284
|
|
|
|
|
|
|
|
|
285
|
1404
|
100
|
|
|
|
2155
|
unless( $self->{'_checkisa'} ? $i->isa('Bio::PopGen::IndividualI') : 1 ) { |
|
|
|
50
|
|
|
|
|
|
|
286
|
0
|
|
|
|
|
0
|
$self->warn("cannot add an individual ($i) which is not a Bio::PopGen::IndividualI"); |
|
287
|
0
|
|
|
|
|
0
|
next; |
|
288
|
|
|
|
|
|
|
} |
|
289
|
|
|
|
|
|
|
} |
|
290
|
51
|
|
|
|
|
64
|
push @{$self->{'_individuals'}}, @inds; |
|
|
51
|
|
|
|
|
270
|
|
|
291
|
51
|
|
|
|
|
102
|
$self->{'_cached_markernames'} = undef; |
|
292
|
51
|
|
|
|
|
75
|
$self->{'_allele_freqs'} = {}; |
|
293
|
51
|
50
|
|
|
|
64
|
return scalar @{$self->{'_individuals'} || []}; |
|
|
51
|
|
|
|
|
157
|
|
|
294
|
|
|
|
|
|
|
} |
|
295
|
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
=head2 remove_Individuals |
|
298
|
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
Title : remove_Individuals |
|
300
|
|
|
|
|
|
|
Usage : $population->remove_Individuals(@ids); |
|
301
|
|
|
|
|
|
|
Function: Remove individual(s) to a population |
|
302
|
|
|
|
|
|
|
Returns : count of the current number in the object |
|
303
|
|
|
|
|
|
|
Args : Array of ids |
|
304
|
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
=cut |
|
306
|
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
sub remove_Individuals { |
|
308
|
1
|
|
|
1
|
1
|
846
|
my ($self,@names) = @_; |
|
309
|
1
|
|
|
|
|
3
|
my $i = 0; |
|
310
|
1
|
|
|
|
|
1
|
my %namehash; # O(1) lookup will be faster I think |
|
311
|
1
|
|
|
|
|
2
|
foreach my $n ( @names ) { $namehash{$n}++ } |
|
|
1
|
|
|
|
|
3
|
|
|
312
|
1
|
|
|
|
|
2
|
my @tosplice; |
|
313
|
1
|
50
|
|
|
|
1
|
foreach my $ind ( @{$self->{'_individuals'} || []} ) { |
|
|
1
|
|
|
|
|
4
|
|
|
314
|
2
|
100
|
|
|
|
6
|
unshift @tosplice, $i if( $namehash{$ind->unique_id} ); |
|
315
|
2
|
|
|
|
|
3
|
$i++; |
|
316
|
|
|
|
|
|
|
} |
|
317
|
1
|
|
|
|
|
2
|
foreach my $index ( @tosplice ) { |
|
318
|
1
|
|
|
|
|
2
|
splice(@{$self->{'_individuals'}}, $index,1); |
|
|
1
|
|
|
|
|
3
|
|
|
319
|
|
|
|
|
|
|
} |
|
320
|
1
|
|
|
|
|
2
|
$self->{'_cached_markernames'} = undef; |
|
321
|
1
|
|
|
|
|
3
|
$self->{'_allele_freqs'} = {}; |
|
322
|
1
|
50
|
|
|
|
1
|
return scalar @{$self->{'_individuals'} || []}; |
|
|
1
|
|
|
|
|
3
|
|
|
323
|
|
|
|
|
|
|
} |
|
324
|
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
=head2 get_Individuals |
|
326
|
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
Title : get_Individuals |
|
328
|
|
|
|
|
|
|
Usage : my @inds = $pop->get_Individuals(); |
|
329
|
|
|
|
|
|
|
Function: Return the individuals, alternatively restrict by a criteria |
|
330
|
|
|
|
|
|
|
Returns : Array of Bio::PopGen::IndividualI objects |
|
331
|
|
|
|
|
|
|
Args : none if want all the individuals OR, |
|
332
|
|
|
|
|
|
|
-unique_id => To get an individual with a specific id |
|
333
|
|
|
|
|
|
|
-marker => To only get individuals which have a genotype specific |
|
334
|
|
|
|
|
|
|
for a specific marker name |
|
335
|
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
=cut |
|
338
|
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
sub get_Individuals{ |
|
340
|
93
|
|
|
93
|
1
|
1160
|
my ($self,@args) = @_; |
|
341
|
93
|
50
|
|
|
|
108
|
my @inds = @{$self->{'_individuals'} || []}; |
|
|
93
|
|
|
|
|
306
|
|
|
342
|
93
|
50
|
|
|
|
195
|
return unless @inds; |
|
343
|
93
|
100
|
|
|
|
156
|
if( @args ) { # save a little time here if @args is empty |
|
344
|
3
|
|
|
|
|
10
|
my ($id,$marker) = $self->_rearrange([qw(UNIQUE_ID MARKER)], @args); |
|
345
|
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
|
|
347
|
3
|
100
|
|
|
|
10
|
if( defined $id ) { |
|
|
|
50
|
|
|
|
|
|
|
348
|
1
|
|
|
|
|
3
|
@inds = grep { $_->unique_id eq $id } @inds; |
|
|
2
|
|
|
|
|
3
|
|
|
349
|
|
|
|
|
|
|
} elsif (defined $marker) { |
|
350
|
2
|
|
|
|
|
4
|
@inds = grep { $_->has_Marker($marker) } @inds; |
|
|
4
|
|
|
|
|
10
|
|
|
351
|
|
|
|
|
|
|
} |
|
352
|
|
|
|
|
|
|
} |
|
353
|
93
|
|
|
|
|
298
|
return @inds; |
|
354
|
|
|
|
|
|
|
} |
|
355
|
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
=head2 get_Genotypes |
|
357
|
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
Title : get_Genotypes |
|
359
|
|
|
|
|
|
|
Usage : my @genotypes = $pop->get_Genotypes(-marker => $name) |
|
360
|
|
|
|
|
|
|
Function: Get the genotypes for all the individuals for a specific |
|
361
|
|
|
|
|
|
|
marker name |
|
362
|
|
|
|
|
|
|
Returns : Array of Bio::PopGen::GenotypeI objects |
|
363
|
|
|
|
|
|
|
Args : -marker => name of the marker |
|
364
|
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
=cut |
|
367
|
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
sub get_Genotypes{ |
|
369
|
1240
|
|
|
1240
|
1
|
1982
|
my ($self,@args) = @_; |
|
370
|
1240
|
|
|
|
|
2906
|
my ($name) = $self->_rearrange([qw(MARKER)],@args); |
|
371
|
1240
|
50
|
|
|
|
2342
|
if( defined $name ) { |
|
372
|
53050
|
|
|
|
|
59694
|
return grep { defined $_ } map { $_->get_Genotypes(-marker => $name) } |
|
|
53050
|
|
|
|
|
85698
|
|
|
373
|
1240
|
50
|
|
|
|
1325
|
@{$self->{'_individuals'} || []} |
|
|
1240
|
|
|
|
|
2645
|
|
|
374
|
|
|
|
|
|
|
} |
|
375
|
0
|
|
|
|
|
0
|
$self->warn("You needed to have provided a valid -marker value"); |
|
376
|
0
|
|
|
|
|
0
|
return (); |
|
377
|
|
|
|
|
|
|
} |
|
378
|
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
=head2 get_marker_names |
|
381
|
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
Title : get_marker_names |
|
383
|
|
|
|
|
|
|
Usage : my @names = $pop->get_marker_names; |
|
384
|
|
|
|
|
|
|
Function: Get the names of the markers |
|
385
|
|
|
|
|
|
|
Returns : Array of strings |
|
386
|
|
|
|
|
|
|
Args : [optional] boolean flag to ignore internal cache status |
|
387
|
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
=cut |
|
390
|
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
sub get_marker_names { |
|
392
|
78
|
|
|
78
|
1
|
1146
|
my ($self,$force) = @_; |
|
393
|
48
|
50
|
|
|
|
211
|
return @{$self->{'_cached_markernames'} || []} |
|
394
|
78
|
100
|
66
|
|
|
301
|
if( ! $force && defined $self->{'_cached_markernames'}); |
|
395
|
30
|
|
|
|
|
46
|
my %unique; |
|
396
|
30
|
|
|
|
|
64
|
foreach my $n ( map { $_->get_marker_names } $self->get_Individuals() ) { |
|
|
1144
|
|
|
|
|
1735
|
|
|
397
|
41333
|
|
|
|
|
35592
|
$unique{$n}++; |
|
398
|
|
|
|
|
|
|
} |
|
399
|
30
|
|
|
|
|
1489
|
my @nms = keys %unique; |
|
400
|
30
|
100
|
|
|
|
140
|
if( $nms[0] =~ /^(Site|Codon)/ ) { |
|
401
|
|
|
|
|
|
|
# sort by site or codon number and do it in |
|
402
|
|
|
|
|
|
|
# a schwartzian transformation baby! |
|
403
|
2
|
|
|
|
|
6
|
@nms = map { $_->[1] } |
|
404
|
0
|
|
|
|
|
0
|
sort { $a->[0] <=> $b->[0] } |
|
405
|
2
|
|
|
|
|
4
|
map { [$_ =~ /^(?:Codon|Site)-(\d+)/, $_] } @nms; |
|
|
2
|
|
|
|
|
13
|
|
|
406
|
|
|
|
|
|
|
} |
|
407
|
30
|
|
|
|
|
165
|
$self->{'_cached_markernames'} = [ @nms ]; |
|
408
|
30
|
50
|
|
|
|
40
|
return @{$self->{'_cached_markernames'} || []}; |
|
|
30
|
|
|
|
|
348
|
|
|
409
|
|
|
|
|
|
|
} |
|
410
|
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
=head2 get_Marker |
|
413
|
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
Title : get_Marker |
|
415
|
|
|
|
|
|
|
Usage : my $marker = $population->get_Marker($name) |
|
416
|
|
|
|
|
|
|
Function: Get a Bio::PopGen::Marker object based on this population |
|
417
|
|
|
|
|
|
|
Returns : Bio::PopGen::MarkerI object |
|
418
|
|
|
|
|
|
|
Args : name of the marker |
|
419
|
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=cut |
|
422
|
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
sub get_Marker{ |
|
424
|
896
|
|
|
896
|
1
|
2415
|
my ($self,$markername) = @_; |
|
425
|
896
|
|
|
|
|
882
|
my $marker; |
|
426
|
|
|
|
|
|
|
# setup some caching too |
|
427
|
896
|
100
|
66
|
|
|
3041
|
if( defined $self->{'_allele_freqs'} && |
|
428
|
|
|
|
|
|
|
defined ($marker = $self->{'_allele_freqs'}->{$markername}) ) { |
|
429
|
|
|
|
|
|
|
# marker is now set to the stored value |
|
430
|
|
|
|
|
|
|
} else { |
|
431
|
490
|
|
|
|
|
1085
|
my @genotypes = $self->get_Genotypes(-marker => $markername); |
|
432
|
490
|
|
|
|
|
1563
|
$marker = Bio::PopGen::Marker->new(-name => $markername); |
|
433
|
|
|
|
|
|
|
|
|
434
|
490
|
50
|
|
|
|
800
|
if( ! @genotypes ) { |
|
435
|
0
|
|
|
|
|
0
|
$self->warn("No genotypes for Marker $markername in the population"); |
|
436
|
|
|
|
|
|
|
} else { |
|
437
|
490
|
|
|
|
|
591
|
my %alleles; |
|
438
|
|
|
|
|
|
|
my $count; |
|
439
|
490
|
|
|
|
|
689
|
for my $al ( map { $_->get_Alleles} @genotypes ) { |
|
|
36128
|
|
|
|
|
46334
|
|
|
440
|
71266
|
50
|
|
|
|
75595
|
next if($al eq '?'); |
|
441
|
71266
|
|
|
|
|
52570
|
$count++; |
|
442
|
71266
|
|
|
|
|
63939
|
$alleles{$al}++ |
|
443
|
|
|
|
|
|
|
} |
|
444
|
490
|
|
|
|
|
3864
|
foreach my $allele ( keys %alleles ) { |
|
445
|
934
|
|
|
|
|
2556
|
$marker->add_Allele_Frequency($allele, $alleles{$allele}/$count); |
|
446
|
934
|
|
|
|
|
1642
|
$marker->{_marker_coverage} = $count/2; |
|
447
|
|
|
|
|
|
|
} |
|
448
|
|
|
|
|
|
|
} |
|
449
|
490
|
|
|
|
|
2448
|
$self->{'_allele_freqs'}->{$markername} = $marker; |
|
450
|
|
|
|
|
|
|
} |
|
451
|
896
|
|
|
|
|
1710
|
return $marker; |
|
452
|
|
|
|
|
|
|
} |
|
453
|
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
=head2 get_number_individuals |
|
456
|
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
Title : get_number_individuals |
|
458
|
|
|
|
|
|
|
Usage : my $count = $pop->get_number_individuals; |
|
459
|
|
|
|
|
|
|
Function: Get the count of the number of individuals |
|
460
|
|
|
|
|
|
|
Returns : integer >= 0 |
|
461
|
|
|
|
|
|
|
Args : none |
|
462
|
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
=cut |
|
465
|
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
sub get_number_individuals{ |
|
467
|
1245
|
|
|
1245
|
1
|
1436
|
my ($self,$markername) = @_; |
|
468
|
|
|
|
|
|
|
|
|
469
|
1245
|
50
|
|
|
|
1631
|
if( $self->{'_forced_set_individuals'} ) { |
|
470
|
0
|
|
|
|
|
0
|
return $self->{'_forced_set_individuals'}; |
|
471
|
|
|
|
|
|
|
} |
|
472
|
|
|
|
|
|
|
|
|
473
|
1245
|
100
|
|
|
|
1459
|
unless( defined $markername ) { |
|
474
|
30
|
50
|
|
|
|
36
|
return scalar @{$self->{'_individuals'} || []}; |
|
|
30
|
|
|
|
|
94
|
|
|
475
|
|
|
|
|
|
|
} else { |
|
476
|
1215
|
|
|
|
|
1079
|
my $number =0; |
|
477
|
1215
|
50
|
|
|
|
1014
|
foreach my $individual ( @{$self->{'_individuals'} || []} ) { |
|
|
1215
|
|
|
|
|
1842
|
|
|
478
|
9063
|
50
|
|
|
|
11161
|
$number++ if( $individual->has_Marker($markername)); |
|
479
|
|
|
|
|
|
|
} |
|
480
|
1215
|
|
|
|
|
1667
|
return $number; |
|
481
|
|
|
|
|
|
|
} |
|
482
|
|
|
|
|
|
|
} |
|
483
|
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
=head2 set_number_individuals |
|
485
|
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
Title : set_number_individuals |
|
487
|
|
|
|
|
|
|
Usage : $pop->set_number_individuals($num); |
|
488
|
|
|
|
|
|
|
Function: Fixes the number of individuals, call this with |
|
489
|
|
|
|
|
|
|
0 to unset. |
|
490
|
|
|
|
|
|
|
Only use this if you know what you are doing, |
|
491
|
|
|
|
|
|
|
this is only relevant when you are just adding |
|
492
|
|
|
|
|
|
|
allele frequency data for a population and want to |
|
493
|
|
|
|
|
|
|
calculate something like theta |
|
494
|
|
|
|
|
|
|
Returns : none |
|
495
|
|
|
|
|
|
|
Args : individual count, calling it with undef or 0 |
|
496
|
|
|
|
|
|
|
will reset the value to return a number |
|
497
|
|
|
|
|
|
|
calculated from the number of individuals |
|
498
|
|
|
|
|
|
|
stored for this population. |
|
499
|
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
=cut |
|
501
|
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
sub set_number_individuals{ |
|
503
|
0
|
|
|
0
|
1
|
0
|
my ($self,$indcount) = @_; |
|
504
|
0
|
|
|
|
|
0
|
return $self->{'_forced_set_individuals'} = $indcount; |
|
505
|
|
|
|
|
|
|
} |
|
506
|
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
=head2 get_Frequency_Homozygotes |
|
509
|
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
Title : get_Frequency_Homozygotes |
|
511
|
|
|
|
|
|
|
Usage : my $freq = $pop->get_Frequency_Homozygotes; |
|
512
|
|
|
|
|
|
|
Function: Calculate the frequency of homozygotes in the population |
|
513
|
|
|
|
|
|
|
Returns : fraction between 0 and 1 |
|
514
|
|
|
|
|
|
|
Args : $markername |
|
515
|
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
=cut |
|
518
|
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
sub get_Frequency_Homozygotes{ |
|
520
|
405
|
|
|
405
|
1
|
508
|
my ($self,$marker,$allelename) = @_; |
|
521
|
405
|
|
|
|
|
419
|
my ($homozygote_count) = 0; |
|
522
|
405
|
50
|
33
|
|
|
1271
|
return 0 if ! defined $marker || ! defined $allelename; |
|
523
|
|
|
|
|
|
|
$marker = $marker->name if( defined $marker && |
|
524
|
|
|
|
|
|
|
ref($marker) && |
|
525
|
405
|
0
|
33
|
|
|
888
|
( $self->{'_checkisa'} ? |
|
|
|
0
|
33
|
|
|
|
|
|
526
|
|
|
|
|
|
|
$marker->isa('Bio::PopGen::MarkerI') : 1)); |
|
527
|
405
|
|
|
|
|
513
|
my $total = $self->get_number_individuals($marker); |
|
528
|
405
|
|
|
|
|
601
|
foreach my $genotype ( $self->get_Genotypes($marker) ) { |
|
529
|
3021
|
|
|
|
|
3956
|
my %alleles = map { $_ => 1} $genotype->get_Alleles(); |
|
|
3021
|
|
|
|
|
4710
|
|
|
530
|
|
|
|
|
|
|
# what to do for non-diploid situations? |
|
531
|
3021
|
100
|
|
|
|
5502
|
if( $alleles{$allelename} ) { |
|
532
|
1440
|
50
|
|
|
|
2293
|
$homozygote_count++ if( keys %alleles == 1); |
|
533
|
|
|
|
|
|
|
} |
|
534
|
|
|
|
|
|
|
} |
|
535
|
405
|
50
|
|
|
|
959
|
return $total ? $homozygote_count / $total : 0; |
|
536
|
|
|
|
|
|
|
} |
|
537
|
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
=head2 get_Frequency_Heterozygotes |
|
539
|
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
Title : get_Frequency_Heterozygotes |
|
541
|
|
|
|
|
|
|
Usage : my $freq = $pop->get_Frequency_Homozygotes; |
|
542
|
|
|
|
|
|
|
Function: Calculate the frequency of homozygotes in the population |
|
543
|
|
|
|
|
|
|
Returns : fraction between 0 and 1 |
|
544
|
|
|
|
|
|
|
Args : $markername |
|
545
|
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
=cut |
|
548
|
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
sub get_Frequency_Heterozygotes{ |
|
550
|
0
|
|
|
0
|
1
|
0
|
my ($self,$marker,$allelename) = @_; |
|
551
|
0
|
|
|
|
|
0
|
my ($heterozygote_count) = 0; |
|
552
|
0
|
0
|
0
|
|
|
0
|
return 0 if ! defined $marker || ! defined $allelename; |
|
553
|
|
|
|
|
|
|
$marker = $marker->name if( defined $marker && ref($marker) && |
|
554
|
0
|
0
|
0
|
|
|
0
|
($self->{'_checkisa'} ? |
|
|
|
0
|
0
|
|
|
|
|
|
555
|
|
|
|
|
|
|
$marker->isa('Bio::PopGen::MarkerI') : 1)); |
|
556
|
0
|
0
|
|
|
|
0
|
if( ref($marker) ) { |
|
557
|
0
|
|
|
|
|
0
|
$self->warn("Passed in a ".ref($marker). " to has_Marker, expecting either a string or a Bio::PopGen::MarkerI"); |
|
558
|
0
|
|
|
|
|
0
|
return 0; |
|
559
|
|
|
|
|
|
|
} |
|
560
|
0
|
|
|
|
|
0
|
my $total = $self->get_number_individuals($marker); |
|
561
|
|
|
|
|
|
|
|
|
562
|
0
|
|
|
|
|
0
|
foreach my $genotype ( $self->get_Genotypes($marker) ) { |
|
563
|
0
|
|
|
|
|
0
|
my %alleles = map { $_ => 1} $genotype->get_Alleles(); |
|
|
0
|
|
|
|
|
0
|
|
|
564
|
|
|
|
|
|
|
# what to do for non-diploid situations? |
|
565
|
0
|
0
|
|
|
|
0
|
if( $alleles{$allelename} ) { |
|
566
|
0
|
0
|
|
|
|
0
|
$heterozygote_count++ if( keys %alleles == 2); |
|
567
|
|
|
|
|
|
|
} |
|
568
|
|
|
|
|
|
|
} |
|
569
|
0
|
0
|
|
|
|
0
|
return $total ? $heterozygote_count / $total : 0; |
|
570
|
|
|
|
|
|
|
} |
|
571
|
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
=head2 haploid_population |
|
573
|
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
Title : haploid_population |
|
575
|
|
|
|
|
|
|
Usage : my $pop = $population->haploid_population; |
|
576
|
|
|
|
|
|
|
Function: Make a new population where all the individuals |
|
577
|
|
|
|
|
|
|
are haploid - effectively an individual out of each |
|
578
|
|
|
|
|
|
|
chromosome an individual has. |
|
579
|
|
|
|
|
|
|
Returns : L |
|
580
|
|
|
|
|
|
|
Args : None |
|
581
|
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
=cut |
|
584
|
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
sub haploid_population{ |
|
586
|
33
|
|
|
33
|
1
|
55
|
my ($self) = @_; |
|
587
|
33
|
|
|
|
|
42
|
my @inds; |
|
588
|
33
|
|
|
|
|
60
|
my @marker_names = $self->get_marker_names; |
|
589
|
|
|
|
|
|
|
|
|
590
|
33
|
|
|
|
|
74
|
for my $ind ( $self->get_Individuals ) { |
|
591
|
658
|
|
|
|
|
847
|
my @chromosomes; |
|
592
|
658
|
|
|
|
|
1560
|
my $id = $ind->unique_id; |
|
593
|
|
|
|
|
|
|
# separate genotypes into 'chromosomes' |
|
594
|
658
|
|
|
|
|
1027
|
for my $marker_name( @marker_names ) { |
|
595
|
18918
|
|
|
|
|
41243
|
my ($genotype) = $ind->get_Genotypes(-marker => $marker_name); |
|
596
|
18918
|
|
|
|
|
20916
|
my $i =0; |
|
597
|
18918
|
|
|
|
|
29967
|
for my $allele ( $genotype->get_Alleles ) { |
|
598
|
37194
|
|
|
|
|
33962
|
push @{$chromosomes[$i]}, |
|
|
37194
|
|
|
|
|
118753
|
|
|
599
|
|
|
|
|
|
|
Bio::PopGen::Genotype->new(-marker_name => $marker_name, |
|
600
|
|
|
|
|
|
|
-individual_id => $id.".$i", |
|
601
|
|
|
|
|
|
|
-alleles => [$allele]); |
|
602
|
37194
|
|
|
|
|
72154
|
$i++; |
|
603
|
|
|
|
|
|
|
} |
|
604
|
|
|
|
|
|
|
} |
|
605
|
658
|
|
|
|
|
894
|
for my $chrom ( @chromosomes ) { |
|
606
|
1198
|
|
|
|
|
4559
|
my $copyind = ref($ind)->new(-unique_id => $id.".1", |
|
607
|
|
|
|
|
|
|
-genotypes => $chrom); |
|
608
|
1198
|
|
|
|
|
4468
|
push @inds, $ind; |
|
609
|
|
|
|
|
|
|
} |
|
610
|
|
|
|
|
|
|
} |
|
611
|
33
|
|
|
|
|
223
|
my $population = ref($self)->new(-name => $self->name, |
|
612
|
|
|
|
|
|
|
-source => $self->source, |
|
613
|
|
|
|
|
|
|
-description => $self->description, |
|
614
|
|
|
|
|
|
|
-individuals => \@inds); |
|
615
|
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
} |
|
617
|
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
1; |