File Coverage

Bio/PopGen/IO/phase.pm
Criterion Covered Total %
statement 127 142 89.4
branch 32 46 69.5
condition 14 33 42.4
subroutine 11 12 91.6
pod 5 5 100.0
total 189 238 79.4


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::PopGen::IO::phase
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Rich Dobson
7             #
8             # Copyright Rich Dobson
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::PopGen::IO::phase - A parser for Phase format data
17              
18             =head1 SYNOPSIS
19              
20             # Do not use directly, use through the Bio::PopGen::IO driver
21              
22             use Bio::PopGen::IO;
23             my $io = Bio::PopGen::IO->new(-format => 'phase',
24             -file => 'data.phase');
25              
26             # Some IO might support reading in a population at a time
27              
28             my @population;
29             while( my $ind = $io->next_individual ) {
30             push @population, $ind;
31             }
32              
33              
34             =head1 DESCRIPTION
35              
36             A driver module for Bio::PopGen::IO for parsing phase data.
37              
38             PHASE is defined in http://www.stat.washington.edu/stephens/instruct2.1.pdf
39              
40             =head1 FEEDBACK
41              
42             =head2 Mailing Lists
43              
44             User feedback is an integral part of the evolution of this and other
45             Bioperl modules. Send your comments and suggestions preferably to
46             the Bioperl mailing list. Your participation is much appreciated.
47              
48             bioperl-l@bioperl.org - General discussion
49             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
50              
51             =head2 Support
52              
53             Please direct usage questions or support issues to the mailing list:
54              
55             I
56              
57             rather than to the module maintainer directly. Many experienced and
58             reponsive experts will be able look at the problem and quickly
59             address it. Please include a thorough description of the problem
60             with code and data examples if at all possible.
61              
62             =head2 Reporting Bugs
63              
64             Report bugs to the Bioperl bug tracking system to help us keep track
65             of the bugs and their resolution. Bug reports can be submitted via
66             the web:
67              
68             https://github.com/bioperl/bioperl-live/issues
69              
70             =head1 AUTHOR - Rich Dobson
71              
72             Email r.j.dobson-at-qmul.ac.uk
73              
74             =head1 CONTRIBUTORS
75              
76             Jason Stajich, jason-at-bioperl.org
77              
78             =head1 APPENDIX
79              
80             The rest of the documentation details each of the object methods.
81             Internal methods are usually preceded with a _
82              
83             =cut
84              
85              
86             # Let the code begin...
87              
88              
89             package Bio::PopGen::IO::phase;
90 1     1   5 use vars qw($FieldDelim $AlleleDelim $NoHeader);
  1         2  
  1         59  
91 1     1   4 use strict;
  1         2  
  1         34  
92              
93             ($FieldDelim, $AlleleDelim, $NoHeader) = (' ', '\s+', 1);
94              
95              
96              
97              
98 1     1   3 use Bio::PopGen::Individual;
  1         1  
  1         15  
99 1     1   3 use Bio::PopGen::Population;
  1         2  
  1         14  
100 1     1   18 use Bio::PopGen::Genotype;
  1         1  
  1         25  
101              
102 1     1   4 use base qw(Bio::PopGen::IO);
  1         1  
  1         1268  
103              
104             =head2 new
105              
106             Title : new
107             Usage : my $obj = Bio::PopGen::IO::hapmap->new();
108             Function: Builds a new Bio::PopGen::IO::hapmap object
109             Returns : an instance of Bio::PopGen::IO::hapmap
110             Args : [optional, these are the current defaults]
111             -field_delimiter => ' '
112             -allele_delimiter=> '\s+'
113             -no_header => 0,
114              
115              
116             =cut
117              
118              
119             sub _initialize {
120              
121 3     3   5 my($self, @args) = @_;
122              
123 3         7 $Bio::PopGen::Genotype::BlankAlleles='';
124              
125 3         9 my ($fieldsep,$all_sep,
126             $noheader) = $self->_rearrange([qw(FIELD_DELIMITER
127             ALLELE_DELIMITER
128             NO_HEADER)],@args);
129              
130 3 50       13 $self->flag('no_header', defined $noheader ? $noheader : $NoHeader);
131 3 50       11 $self->flag('field_delimiter',defined $fieldsep ? $fieldsep : $FieldDelim);
132 3 50       8 $self->flag('allele_delimiter',defined $all_sep ? $all_sep : $AlleleDelim);
133              
134 3         4 $self->{'_header'} = undef;
135 3         7 return 1;
136              
137             }
138              
139             =head2 flag
140              
141             Title : flag
142             Usage : $obj->flag($flagname,$newval)
143             Function: Get/Set the flag value
144             Returns : value of a flag (a boolean)
145             Args : A flag name, currently we expect
146             'no_header', 'field_delimiter', or 'allele_delimiter'
147             on set, new value (a boolean or undef, optional)
148              
149              
150             =cut
151              
152             sub flag {
153              
154 149     149 1 109 my $self = shift;
155 149         101 my $fieldname = shift;
156 149 50       173 return unless defined $fieldname;
157              
158 149 100       196 return $self->{'_flag'}->{$fieldname} = shift if @_;
159 134         420 return $self->{'_flag'}->{$fieldname};
160              
161             }
162              
163             =head2 next_individual
164              
165             Title : next_individual
166             Usage : my $ind = $popgenio->next_individual;
167             Function: Retrieve the next individual from a dataset
168             Returns : L object
169             Args : none
170              
171              
172             =cut
173              
174             sub next_individual {
175 14     14 1 25 my ($self) = @_;
176              
177 14         9 my ($sam,@marker_results,$number_of_ids,$number_of_markers,
178             $marker_positions,$micro_snp);
179              
180 14         31 while( defined( $_ = $self->_readline) ) {
181 13         12 chomp;
182 13 50 33     49 next if( /^\s+$/ || ! length($_) );
183 13         9 last;
184             }
185            
186 14 100       26 return unless defined $_;
187 13 50 33     33 if( $self->flag('no_header') || defined $self->{'_header'} ) {
188              
189             ####### sometimes there is some marker info at the start of a phase input file
190             ####### we collect it in the next few lines if there is. Should this info be held in a marker object?
191              
192 13 100 66     94 if(!$self->{'_count'} && /^\s*\d+$/){
    100 66        
    100 66        
    100 66        
    100          
193 1         2 $self->flag('number_of_ids',$_);
194             #print "number_of_ids : $number_of_ids\n";
195 1         2 $self->{'_count'}++;
196 1         8 return $self->next_individual;
197             } elsif($self->{'_count'} == 1 && /^\s*\d+$/){
198 1         2 $self->flag('number_of_markers',$_);
199             #print "number_of_markers : $number_of_markers\n";
200 1         2 $self->{'_count'}++;
201 1         7 return $self->next_individual;
202             } elsif($self->{'_count'} == 2 && /^\s*P\s\d/){
203 1         3 $self->flag('marker_positions',$_);
204             #print "marker_position : $marker_positions\n";
205 1         1 $self->{'_count'}++;
206 1         6 return $self->next_individual;
207             } elsif($self->{'_count'} == 3 && /^\s*(M|S)+\s*$/i){
208 1         3 $self->flag('micro_snp',$_);
209             #print "microsat or snp : $micro_snp\n";
210 1         1 $self->{'_count'}++;
211 1         6 return $self->next_individual;
212             } elsif(/^\s*\#/){
213 3         7 ($self->{'_sam'}) = /^\s*\#(.+)/;
214             #print "sample : $self->{'_sam'}\n";
215 3         3 $self->{'_count'}++;
216 3         8 return $self->next_individual;
217             } else {
218 6 100       9 if( $self->{'_row1'} ) {
219             # if we are looking at the 2nd row of alleles for this id
220              
221 3         5 @{$self->{'_second_row'}} =
  3         8  
222             split($self->flag('field_delimiter'),$_);
223              
224 3         3 for my $i(0 .. $#{$self->{'_first_row'}}){
  3         10  
225              
226 15         19 push(@{$self->{'_marker_results'}},
227             $self->{'_first_row'}->[$i].
228             $self->flag('field_delimiter').
229 15         10 $self->{'_second_row'}->[$i]);
230             }
231 3         5 $self->{'_row1'} = 0;
232             } else {
233             # if we are looking at the first row of alleles for this id
234 3         3 @{$self->{'_marker_results'}} = ();
  3         6  
235 3         7 @{$self->{'_first_row'}} = split($self->flag('field_delimiter'),$_);
  3         10  
236 3         5 $self->{'_row1'} = 1;
237 3         7 return $self->next_individual;
238             }
239             }
240              
241 3         1 my $i = 1;
242 3         3 foreach my $m ( @{$self->{'_marker_results'}} ) {
  3         5  
243 15         26 $m =~ s/^\s+//;
244 15         21 $m =~ s/\s+$//;
245 15         13 my $markername;
246 15 50       21 if( defined($self->flag('marker_positions')) ) {
    0          
247 15         17 $markername = (split($self->flag('field_delimiter'), $self->flag('marker_positions')))[$i];
248             } elsif( defined $self->{'_header'} ) {
249 0   0     0 $markername = $self->{'_header'}->[$i] || "$i";
250             } else {
251 0         0 $markername = "$i";
252             }
253              
254 15         16 my $markertype;
255 15 50       16 if( defined($self->flag('marker_positions')) ) {
256 15         21 $markertype = (split('', $self->flag('micro_snp')))[$i-1];
257             } else {
258 0         0 $markertype = "S";
259             }
260              
261 15         49 $self->debug( "markername is $markername alleles are $m\n");
262 15         18 my @alleles = split($self->flag('allele_delimiter'), $m);
263              
264             $m = Bio::PopGen::Genotype->new(-alleles =>\@alleles,
265             -marker_name => $markername,
266             -marker_type => $markertype,
267 15         60 -individual_id => $self->{'_sam'});
268 15         22 $i++;
269             }
270             return Bio::PopGen::Individual->new(-unique_id => $self->{'_sam'},
271 3         7 -genotypes =>\@{$self->{'_marker_results'}},
  3         11  
272             );
273              
274             } else {
275 0         0 $self->{'_header'} = [split($self->flag('field_delimiter'),$_)];
276 0         0 return $self->next_individual; # rerun loop again
277             }
278 0         0 return;
279             }
280              
281             =head2 next_population
282              
283             Title : next_population
284             Usage : my $ind = $popgenio->next_population;
285             Function: Retrieve the next population from a dataset
286             Returns : L object
287             Args : none
288             Note : Many implementation will not implement this
289              
290             =cut
291              
292             sub next_population{
293 0     0 1 0 my ($self) = @_;
294 0         0 my @inds;
295 0         0 while( my $ind = $self->next_individual ) {
296 0         0 push @inds, $ind;
297             }
298 0         0 Bio::PopGen::Population->new(-individuals => \@inds);
299             }
300              
301             =head2 write_individual
302              
303             Title : write_individual
304             Usage : $popgenio->write_individual($ind);
305             Function: Write an individual out in the file format
306             Returns : none
307             Args : L object(s)
308              
309             =cut
310              
311              
312             sub write_individual {
313 2     2 1 2 my ($self,@inds) = @_;
314 2         6 my $fielddelim = $self->flag('field_delimiter');
315 2         5 my $alleledelim = $self->flag('allele_delimiter');
316              
317             # For now capture print_header flag from @inds
318 2         5 my $header = 1;
319 2 100       10 $header = pop(@inds) if($inds[-1] =~ m/^[01]$/);
320              
321 2         5 foreach my $ind ( @inds ) {
322 4 50 33     23 if (! ref($ind) || ! $ind->isa('Bio::PopGen::IndividualI') ) {
323 0         0 $self->warn("Cannot write an object that is not a Bio::PopGen::IndividualI object ($ind)");
324 0         0 next;
325             }
326              
327             # sort lexically until we have a better way to insure a consistent order
328 4         11 my @marker_names = sort $ind->get_marker_names;
329              
330 4 100       10 if ($header) {
331 1         1 my $n_markers = scalar(@marker_names);
332 1         7 $self->_print( "1\n");
333 1         2 $self->_print( $n_markers, "\n");
334 1 50 33     1 if( $self->flag('no_header') &&
335             ! $self->flag('header_written') ) {
336 1         5 $self->_print(join($fielddelim, ('P', @marker_names)), "\n");
337 1         1 $self->flag('header_written',1);
338             }
339 1         3 foreach my $geno ($ind->get_Genotypes()) {
340 34         44 $self->_print($geno->marker_type);
341             }
342 1         4 $self->_print("\n");
343             }
344            
345 4         4 my(@row1,@row2);
346 4         6 for (@marker_names){
347 49         83 my $geno = $ind->get_Genotypes(-marker => $_);
348 49         69 my @alleles = $geno->get_Alleles(1);
349 49         52 push(@row1,$alleles[0]);
350 49         69 push(@row2,$alleles[1]);
351             }
352 4         10 $self->_print("#",$ind->unique_id,"\n",
353             join($fielddelim,@row1),"\n",
354             join($fielddelim,@row2),"\n");
355             }
356             }
357              
358             =head2 write_population
359              
360             Title : write_population
361             Usage : $popgenio->write_population($pop);
362             Function: Write a population out in the file format
363             Returns : none
364             Args : L object(s)
365             Note : Many implementation will not implement this
366              
367             =cut
368              
369              
370             sub write_population {
371 1     1 1 2 my ($self,@pops) = @_;
372 1         3 my $fielddelim = $self->flag('field_delimiter');
373 1         3 my $alleledelim = $self->flag('allele_delimiter');
374              
375 1         2 foreach my $pop ( @pops ) {
376 1 50 33     10 if (! ref($pop) || ! $pop->isa('Bio::PopGen::PopulationI') ) {
377 0         0 $self->warn("Cannot write an object that is not a Bio::PopGen::PopulationI object");
378 0         0 next;
379             }
380             # sort lexically until we have a better way to insure a consistent order
381 1         6 my @marker_names = sort $pop->get_marker_names;
382 1         2 my $n_markers = scalar(@marker_names);
383 1         4 $self->_print( $pop->get_number_individuals, "\n");
384 1         3 $self->_print( $n_markers, "\n");
385 1 50 33     4 if( $self->flag('no_header') &&
386             ! $self->flag('header_written') ) {
387 1         4 $self->_print( join($fielddelim, ('P', @marker_names)), "\n");
388 1         2 $self->flag('header_written',1);
389             }
390              
391 1         3 foreach (@marker_names) {
392 5         19 $self->_print(($pop->get_Genotypes($_))[0]->marker_type);
393             }
394 1         4 $self->_print("\n");
395            
396 1         3 $self->write_individual( $pop->get_Individuals, 0 );
397             }
398             }
399              
400             1;