File Coverage

Bio/PopGen/IO/prettybase.pm
Criterion Covered Total %
statement 77 88 87.5
branch 19 34 55.8
condition 8 17 47.0
subroutine 13 13 100.0
pod 5 5 100.0
total 122 157 77.7


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::PopGen::IO::prettybase
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Jason Stajich
7             #
8             # Copyright Jason Stajich
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::PopGen::IO::prettybase - Extract individual allele data from PrettyBase format
17              
18             =head1 SYNOPSIS
19              
20             Do not use directly, use through the Bio::PopGen::IO driver
21              
22             =head1 DESCRIPTION
23              
24             This object will parse comma delimited PrettyBase output. PrettyBase
25             is defined by the SeattleSNPs http://pga.gs.washington.edu/
26              
27             This is expected to be tab delimited (you can vary with the
28             field_delimiter flag SITE SAMPLE ALLELE1 ALLELE2
29              
30             There are 2 initialization parameters, the delimiter
31             (-field_delimiter) [default 'tab'] and a boolean -no_header which
32             specifies if there is no header line to read in. All lines starting
33             with '#' will be skipped
34              
35             =head1 FEEDBACK
36              
37             =head2 Mailing Lists
38              
39             User feedback is an integral part of the evolution of this and other
40             Bioperl modules. Send your comments and suggestions preferably to
41             the Bioperl mailing list. Your participation is much appreciated.
42              
43             bioperl-l@bioperl.org - General discussion
44             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
45              
46             =head2 Support
47              
48             Please direct usage questions or support issues to the mailing list:
49              
50             I
51              
52             rather than to the module maintainer directly. Many experienced and
53             reponsive experts will be able look at the problem and quickly
54             address it. Please include a thorough description of the problem
55             with code and data examples if at all possible.
56              
57             =head2 Reporting Bugs
58              
59             Report bugs to the Bioperl bug tracking system to help us keep track
60             of the bugs and their resolution. Bug reports can be submitted via
61             the web:
62              
63             https://github.com/bioperl/bioperl-live/issues
64              
65             =head1 AUTHOR - Jason Stajich
66              
67             Email jason-at-bioperl.org
68              
69             =head1 CONTRIBUTORS
70              
71             Matthew Hahn, matthew.hahn-at-duke.edu
72              
73             =head1 APPENDIX
74              
75             The rest of the documentation details each of the object methods.
76             Internal methods are usually preceded with a _
77              
78             =cut
79              
80              
81             # Let the code begin...
82              
83              
84             package Bio::PopGen::IO::prettybase;
85 1     1   7 use vars qw($FieldDelim $Header);
  1         2  
  1         52  
86 1     1   5 use strict;
  1         3  
  1         39  
87              
88             ($FieldDelim,$Header) =( '\t',0);
89              
90              
91 1     1   5 use Bio::PopGen::Individual;
  1         2  
  1         18  
92 1     1   4 use Bio::PopGen::Population;
  1         1  
  1         15  
93 1     1   4 use Bio::PopGen::Genotype;
  1         2  
  1         19  
94              
95 1     1   4 use base qw(Bio::PopGen::IO);
  1         2  
  1         853  
96              
97             =head2 new
98              
99             Title : new
100             Usage : my $obj = Bio::PopGen::IO::prettybase->new();
101             Function: Builds a new Bio::PopGen::IO::prettybase object
102             Returns : an instance of Bio::PopGen::IO::prettybase
103             Args : -field_delimiter => a field delimiter character or regexp (default is /\t/ )
104             -header => boolean if the file will have a header and parser should
105             skip first line in the file (default is false)
106             -convert_indel_states => convert alleles which are longer than one character
107             to an 'I' meaning insert state, and alleles which are
108             '-' to a delete state.
109             (default is false)
110              
111             =cut
112              
113             sub _initialize {
114 5     5   15 my($self, @args) = @_;
115 5         17 my ($fieldsep,
116             $conv_indels,
117             $header) = $self->_rearrange([qw(FIELD_DELIMITER
118             CONVERT_INDEL_STATES
119             HEADER)],@args);
120              
121 5 50       24 $self->flag('header', defined $header ? $header : $Header);
122 5 50       12 $self->flag('field_delimiter',defined $fieldsep ? $fieldsep : $FieldDelim);
123 5         8 $self->{'_header'} = undef;
124 5         12 $self->{'_parsed_individiuals'} = [];
125 5         11 $self->{'_parsed'} = 0;
126 5   50     15 $self->flag('convert_indel',$conv_indels || 0);
127 5         8 return 1;
128             }
129              
130             =head2 flag
131              
132             Title : flag
133             Usage : $obj->flag($flagname,$newval)
134             Function: Get/Set the flag value
135             Returns : value of a flag (a boolean)
136             Args : A flag name, currently we expect
137             'header', 'field_delimiter', or 'allele_delimiter'
138             on set, new value (a boolean or undef, optional)
139              
140              
141             =cut
142              
143             sub flag{
144 124     124 1 139 my $self = shift;
145 124         150 my $fieldname = shift;
146 124 50       187 return unless defined $fieldname;
147            
148 124 100       187 return $self->{'_flag'}->{$fieldname} = shift if @_;
149 109         1110 return $self->{'_flag'}->{$fieldname};
150             }
151              
152              
153             =head2 next_individual
154              
155             Title : next_individual
156             Usage : my $ind = $popgenio->next_individual;
157             Function: Retrieve the next individual from a dataset
158             Returns : Bio::PopGen::IndividualI object
159             Args : none
160              
161              
162             =cut
163              
164             sub next_individual {
165 42     42 1 67 my ($self) = @_;
166 42 100       58 unless( $self->{'_parsed'} ) {
167 3         9 $self->_parse_prettybase;
168             }
169 42         66 return $self->{'_parsed_individiuals'}->[$self->{'_iterator'}++];
170             }
171              
172              
173              
174             =head2 next_population
175              
176             Title : next_population
177             Usage : my $ind = $popgenio->next_population;
178             Function: Retrieve the next population from a dataset
179             Returns : Bio::PopGen::PopulationI object
180             Args : none
181             Note : Many implementation will not implement this
182              
183             =cut
184              
185             # Plan is to just return the whole dataset as a single population by
186             # default I think - people would then have each population in a separate
187             # file.
188              
189             sub next_population{
190 2     2 1 14 my ($self) = @_;
191 2         3 my @inds;
192 2         7 while( my $ind = $self->next_individual ) {
193 33         55 push @inds, $ind;
194             }
195 2 50       4 return unless @inds;
196 2         13 Bio::PopGen::Population->new(-individuals => \@inds);
197             }
198              
199              
200             sub _parse_prettybase {
201 3     3   5 my $self = shift;
202 3         5 my %inds;
203 3         7 my $convert_indels = $self->flag('convert_indel');
204 3         17 while( defined( $_ = $self->_readline) ) {
205 110 100 66     548 next if( /^\s*\#/ || /^\s+$/ || ! length($_) );
      66        
206            
207 106         185 my ($site,$sample,@alleles) = split($self->flag('field_delimiter'),$_);
208 106 50       224 if( ! defined $sample ) {
209 0         0 warn("sample id is undefined for $_");
210 0         0 next;
211             }
212 106         154 for my $allele ( @alleles ) {
213 182         229 $allele =~ s/^\s+//;
214 182         367 $allele =~ s/\s+$//;
215 182 50       285 if( $convert_indels ) {
216 0 0       0 if( length($allele) > 1 ) {
    0          
217             # we have an insert state
218 0         0 $allele = 'I';
219             } elsif( $allele eq '-' ) {
220             # have a delete state
221 0         0 $allele = 'D';
222             }
223             }
224             }
225            
226 106         300 my $g = Bio::PopGen::Genotype->new(-alleles => \@alleles,
227             -marker_name => $site,
228             -individual_id=> $sample);
229            
230              
231 106 100       228 if( ! defined $inds{$sample} ) {
232 39         115 $inds{$sample} = Bio::PopGen::Individual->new(-unique_id => $sample);
233             }
234 106         206 $inds{$sample}->add_Genotype($g);
235             }
236 3         13 $self->{'_parsed_individiuals'} = [ values %inds ];
237 3         5 $self->{'_parsed'} = 1;
238 3         11 return;
239             }
240              
241              
242             =head2 write_individual
243              
244             Title : write_individual
245             Usage : $popgenio->write_individual($ind);
246             Function: Write an individual out in the file format
247             Returns : none
248             Args : L object(s)
249              
250             =cut
251              
252             sub write_individual{
253 1     1 1 7 my ($self,@inds) = @_;
254 1         2 foreach my $ind ( @inds ) {
255 22 50 33     85 if (! ref($ind) || ! $ind->isa('Bio::PopGen::IndividualI') ) {
256 0         0 $self->warn("Cannot write an object that is not a Bio::PopGen::IndividualI object");
257 0         0 next;
258             }
259 22         36 foreach my $marker ( $ind->get_marker_names ) {
260 440         681 my $g = $ind->get_Genotypes(-marker=> $marker);
261 440 50       553 next unless defined $g;
262 440         593 $self->_print( join("\t", $marker, $ind->unique_id,
263             $g->get_Alleles), "\n");
264             }
265             }
266            
267             }
268              
269              
270              
271             =head2 write_population
272              
273             Title : write_population
274             Usage : $popgenio->write_population($pop);
275             Function: Write a population out in the file format
276             Returns : none
277             Args : L object(s)
278             Note : Many implementation will not implement this
279              
280             =cut
281              
282             sub write_population{
283 1     1 1 5 my ($self,@pops) = @_;
284 1         2 foreach my $pop ( @pops ) {
285 2 50 33     14 if (! ref($pop) || ! $pop->isa('Bio::PopGen::PopulationI') ) {
286 0         0 $self->warn("Cannot write an object that is not a Bio::PopGen::PopulationI object");
287 0         0 next;
288             }
289 2         7 my @mnames = $pop->get_marker_names;
290 2         5 foreach my $ind ( $pop->get_Individuals ) {
291 17 50 33     73 if (! ref($ind) || ! $ind->isa('Bio::PopGen::IndividualI') ) {
292 0         0 $self->warn("Cannot write an object that is not a Bio::PopGen::IndividualI object");
293 0         0 next;
294             }
295 17         26 foreach my $marker ( @mnames ) {
296 340         535 my $g = $ind->get_Genotypes(-marker=> $marker);
297 340 50       445 next unless defined $g;
298 340         451 $self->_print( join("\t", $marker, $ind->unique_id,
299             $g->get_Alleles), "\n");
300            
301             }
302             }
303             }
304             }
305              
306             1;