File Coverage

Bio/SeqIO/mbsout.pm
Criterion Covered Total %
statement 177 191 92.6
branch 35 46 76.0
condition 2 5 40.0
subroutine 29 30 96.6
pod 19 22 86.3
total 262 294 89.1


line stmt bran cond sub pod time code
1             # POD documentation - main docs before the code
2              
3             =head1 NAME
4              
5             Bio::SeqIO::mbsout - input stream for output by Teshima et al.'s mbs.
6              
7             =head1 SYNOPSIS
8              
9             Do not use this module directly. Use it via the Bio::SeqIO class.
10              
11             =head1 DESCRIPTION
12              
13             mbs (Teshima KM, Innan H (2009) mbs: modifying Hudson's ms software to generate
14             samples of DNA sequences with a biallelic site under selection. BMC
15             Bioinformatics 10: 166 ) can be found at
16             http://www.biomedcentral.com/1471-2105/10/166/additional/.
17              
18             Currently this object can be used to read output from mbs into seq objects.
19             However, because bioperl has no support for haplotypes created using an infinite
20             sites model (where '1' identifies a derived allele and '0' identifies an
21             ancestral allele), the sequences returned by mbsout are coded using A, T, C and
22             G. To decode the bases, use the sequence conversion table (a hash) returned by
23             get_base_conversion_table(). In the table, 4 and 5 are used when the ancestry is
24             unclear. This should not ever happen when creating files with mbs, but it will
25             be used when creating mbsOUT files from a collection of seq objects ( To be
26             added later ). Alternatively, use get_next_hap() to get a string with 1's and
27             0's instead of a seq object.
28              
29             =head1 FEEDBACK
30              
31             =head2 Mailing Lists
32              
33             User feedback is an integral part of the evolution of this and other
34             Bioperl modules. Send your comments and suggestions preferably to the
35             Bioperl mailing list. Your participation is much appreciated.
36              
37             bioperl-l@bioperl.org - General discussion
38             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
39              
40             =head2 Reporting Bugs
41              
42             Report bugs to the Bioperl bug tracking system to help us keep track
43             of the bugs and their resolution. Bug reports can be submitted via the
44             web:
45              
46             https://github.com/bioperl/bioperl-live/issues
47              
48             =head1 AUTHOR - Warren Kretzschmar
49              
50             This module was written by Warren Kretzschmar
51              
52             email: wkretzsch@gmail.com
53              
54             This module grew out of a parser written by Aida Andres.
55              
56             =head1 COPYRIGHT
57              
58             =head2 Public Domain Notice
59              
60             This software/database is ``United States Government Work'' under the
61             terms of the United States Copyright Act. It was written as part of
62             the authors' official duties for the United States Government and thus
63             cannot be copyrighted. This software/database is freely available to
64             the public for use without a copyright notice. Restrictions cannot
65             be placed on its present or future use.
66              
67             Although all reasonable efforts have been taken to ensure the accuracy
68             and reliability of the software and data, the National Human Genome
69             Research Institute (NHGRI) and the U.S. Government does not and cannot
70             warrant the performance or results that may be obtained by using this
71             software or data. NHGRI and the U.S. Government disclaims all
72             warranties as to performance, merchantability or fitness for any
73             particular purpose.
74              
75             =head1 METHODS
76              
77             =cut
78              
79             package Bio::SeqIO::mbsout;
80              
81 1     1   5 use version;
  1         1  
  1         6  
82             our $API_VERSION = qv('1.1.3');
83              
84 1     1   72 use strict;
  1         2  
  1         17  
85 1     1   4 use base qw(Bio::SeqIO); # This ISA Bio::SeqIO object
  1         1  
  1         320  
86 1     1   213 use Bio::Seq::SeqFactory;
  1         3  
  1         1390  
87              
88             =head2 INTERNAL METHODS
89              
90             =head3 _initialize
91              
92             Title : _initialize
93             Usage : $stream = Bio::SeqIO::mbsout->new($infile)
94             Function: extracts basic information about the file.
95             Returns : Bio::SeqIO object
96             Args : no_og
97             Details : include 'no_og' flag = 0 if the last population of an mbsout file
98             contains only one haplotype and you want the last haplotype to be
99             treated as the outgroup.
100             =cut
101              
102             sub _initialize {
103 3     3   6 my ( $self, @args ) = @_;
104 3         10 $self->SUPER::_initialize(@args);
105              
106 3 50       10 unless ( defined $self->sequence_factory ) {
107 3         18 $self->sequence_factory( Bio::Seq::SeqFactory->new() );
108             }
109              
110             # Don't expect mbs to create an outgroup
111 3   50     10 my ($no_og) = $self->_rearrange( [qw(NO_OG)], @args ) || 1;
112              
113 3         28 my %initial_values = (
114             RUNS => undef,
115             SEGSITES => undef,
116             MBS_INFO_LINE => undef,
117             TOT_RUN_HAPS => undef,
118             NEXT_RUN_NUM => undef, # What run is the next hap from? undef = EOF
119             LAST_READ_HAP_NUM => undef, # What did we just read from
120             LAST_READ_POSITIONS => [],
121             LAST_READ_SEGSITES => undef,
122             BUFFER_HAP => undef,
123             NO_OUTGROUP => $no_og,
124             OPTIONS => {},
125             LAST_READ_ALLELES => [],
126             LAST_READ_TRAJECTORY_FILE => undef,
127             LAST_READ_REPLICATION_OF_TRAJECTORY_FILE => undef,
128             BASE_CONVERSION_TABLE_HASH_REF => {
129             'A' => 0,
130             'T' => 1,
131             'C' => 4,
132             'G' => 5,
133             },
134             );
135              
136 3         12 foreach my $key ( keys %initial_values ) {
137 45         50 $self->{$key} = $initial_values{$key};
138             }
139              
140             # If the filehandle is defined open it and read a few lines
141 3 50       9 if ( ref( $self->{_filehandle} ) eq 'GLOB' ) {
142 3         9 $self->_read_start();
143 3         12 return $self;
144             }
145              
146             # Otherwise throw a warning
147             else {
148 0         0 $self->throw(
149             "No filehandle defined. Please define a file handle through -file when calling mbsout with Bio::SeqIO"
150             );
151             }
152             }
153              
154             =head3 _read_start
155              
156             Title : _read_start
157             Usage : $stream->_read_start()
158             Function: reads from the filehandle $stream->{_filehandle} all information up to the first haplotype (sequence).
159             Returns : void
160             Args : none
161              
162             =cut
163              
164             sub _read_start {
165 3     3   3 my $self = shift;
166              
167 3         5 my $fh_IN = $self->{_filehandle};
168              
169             # get the first five lines and parse for important info
170 3         7 my ($mbs_info_line) = $self->_get_next_clean_hap( $fh_IN, 1, 1 );
171              
172 3         19 my @mbs_info_line = split( /\s+/, $mbs_info_line );
173              
174             # Parsing the mbs header line
175 3         4 shift @mbs_info_line;
176 3         4 shift @mbs_info_line;
177 3         3 my $tot_run_haps = shift @mbs_info_line;
178 3         15 my $runs;
179              
180             # $pop_mut_param_per_site is the population mutation parameter per site.
181             my $pop_mut_param_per_site;
182              
183             # $pop_recomb_param_per_site is the population recombination parameter per
184             # site.
185 3         0 my $pop_recomb_param_per_site;
186              
187             # $nsites is length of the simulated region.
188             # $selpos is position of the target site of selection relative to the first
189             # site of the simulated region.
190 3         0 my $nsites;
191 3         0 my $selpos;
192              
193             # $nfile is number of trajectory files.
194             # $nrep is number of replications for each trajectory.
195             # $traj_filename is initial part of the name of the trajectory files.
196 3         0 my $nfiles;
197 3         0 my $nreps;
198 3         0 my $traj_filename;
199              
200 3         5 foreach my $word ( 0 .. $#mbs_info_line ) {
201 33 100       66 if ( $mbs_info_line[$word] eq '-t' ) {
    100          
    100          
    100          
202 3         5 $pop_mut_param_per_site = $mbs_info_line[ $word + 1 ];
203             }
204             elsif ( $mbs_info_line[$word] eq '-r' ) {
205 3         4 $pop_recomb_param_per_site = $mbs_info_line[ $word + 1 ];
206 3         29 $selpos = $mbs_info_line[ $word + 2 ];
207             }
208             elsif ( $mbs_info_line[$word] eq '-s' ) {
209 3         5 $nsites = $mbs_info_line[ $word + 1 ];
210 3         3 $selpos = $mbs_info_line[ $word + 2 ];
211             }
212             elsif ( $mbs_info_line[$word] eq '-f' ) {
213 3         3 $nfiles = $mbs_info_line[ $word + 1 ];
214 3         4 $nreps = $mbs_info_line[ $word + 2 ];
215 3         4 $traj_filename = $mbs_info_line[ $word + 3 ];
216 3         6 $runs = $nfiles * $nreps;
217             }
218 21         22 else { next; }
219             }
220              
221             # Save mbs info data
222 3         4 $self->{RUNS} = $runs;
223 3         4 $self->{MBS_INFO_LINE} = $mbs_info_line;
224 3         5 $self->{TOT_RUN_HAPS} = $tot_run_haps;
225 3         4 $self->{POP_MUT_PARAM_PER_SITE} = $pop_mut_param_per_site;
226 3         4 $self->{POP_RECOMB_PARAM_PER_SITE} = $pop_recomb_param_per_site;
227 3         4 $self->{NSITES} = $nsites;
228 3         5 $self->{SELPOS} = $selpos;
229 3         4 $self->{NFILES} = $nfiles;
230 3         4 $self->{NREPS} = $nreps;
231 3         12 $self->{TRAJ_FILENAME} = $traj_filename;
232             }
233              
234             =head2 Methods to retrieve mbsout data
235              
236             =head3 get_segsites
237              
238             Title : get_segsites
239             Usage : $segsites = $stream->get_segsites()
240             Function: returns the number segsites in the mbsout file (according to the mbsout header line).
241             Returns : scalar
242             Args : NONE
243              
244             =cut
245              
246             sub get_segsites {
247 3     3 1 1470 my $self = shift;
248 3 50       7 if ( defined $self->{SEGSITES} ) {
249 0         0 return $self->{SEGSITES};
250             }
251             else {
252 3         7 return $self->get_current_run_segsites;
253             }
254             }
255              
256             =head3 get_current_run_segsites
257              
258             Title : get_current_run_segsites
259             Usage : $segsites = $stream->get_current_run_segsites()
260             Function: returns the number of segsites in the run of the last read haplotype (sequence).
261             Returns : scalar
262             Args : NONE
263              
264             =cut
265              
266             sub get_current_run_segsites {
267 48     48 1 1391 my $self = shift;
268 48         154 return $self->{LAST_READ_SEGSITES};
269             }
270              
271             =head3 get_pop_mut_param_per_site
272              
273             Title : get_pop_mut_param_per_site
274             Usage : $pop_mut_param_per_site = $stream->get_pop_mut_param_per_site()
275             Function: returns 4*N0*mu or the "population mutation parameter per site"
276             Returns : scalar
277             Args : NONE
278              
279             =cut
280              
281             sub get_pop_mut_param_per_site {
282 3     3 1 1427 my $self = shift;
283 3         9 return $self->{POP_MUT_PARAM_PER_SITE};
284             }
285              
286             =head3 get_pop_recomb_param_per_site
287              
288             Title : get_pop_recomb_param_per_site
289             Usage : $pop_recomb_param_per_site = $stream->get_pop_recomb_param_per_site()
290             Function: returns 4*N0*r or the "population recombination parameter per site"
291             Returns : scalar
292             Args : NONE
293              
294             =cut
295              
296             sub get_pop_recomb_param_per_site {
297 3     3 1 1366 my $self = shift;
298 3         8 return $self->{POP_RECOMB_PARAM_PER_SITE};
299             }
300              
301             =head3 get_nsites
302              
303             Title : get_nsites
304             Usage : $nsites = $stream->get_nsites()
305             Function: returns the number of sites simulated by mbs.
306             Returns : scalar
307             Args : NONE
308              
309             =cut
310              
311             sub get_nsites {
312 3     3 1 1365 my $self = shift;
313 3         9 return $self->{NSITES};
314             }
315              
316             =head3 get_selpos
317              
318             Title : get_selpos
319             Usage : $selpos = $stream->get_selpos()
320             Function: returns the location on the chromosome where the allele is located that was selected for by mbs.
321             Returns : scalar
322             Args : NONE
323              
324             =cut
325              
326             sub get_selpos {
327 3     3 1 1593 my $self = shift;
328 3         8 return $self->{SELPOS};
329             }
330              
331             =head3 get_nreps
332              
333             Title : get_nreps
334             Usage : $nreps = $stream->get_nreps()
335             Function: returns the number replications done by mbs on each trajectory file to create the mbsout file.
336             Returns : scalar
337             Args : NONE
338              
339             =cut
340              
341             sub get_nreps {
342 3     3 1 1697 my $self = shift;
343 3         9 return $self->{NREPS};
344             }
345              
346             =head3 get_nfiles
347              
348             Title : get_nfiles
349             Usage : $nfiles = $stream->get_nfiles()
350             Function: returns the number of trajectory files used by mbs to create the mbsout file
351             Returns : scalar
352             Args : NONE
353              
354             =cut
355              
356             sub get_nfiles {
357 3     3 1 1077 my $self = shift;
358 3         9 return $self->{NFILES};
359             }
360              
361             =head3 get_traj_filename
362              
363             Title : get_traj_filename
364             Usage : $traj_filename = $stream->get_traj_filename()
365             Function: returns the prefix of the trajectory files used by mbs to create the mbsout file
366             Returns : scalar
367             Args : NONE
368              
369             =cut
370              
371             sub get_traj_filename {
372 3     3 1 1402 my $self = shift;
373 3         7 return $self->{TRAJ_FILENAME};
374             }
375              
376             =head3 get_runs
377              
378             Title : get_runs
379             Usage : $runs = $stream->get_runs()
380             Function: returns the number of runs in the mbsout file
381             Returns : scalar
382             Args : NONE
383              
384             =cut
385              
386             sub get_runs {
387 3     3 1 1475 my $self = shift;
388 3         9 return $self->{RUNS};
389             }
390              
391             =head3 get_Positions
392              
393             Title : get_Positions
394             Usage : @positions = $stream->get_Positions()
395             Function: returns an array of the names of each segsite of the run of the last read hap.
396             Returns : array
397             Args : NONE
398              
399             =cut
400              
401             sub get_Positions {
402 3     3 1 1350 my $self = shift;
403 3         3 return @{ $self->{LAST_READ_POSITIONS} };
  3         13  
404             }
405              
406             =head3 get_tot_run_haps
407              
408             Title : get_tot_run_haps
409             Usage : $number_of_haps_per_run = $stream->get_tot_run_haps()
410             Function: returns the number of haplotypes (sequences) in each run of the mbsout file.
411             Returns : scalar >= 0
412             Args : NONE
413              
414             =cut
415              
416             sub get_tot_run_haps {
417 3     3 1 1468 my $self = shift;
418 3         9 return $self->{TOT_RUN_HAPS};
419             }
420              
421             =head3 get_mbs_info_line
422              
423             Title : get_mbs_info_line
424             Usage : $mbs_info_line = $stream->get_mbs_info_line()
425             Function: returns the header line of the mbsout file.
426             Returns : scalar
427             Args : NONE
428              
429             =cut
430              
431             sub get_mbs_info_line {
432 3     3 1 1496 my $self = shift;
433 3         9 return $self->{MBS_INFO_LINE};
434             }
435              
436             =head3 tot_haps
437              
438             Title : tot_haps
439             Usage : $number_of_haplotypes_in_file = $stream->tot_haps()
440             Function: returns the number of haplotypes (sequences) in the mbsout file. Information gathered from mbsout header line.
441             Returns : scalar
442             Args : NONE
443              
444             =cut
445              
446             sub get_tot_haps {
447 0     0 0 0 my $self = shift;
448 0         0 return ( $self->{TOT_RUN_HAPS} * $self->{RUNS} );
449             }
450              
451             =head3 next_run_num
452              
453             Title : next_run_num
454             Usage : $next_run_number = $stream->next_run_num()
455             Function: returns the number of the mbs run that the next haplotype (sequence)
456             will be taken from (starting at 1). Returns undef if the complete
457             file has been read.
458             Returns : scalar > 0 or undef
459             Args : NONE
460              
461             =cut
462              
463             sub get_next_run_num {
464 53     53 0 2944 my $self = shift;
465 53         86 return $self->{NEXT_RUN_NUM};
466             }
467              
468             =head3 get_last_haps_run_num
469              
470             Title : get_last_haps_run_num
471             Usage : $last_haps_run_number = $stream->get_last_haps_run_num()
472             Function: returns the number of the ms run that the last haplotype (sequence)
473             was taken from (starting at 1). Returns undef if no hap has been
474             read yet.
475             Returns : scalar > 0 or undef
476             Args : NONE
477              
478             =cut
479              
480             sub get_last_haps_run_num {
481 42     42 1 39 my $self = shift;
482 42         48 return $self->{LAST_HAPS_RUN_NUM};
483             }
484              
485             =head3 get_last_read_hap_num
486              
487             Title : get_last_read_hap_num
488             Usage : $last_read_hap_num = $stream->get_last_read_hap_num()
489             Function: returns the number (starting with 1) of the last haplotype
490             read from the mbs file
491             Returns : scalar >= 0
492             Args : NONE
493             Details : 0 means that no haplotype has been read yet.
494              
495             =cut
496              
497             sub get_last_read_hap_num {
498 45     45 1 1466 my $self = shift;
499 45         66 return $self->{LAST_READ_HAP_NUM};
500             }
501              
502             =head3 outgroup
503              
504             Title : outgroup
505             Usage : $outgroup = $stream->outgroup()
506             Function: returns '1' if the mbsout object has an outgroup. Returns '0'
507             otherwise.
508             Returns : 1 or 0, currently always 0
509             Args : NONE
510             Details : This method will return '1' only if the last population in the mbsout
511             file contains only one haplotype. If the last population is not an
512             outgroup then create the mbsout object using 'no_outgroup' as input
513             parameter for new() (see mbsout->new()).
514              
515             Currently there exists no way of introducing an outgroup into an mbs
516             file, so this function will always return '0'.
517              
518             =cut
519              
520             sub outgroup {
521 1     1 1 538 my $self = shift;
522 1 50       5 if ( $self->{NO_OUTGROUP} ) { return 0; }
  1         4  
523 0         0 else { return 0; }
524             }
525              
526             =head3 get_next_seq
527              
528             Title : get_next_seq
529             Usage : $seq = $stream->get_next_seq()
530             Function: reads and returns the next sequence (haplotype) in the stream
531             Returns : Bio::Seq object
532             Args : NONE
533             Note : This function is included only to conform to convention. It only
534             calls next_hap() and passes on that method's return value. Use
535             next_hap() instead for better performance.
536              
537             =cut
538              
539             sub get_next_seq {
540 43     43 1 2636 my $self = shift;
541 43         55 my $seqstring = $self->get_next_hap;
542              
543 43 100       67 return unless defined $seqstring;
544              
545             # Used to create unique ID;
546 42         57 my $run = $self->get_last_haps_run_num;
547              
548             # Converting numbers to letters so that the haplotypes can be stored as a
549             # seq object
550 42         51 my $rh_base_conversion_table = $self->get_base_conversion_table;
551 42         40 foreach my $base ( keys %{$rh_base_conversion_table} ) {
  42         85  
552 168         1336 $seqstring =~ s/($rh_base_conversion_table->{$base})/$base/g;
553             }
554              
555 42         86 my $last_read_hap = $self->get_last_read_hap_num;
556              
557 42         81 my $id = 'Hap_' . $last_read_hap . '_Run_' . $run;
558 42         54 my $description =
559             'Segsites '
560             . $self->get_current_run_segsites
561             . "; Positions $self->positions; Haplotype "
562             . $last_read_hap
563             . '; Run '
564             . $run . ';';
565 42         90 my $seq = $self->sequence_factory->create(
566             -seq => $seqstring,
567             -id => $id,
568             -desc => $description,
569             -alphabet => q(dna),
570             -direct => 1,
571             );
572              
573 42         109 return $seq;
574              
575             }
576              
577             =head3 get_next_hap
578              
579             Title : get_next_hap
580             Usage : $seq = $stream->get_next_hap()
581             Function: reads and returns the next sequence (haplotype) in the stream. Returns
582             void if all sequences in stream have been read.
583             Returns : Bio::Seq object
584             Args : NONE
585             Note : Use this instead of get_next_seq().
586              
587             =cut
588              
589             sub get_next_hap {
590 47     47 1 1275 my $self = shift;
591              
592             # Let's figure out how many haps to read from the input file so that
593             # we get back to the beginning of the next run.
594              
595 47         49 my $end_run = 0;
596 47 100       89 if ( $self->{TOT_RUN_HAPS} == $self->{LAST_READ_HAP_NUM} + 1 ) {
597 8         9 $end_run = 1;
598             }
599              
600             # Setting last_haps_run_num
601 47         59 $self->{LAST_HAPS_RUN_NUM} = $self->get_next_run_num;
602              
603 47         55 my $fh_IN = $self->{_filehandle};
604              
605             my ($seqstring) =
606 47         74 $self->_get_next_clean_hap( $self->{_filehandle}, 1, $end_run );
607              
608 47         68 return $seqstring;
609             }
610              
611             =head3 get_next_run
612              
613             Title : get_next_run
614             Usage : @seqs = $stream->get_next_run()
615             Function: reads and returns all the remaining sequences (haplotypes) in the mbs
616             run of the next sequence.
617             Returns : array of Bio::Seq objects
618             Args : NONE
619              
620             =cut
621              
622             sub get_next_run {
623 9     9 1 3762 my $self = shift;
624              
625             # Let's figure out how many haps to read from the input file so that
626             # we get back to the beginning of the next run.
627              
628 9         15 my $haps_to_pull = $self->{TOT_RUN_HAPS} - $self->{LAST_READ_HAP_NUM};
629              
630             # Read those haps from the input file
631             # Next hap read will be the first hap of the next run.
632              
633 9         11 my @seqs;
634 9         19 for ( 1 .. $haps_to_pull ) {
635 37         53 my $seq = $self->get_next_seq;
636 37 50       59 next unless defined $seq;
637              
638 37         51 push @seqs, $seq;
639             }
640              
641 9         30 return @seqs;
642             }
643              
644             =head2 METHODS TO RETRIEVE CONSTANTS
645              
646             =head3 base_conversion_table
647              
648             Title : get_base_conversion_table
649             Usage : $table_hash_ref = $stream->get_base_conversion_table()
650             Function: returns a reference to a hash. The keys of the hash are the letters
651             'A','T','G','C'. The values associated with each key are the value
652             that each letter in the sequence of a seq object returned by a
653             Bio::SeqIO::mbsout stream should be translated to.
654             Returns : reference to a hash
655             Args : NONE
656             Synopsis:
657            
658             # retrieve the Bio::Seq object's sequence
659             my $haplotype = $seq->seq;
660             my $rh_base_conversion_table = $stream->get_base_conversion_table();
661            
662             # need to convert all letters to their corresponding numbers.
663             foreach my $base (keys %{$rh_base_conversion_table}){
664             $haplotype =~ s/($base)/$rh_base_conversion_table->{$base}/g;
665             }
666            
667             # $haplotype is now an ms style haplotype. (e.g. '100101101455')
668            
669             =cut
670              
671             sub get_base_conversion_table {
672 45     45 0 1219 my $self = shift;
673 45         63 return $self->{BASE_CONVERSION_TABLE_HASH_REF};
674             }
675              
676             ##############################################################################
677             ## subs for internal use only
678             ##############################################################################
679              
680             sub _get_next_clean_hap {
681              
682             #By Warren Kretzschmar
683              
684             # return the next non-empty line from file handle (chomped line)
685             # skipps to the next run if '//' is encountered
686 50     50   67 my ( $self, $fh, $times, $end_run ) = @_;
687 50         50 my @data;
688              
689 50 50       71 unless ( defined $fh ) { return; }
  0         0  
690              
691 50 50 33     130 unless ( defined $times && $times > 0 ) {
692 0         0 $times = 1;
693             }
694              
695 50 100       70 if ( defined $self->{BUFFER_HAP} ) {
696 8         10 push @data, $self->{BUFFER_HAP};
697 8         10 $self->{BUFFER_HAP} = undef;
698 8         8 $self->{LAST_READ_HAP_NUM}++;
699 8         9 $times--;
700             }
701              
702 50         79 while ( 1 <= $times-- ) {
703              
704             # Find next clean line
705 42         123 my $data = <$fh>;
706 42 100       74 last if !defined($data);
707 40         46 chomp $data;
708 40         116 while ( $data !~ /./ ) {
709 0         0 $data = <$fh>;
710 0         0 chomp $data;
711             }
712              
713             # If the next run is encountered here, then we have a programming
714             # or format error
715 40 50       69 if ( $data eq '//' ) { $self->throw("'//' found when not expected\n") }
  0         0  
716              
717 40         43 $self->{LAST_READ_HAP_NUM}++;
718 40         80 push @data, $data;
719             }
720              
721 50 100       66 if ($end_run) {
722 11         18 $self->_load_run_info($fh);
723             }
724              
725 50         85 return (@data);
726             }
727              
728             sub _load_run_info {
729              
730 11     11   13 my ( $self, $fh ) = @_;
731              
732 11         27 my $data = <$fh>;
733              
734             # In this case we are at EOF
735 11 100       17 if ( !defined($data) ) { $self->{NEXT_RUN_NUM} = undef; return; }
  2         4  
  2         3  
736              
737 9         7 chomp $data;
738              
739 9         17 while ( $data !~ /./ ) {
740 3         6 $data = <$fh>;
741              
742             # In this case we are at EOF
743 3 50       4 if ( !defined($data) ) { $self->{NEXT_RUN_NUM} = undef; return; }
  0         0  
  0         0  
744 3         8 chomp $data;
745             }
746              
747             # If the next run is encountered, then skip to the next hap and save it in
748             # the buffer.
749 9 50       23 if ( $data =~ /^\/\// ) {
750 9         12 $self->{NEXT_RUN_NUM}++;
751 9         10 $self->{LAST_READ_HAP_NUM} = 0;
752 9         70 my @data = split( /\s+/, $data );
753 9         22 my @temp = split( /\/\//, $data[0] );
754 9         15 @temp = split( /-/, $temp[0] );
755 9         14 $self->{LAST_READ_TRAJ_FILE} = $temp[0];
756 9         10 $self->{LAST_LEAD_TRAJ_FILE_REPLICATION} = $temp[1];
757 9         23 $self->{LAST_READ_ALLELES} = \@data[ 2 .. $#data ];
758              
759 9         23 for ( 1 .. 3 ) {
760 27         43 $data = <$fh>;
761 27         56 while ( $data !~ /./ ) {
762 3         7 $data = <$fh>;
763             }
764 27         32 chomp $data;
765              
766 27         76 @data = split( /\s+/, $data );
767              
768 27 100       54 if ( $_ eq '1' ) {
    100          
769 9         16 $self->{LAST_READ_SEGSITES} = $data[1];
770             }
771             elsif ( $_ eq '2' ) {
772 9         34 $self->{LAST_READ_POSITIONS} = [ @data[ 1 .. $#data ] ];
773             }
774             else {
775 9 50       15 if ( !defined($data) ) {
776 0         0 $self->throw("run $self->{NEXT_RUN_NUM} has no haps./n");
777             }
778 9         19 $self->{BUFFER_HAP} = $data;
779             }
780             }
781             }
782 0           else { $self->throw("'//' not encountered when expected\n") }
783              
784             }
785             1;