File Coverage

blib/lib/Bio/Phylo/Parsers/Phylip.pm
Criterion Covered Total %
statement 27 29 93.1
branch 5 8 62.5
condition 5 8 62.5
subroutine 4 4 100.0
pod n/a
total 41 49 83.6


line stmt bran cond sub pod time code
1             package Bio::Phylo::Parsers::Phylip;
2 1     1   6 use strict;
  1         1  
  1         26  
3 1     1   4 use base 'Bio::Phylo::Parsers::Abstract';
  1         1  
  1         268  
4 1     1   6 use Bio::Phylo::Util::Exceptions 'throw';
  1         2  
  1         249  
5              
6             =head1 NAME
7              
8             Bio::Phylo::Parsers::Phylip - Parser used by Bio::Phylo::IO, no serviceable parts inside
9              
10             =head1 DESCRIPTION
11              
12             This module parses PHYLIP character state matrix files. At present it only works on
13             non-interleaved files. Because PHYLIP files do not indicate their data type, you should
14             specify it as the '-type' argument to the Bio::Phylo::IO::parse function, i.e.:
15              
16             use Bio::Phylo::IO 'parse';
17             my $file = shift @ARGV;
18             my $type = 'dna'; # or rna, protein, restriction, standard, continuous
19             my $matrix = parse(
20             '-file' => $file,
21             '-format' => 'phylip',
22             '-type' => $type,
23             )->[0];
24             print ref($matrix); # probably prints Bio::Phylo::Matrices::Matrix;
25              
26             =cut
27              
# Parses a non-interleaved PHYLIP character state matrix from the parser's
# input handle and returns the populated matrix.
#
# Reads:   $self->_handle  - handle the lines are read from
#          $self->_args    - parse arguments; '-type' selects the data type
#                            and defaults to 'standard'
#          $self->_factory - creates the matrix and the datum (row) objects
# Returns: the matrix created by the factory, one datum inserted per row
# Throws:  'BadFormat' when the observed number of characters or taxa
#          differs from the numbers given on the "ntax nchar" header line
sub _parse {
    my $self    = shift;
    my $factory = $self->_factory;
    my $type    = $self->_args->{'-type'} || 'standard';
    my $handle  = $self->_handle;
    my $matrix  = $factory->create_matrix( '-type' => $type );
    my ( $ntax, $nchar );

    # Read into a lexical: while (<$fh>) assigns to the global $_ without
    # localizing it, which would clobber the caller's $_.
  LINE: while ( my $line = <$handle> ) {

        # Blank lines (e.g. a trailing empty line) carry no data; they used
        # to fall through to the fixed-width branch and became bogus rows.
        next LINE if $line !~ /\S/;

        # The first "ntax nchar" line is the header. Testing definedness
        # rather than truth means a zero dimension still counts as "seen".
        if ( !defined $ntax && $line =~ /^\s*(\d+)\s+(\d+)\s*$/ ) {
            ( $ntax, $nchar ) = ( $1, $2 );
            next LINE;
        }

        my ( $name, $seq );
        if ( $line =~ /^\s*(\S+)\s+(.+)$/ ) {

            # relaxed layout: name, whitespace, sequence
            ( $name, $seq ) = ( $1, $2 );
        }
        else {

            # fixed-width layout: the name occupies the first 10 columns.
            # Guard the second substr so short lines don't read past the end.
            $name = substr( $line, 0, 10 );
            $seq  = length($line) > 10 ? substr( $line, 10 ) : '';
            $name =~ s/\s+$//;
        }

        # Strip all whitespace, including the trailing newline, which the
        # fixed-width branch previously left inside the sequence.
        # NOTE(review): this also joins whitespace-separated values (e.g.
        # continuous data) - confirm the type object's split() handles that.
        $seq =~ s/\s//g;

        $matrix->insert(
            $factory->create_datum(
                '-type' => $type,
                '-name' => $name,
                '-char' => $matrix->get_type_object->split($seq),
            )
        );
    }

    # Without a header the expected dimensions used to be undef, which
    # compares numerically as 0; make that explicit so the checks below
    # behave the same but without operating on undefined values.
    ( $ntax, $nchar ) = ( 0, 0 ) if !defined $ntax;

    my ( $my_nchar, $my_ntax ) = ( $matrix->get_nchar, $matrix->get_ntax );
    if ( $nchar != $my_nchar ) {
        throw( 'BadFormat' => "observed ($my_nchar) != expected ($nchar) nchar" );
    }
    if ( $ntax != $my_ntax ) {
        throw( 'BadFormat' => "observed ($my_ntax) != expected ($ntax) ntax" );
    }
    return $matrix;
}
64              
65             # podinherit_insert_token
66              
67             =head1 SEE ALSO
68              
69             There is a mailing list at L<https://groups.google.com/forum/#!forum/bio-phylo>
70             for any user or developer questions and discussions.
71              
72             =over
73              
74             =item L<Bio::Phylo::IO>
75              
76             The PHYLIP parser is called by the L<Bio::Phylo::IO> object.
77             Look there for examples.
78              
79             =item L<Bio::Phylo::Manual>
80              
81             Also see the manual: L<Bio::Phylo::Manual> and L<http://rutgervos.blogspot.com>.
82              
83             =back
84              
85             =head1 CITATION
86              
87             If you use Bio::Phylo in published research, please cite it:
88              
89             B<Rutger A Vos>, B<Jason Caravas>, B<Klaas Hartmann>, B<Mark A Jensen>
90             and B<Chase Miller>, 2011. Bio::Phylo - phyloinformatic analysis using Perl.
91             I<BMC Bioinformatics> B<12>:63.
92             L<http://dx.doi.org/10.1186/1471-2105-12-63>
93              
94             =cut
95              
96             1;