File Coverage

blib/lib/Bio/Phylo/Parsers/Adjacency.pm
Criterion Covered Total %
statement 57 58 98.2
branch 13 18 72.2
condition n/a
subroutine 5 5 100.0
pod n/a
total 75 81 92.5


line stmt bran cond sub pod time code
1             package Bio::Phylo::Parsers::Adjacency;
2 1     1   5 use strict;
  1         2  
  1         25  
3 1     1   4 use base 'Bio::Phylo::Parsers::Abstract';
  1         1  
  1         259  
4 1     1   6 use Bio::Phylo::Util::Exceptions 'throw';
  1         1  
  1         38  
5 1     1   5 use Bio::Phylo::Util::CONSTANT qw'/looks_like/ :namespaces :objecttypes';
  1         2  
  1         633  
6              
7             =head1 NAME
8              
9             Bio::Phylo::Parsers::Adjacency - Parser used by Bio::Phylo::IO, no serviceable parts inside
10              
11             =head1 DESCRIPTION
12              
13             This module parses a tree structure from tabular data organized as an "adjacency
14             list", i.e. child -> parent relationships. The table should at least have the
15             following columns: 'child' and 'parent'. 'length' is interpreted as branch
16             length. Columns starting with 'node:' are assigned as semantic annotations
17             to the focal node, columns starting with 'branch:' are assigned to the focal
18             branch. Records need to be listed in pre-order, so that references to parent
19             nodes can be resolved immediately. Consequently, the root is the first record,
20             without a parent. Example:
21              
22             ((A:1,B:2)n1:3,C:4)n2:0;
23              
24             Becomes (with an extra example annotation):
25              
26             child parent length node:dcterms:identifier
27             n2 0 35462
28             n1 n2 3 34987
29             A n1 1 73843
30             B n1 2 98743
31             C n2 4 39847
32              
33             =cut
34              
35             sub _parse {
36 1     1   2 my $self = shift;
37 1         4 my $fh = $self->_handle;
38 1         4 my $fac = $self->_factory;
39 1         4 my $log = $self->_logger;
40 1         6 my $tree = $fac->create_tree;
41 1         11 my $ns = $self->_args->{'-namespaces'};
42 1 50       3 if ( $ns ) {
43 1         7 $tree->set_namespaces( %{ $ns } );
  1         9  
44             }
45 1         3 my ( @header, %node_cols );
46 1         0 my %node_for_id;
47 1         7 LINE: while (<$fh>) {
48 6 100       19 unless ( scalar(keys(%node_for_id)) % 1000 ) {
49 2         12 $log->debug("processed node " . scalar(keys(%node_for_id)));
50             }
51 6         12 chomp;
52            
53             # the first line is the header row
54 6 100       13 if ( not @header ) {
55 1         7 @header = split /\t/, $_;
56 1         4 for my $col ( @header ) {
57 4 100       12 if ( $col =~ /^node:(.+)$/ ) {
58 1         4 my $predicate = $1;
59 1         5 $node_cols{$col} = $predicate;
60             }
61             }
62 1         4 next LINE;
63             }
64            
65             # this is a record
66 5         22 my @fields = split /\t/, $_;
67 5         15 my %record = map { $header[$_] => $fields[$_] } 0 .. $#header;
  20         52  
68            
69             # create node
70 5         13 my $name = $record{'child'};
71 5         6 my $pname = $record{'parent'};
72 5         30 my $node = $fac->create_node( '-name' => $name );
73 5         22 $tree->insert($node);
74 5         12 $node_for_id{$name} = $node;
75            
76             # build the tree structure
77 5 100       16 if ( my $parent = $node_for_id{$pname} ) {
78 4         11 $node->set_parent($parent);
79             }
80            
81             # assign branch length, if defined
82 5 50       10 if ( defined $record{'length'} ) {
83 5         14 $node->set_branch_length($record{'length'});
84             }
85            
86             # now see if there are any node columns
87 5         14 for my $col ( keys %node_cols ) {
88 5         11 my $value = $record{$col};
89 5 50       9 if ( $value ) {
90 5         9 my $predicate = $node_cols{$col};
91 5 50       33 if ( $predicate =~ /^(.+)?:.+$/ ) {
92 5         14 my $prefix = $1;
93 5 50       17 if ( my $ns = $Bio::Phylo::Util::CONSTANT::NS->{$prefix} ) {
94 5         37 $node->add_meta(
95             $fac->create_meta(
96             '-namespaces' => { $prefix => $ns },
97             '-triple' => { $predicate => $value }
98             )
99             );
100             }
101             else {
102 0         0 $log->warn("No namespace for prefix $prefix");
103             }
104             }
105             }
106             }
107             }
108 1         11 my $forest = $fac->create_forest;
109 1         5 $forest->insert($tree);
110 1         7 return $forest;
111             }
112              
113             =head1 SEE ALSO
114              
115             There is a mailing list at L<https://groups.google.com/forum/#!forum/bio-phylo>
116             for any user or developer questions and discussions.
117              
118             =over
119              
120             =item L<Bio::Phylo::IO>
121              
122             The adjacency parser is called by the L<Bio::Phylo::IO|Bio::Phylo::IO> object.
123             Look there to learn how to parse trees in general
124              
125             =item L<Bio::Phylo::Manual>
126              
127             Also see the manual: L<Bio::Phylo::Manual> and L<http://rutgervos.blogspot.com>
128              
129             =back
130              
131             =head1 CITATION
132              
133             If you use Bio::Phylo in published research, please cite it:
134              
135             B<Rutger A Vos>, B<Jason Caravas>, B<Klaas Hartmann>, B<Mark A Jensen>
136             and B<Chase Miller>, 2011. Bio::Phylo - phyloinformatic analysis using Perl.
137             I<BMC Bioinformatics> B<12>:63.
138             L<http://dx.doi.org/10.1186/1471-2105-12-63>
139              
140             =cut
141              
142             1;