File Coverage

Bio/Tree/Compatible.pm
Criterion Covered Total %
statement 81 92 88.0
branch 6 14 42.8
condition 0 3 0.0
subroutine 8 8 100.0
pod 5 5 100.0
total 100 122 81.9


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tree::Compatible
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Gabriel Valiente
7             #
8             # Copyright Gabriel Valiente
9             #
10             # You may distribute this module under the same terms as Perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Tree::Compatible - Testing compatibility of phylogenetic trees
17             with nested taxa.
18              
19             =head1 SYNOPSIS
20              
21             use Bio::Tree::Compatible;
22             use Bio::TreeIO;
23             my $input = Bio::TreeIO->new('-format' => 'newick',
24             '-file' => 'input.tre');
25             my $t1 = $input->next_tree;
26             my $t2 = $input->next_tree;
27              
28             my ($incompat, $ilabels, $inodes) = Bio::Tree::Compatible::is_compatible($t1,$t2);
29             if ($incompat) {
30             my %cluster1 = %{ Bio::Tree::Compatible::cluster_representation($t1) };
31             my %cluster2 = %{ Bio::Tree::Compatible::cluster_representation($t2) };
32             print "incompatible trees\n";
33             if (scalar(@$ilabels)) {
34             foreach my $label (@$ilabels) {
35             my $node1 = $t1->find_node(-id => $label);
36             my $node2 = $t2->find_node(-id => $label);
37             my @c1 = sort @{ $cluster1{$node1} };
38             my @c2 = sort @{ $cluster2{$node2} };
39             print "label $label";
40             print " cluster"; map { print " ",$_ } @c1;
41             print " cluster"; map { print " ",$_ } @c2; print "\n";
42             }
43             }
44             if (scalar(@$inodes)) {
45             while (@$inodes) {
46             my $node1 = shift @$inodes;
47             my $node2 = shift @$inodes;
48             my @c1 = sort @{ $cluster1{$node1} };
49             my @c2 = sort @{ $cluster2{$node2} };
50             print "cluster"; map { print " ",$_ } @c1;
51             print " properly intersects cluster";
52             map { print " ",$_ } @c2; print "\n";
53             }
54             }
55             } else {
56             print "compatible trees\n";
57             }
58              
59             =head1 DESCRIPTION
60              
61             NB: This module has exclusively class methods that work on Bio::Tree::TreeI
62             objects. An instance of Bio::Tree::Compatible cannot itself represent a tree,
63             and so typically there is no need to create one.
64              
65             Bio::Tree::Compatible is a Perl tool for testing compatibility of
66             phylogenetic trees with nested taxa represented as Bio::Tree::Tree
67             objects. It is based on a recent characterization of ancestral
68             compatibility of semi-labeled trees in terms of their cluster
69             representations.
70              
71             A semi-labeled tree is a phylogenetic tree with some of its internal
72             nodes labeled, and it can represent a classification tree as well as a
73             phylogenetic tree with nested taxa, with labeled internal nodes
74             corresponding to taxa at a higher level of aggregation or nesting than
75             that of their descendents.
76              
77             Two semi-labeled trees are compatible if their topological
78             restrictions to the common labels are such that for each node label,
79             the smallest clusters containing it in each of the trees coincide and,
80             furthermore, no cluster in one of the trees properly intersects a
81             cluster of the other tree.
82              
83             Future extensions of Bio::Tree::Compatible include a
84             Bio::Tree::Supertree module for combining compatible phylogenetic
85             trees with nested taxa into a common supertree.
86              
87             =head1 FEEDBACK
88              
89             =head2 Mailing Lists
90              
91             User feedback is an integral part of the evolution of this and other
92             Bioperl modules. Send your comments and suggestions preferably to the
93             Bioperl mailing list. Your participation is much appreciated.
94              
95             bioperl-l@bioperl.org - General discussion
96             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
97              
98             =head2 Support
99              
100             Please direct usage questions or support issues to the mailing list:
101              
102             I
103              
104             rather than to the module maintainer directly. Many experienced and
105             reponsive experts will be able look at the problem and quickly
106             address it. Please include a thorough description of the problem
107             with code and data examples if at all possible.
108              
109             =head2 Reporting Bugs
110              
111             Report bugs to the Bioperl bug tracking system to help us keep track
112             of the bugs and their resolution. Bug reports can be submitted via the
113             web:
114              
115             https://github.com/bioperl/bioperl-live/issues
116              
117             =head1 SEE ALSO
118              
119             =over
120              
121             =item * Philip Daniel and Charles Semple. Supertree Algorithms for
122             Nested Taxa. In: Olaf R. P. Bininda-Emonds (ed.) Phylogenetic
123             Supertrees: Combining Information to Reveal the Tree of Life,
124             I, vol. 4, chap. 7, pp. 151-171. Kluwer (2004).
125              
126             =item * Charles Semple, Philip Daniel, Wim Hordijk, Roderic
127             D. M. Page, and Mike Steel: Supertree Algorithms for Ancestral
128             Divergence Dates and Nested Taxa. Bioinformatics B<20>(15), 2355-2360
129             (2004).
130              
131             =item * Merce Llabres, Jairo Rocha, Francesc Rossello, and Gabriel
132             Valiente: On the Ancestral Compatibility of Two Phylogenetic Trees
133             with Nested Taxa. J. Math. Biol. B<53>(3), 340-364 (2006).
134              
135             =back
136              
137             =head1 AUTHOR - Gabriel Valiente
138              
139             Email valiente@lsi.upc.edu
140              
141             =head1 APPENDIX
142              
143             The rest of the documentation details each of the object methods.
144              
145             =cut
146              
147             package Bio::Tree::Compatible;
148 1     1   462 use strict;
  1         1  
  1         25  
149              
150             # Object preamble - inherits from Bio::Root::Root
151              
152 1     1   4 use Set::Scalar;
  1         1  
  1         36  
153              
154 1     1   4 use base qw(Bio::Root::Root);
  1         9  
  1         258  
155              
156             =head2 postorder_traversal
157              
158             Title : postorder_traversal
159             Usage : my @nodes = @{ $tree->postorder_traversal }
160             Function: Return list of nodes in postorder
161             Returns : reference to array of Bio::Tree::Node
162             Args : none
163              
164             For example, the postorder traversal of the tree
165             C<(((A,B)C,D),(E,F,G));> is a reference to an array of nodes with
166             internal_id 0 through 9, because the Newick standard representation
167             for phylogenetic trees is based on a postorder traversal.
168              
169             +---A +---0
170             | |
171             +---+---C +---4---2
172             | | | | | |
173             | | +---B | | +---1
174             | | | |
175             + +-------D 9 +-------3
176             | |
177             | +-----E | +-----5
178             | | | |
179             +-----+-----F +-----8-----6
180             | |
181             +-----G +-----7
182              
183             =cut
184              
185             sub postorder_traversal {
186 14     14 1 16 my($self) = @_;
187 14         16 my @stack;
188             my @queue;
189 14         23 push @stack, $self->get_root_node;
190 14         20 while (@stack) {
191 103         116 my $node = pop @stack;
192 103         96 push @queue, $node;
193 103         158 foreach my $child ($node->each_Descendent(-sortby => 'internal_id')) {
194 89         146 push @stack, $child;
195             }
196             }
197 14         25 my @postorder = reverse @queue;
198 14         29 return \@postorder;
199             }
200              
201             =head2 cluster_representation
202              
203             Title : cluster_representation
204             Usage : my %cluster = %{ $tree->cluster_representation }
205             Function: Compute the cluster representation of a tree
206             Returns : reference to hash of array of string indexed by
207             Bio::Tree::Node
208             Args : none
209              
210             For example, the cluster representation of the tree
211             C<(((A,B)C,D),(E,F,G));> is a reference to a hash associating an array
212             of string (descendent labels) to each node, as follows:
213              
214             0 --> [A]
215             1 --> [B]
216             2 --> [A,B,C]
217             3 --> [D]
218             4 --> [A,B,C,D]
219             5 --> [E]
220             6 --> [F]
221             7 --> [G]
222             8 --> [E,F,G]
223             9 --> [A,B,C,D,E,F,G]
224              
225             =cut
226              
227             sub cluster_representation {
228 4     4 1 7 my ($tree) = @_;
229 4         3 my %cluster;
230 4         6 my @postorder = @{ postorder_traversal($tree) };
  4         10  
231 4         7 foreach my $node ( @postorder ) {
232 28         42 my @labeled = map { $_->id } grep { $_->id } $node->get_Descendents;
  33         42  
  43         57  
233 28 100       41 push @labeled, $node->id if $node->id;
234 28         61 $cluster{$node} = \@labeled;
235             }
236 4         17 return \%cluster;
237             }
238              
239             =head2 common_labels
240              
241             Title : common_labels
242             Usage : my $labels = $tree1->common_labels($tree2);
243             Function: Return set of common node labels
244             Returns : Set::Scalar
245             Args : Bio::Tree::Tree
246              
247             For example, the common labels of the tree C<(((A,B)C,D),(E,F,G));>
248             and the tree C<((A,B)H,E,(J,(K)G)I);> are: C<[A,B,E,G]>.
249              
250             +---A +---A
251             | |
252             +---+---C +-------H
253             | | | | |
254             | | +---B | +---B
255             | | |
256             + +-------D +-----------E
257             | |
258             | +-----E | +-------J
259             | | | |
260             +-----+-----F +---I
261             | |
262             +-----G +---G---K
263              
264             =cut
265              
266             sub common_labels {
267 3     3 1 10 my($self,$arg) = @_;
268 3         9 my @labels1 = map { $_->id } grep { $_->id } $self->get_nodes;
  15         17  
  24         28  
269 3         19 my $common = Set::Scalar->new( @labels1 );
270 3         382 my @labels2 = map { $_->id } grep { $_->id } $arg->get_nodes;
  16         22  
  23         54  
271 3         8 my $temp = Set::Scalar->new( @labels2 );
272 3         231 return $common->intersection($temp);
273             }
274              
275             =head2 topological_restriction
276              
277             Title : topological_restriction
278             Usage : $tree->topological_restriction($labels)
279             Function: Compute the topological restriction of a tree to a subset
280             of node labels
281             Returns : Bio::Tree::Tree
282             Args : Set::Scalar
283              
284             For example, the topological restrictions of each of the trees
285             C<(((A,B)C,D),(E,F,G));> and C<((A,B)H,E,(J,(K)G)I);> to the labels
286             C<[A,B,E,G]> are as follows:
287              
288             +---A +---A
289             | |
290             +---+---+ +---+
291             | | | |
292             | +---B | +---B
293             + |
294             | +---E +-------E
295             | | |
296             +-------+ +---+---G
297             |
298             +---G
299              
300             =cut
301              
302             sub topological_restriction {
303 6     6 1 1521 my ($tree, $labels) = @_;
304 6         7 for my $node ( @{ postorder_traversal($tree) } ) {
  6         9  
305 47 50       61 unless (ref($node)) { # skip $node if already removed
306 0         0 my @cluster = map { $_->id } grep { $_->id } $node->get_Descendents;
  0         0  
  0         0  
307 0 0       0 push @cluster, $node->id if $node->id;
308 0         0 my $cluster = Set::Scalar->new(@cluster);
309 0 0       0 if ($cluster->is_disjoint($labels)) {
310 0         0 $tree->remove_Node($node);
311             } else {
312 0 0 0     0 if ($node->id and not $labels->has($node->id)) {
313 0         0 $node->{'_id'} = undef;
314             }
315             }
316             }
317             }
318             }
319              
320             =head2 is_compatible
321              
322             Title : is_compatible
323             Usage : $tree1->is_compatible($tree2)
324             Function: Test compatibility of two trees
325             Returns : boolean
326             Args : Bio::Tree::Tree
327              
328             For example, the topological restrictions of the trees
329             C<(((A,B)C,D),(E,F,G));> and C<((A,B)H,E,(J,(K)G)I);> to their common
330             labels, C<[A,B,E,G]>, are compatible. The respective cluster
331             representations are as follows:
332              
333             [A] [A]
334             [B] [B]
335             [E] [E]
336             [G] [G]
337             [A,B] [A,B]
338             [E,G] [A,B,E,G]
339             [A,B,E,G]
340              
341             As a second example, the trees C<(A,B);> and C<((B)A);> are
342             incompatible. Their respective cluster representations are as follows:
343              
344             [A] [B]
345             [B] [A,B]
346             [A,B]
347              
348             The reason is, the smallest cluster containing label C is C<[A]> in
349             the first tree but C<[A,B]> in the second tree.
350              
351             +---A A---B
352             |
353             +
354             |
355             +---B
356              
357             As a second example, the trees C<(((B,A),C),D);> and C<((A,(D,B)),C);>
358             are also incompatible. Their respective cluster representations are as
359             follows:
360              
361             [A] [A]
362             [B] [B]
363             [C] [C]
364             [D] [D]
365             [A,B] [B,D]
366             [A,B,C] [A,B,D]
367             [A,B,C,D] [A,B,C,D]
368              
369             The reason is, cluster C<[A,B]> properly intersects cluster
370             C<[B,D]>. There are further incompatibilities between these trees:
371             C<[A,B,C]> properly intersects both C<[B,D]> and C<[A,B,D]>.
372              
373             +---B +-------A
374             | |
375             +---+ +---+ +---D
376             | | | | |
377             +---+ +---A | +---+
378             | | + |
379             + +-------C | +---B
380             | |
381             +-----------D +-----------C
382              
383             =cut
384              
385             sub is_compatible {
386 2     2 1 10 my ($tree1, $tree2) = @_;
387 2         8 my $common = $tree1->Bio::Tree::Compatible::common_labels($tree2);
388 2         981 $tree1->Bio::Tree::Compatible::topological_restriction($common);
389 2         6 $tree2->Bio::Tree::Compatible::topological_restriction($common);
390 2         3 my @postorder1 = @{ postorder_traversal($tree1) };
  2         3  
391 2         3 my @postorder2 = @{ postorder_traversal($tree2) };
  2         3  
392 2         3 my %cluster1 = %{ cluster_representation($tree1) };
  2         5  
393 2         6 my %cluster2 = %{ cluster_representation($tree2) };
  2         3  
394 2         5 my $incompat = 0; # false
395 2         3 my @labels;
396 2         5 foreach my $label ( $common->elements ) {
397 8         2886 my $node1 = $tree1->find_node(-id => $label);
398 8         10 my @labels1 = @{ $cluster1{$node1} };
  8         19  
399 8         17 my $cluster1 = Set::Scalar->new(@labels1);
400 8         444 my $node2 = $tree2->find_node(-id => $label);
401 8         11 my @labels2 = @{ $cluster2{$node2} };
  8         17  
402 8         22 my $cluster2 = Set::Scalar->new(@labels2);
403 8 50       436 unless ( $cluster1->is_equal($cluster2) ) {
404 0         0 $incompat = 1; # true
405 0         0 push @labels, $label;
406             }
407             }
408 2         932 my @nodes;
409 2         4 foreach my $node1 ( @postorder1 ) {
410 14         5962 my @labels1 = @{ $cluster1{$node1} };
  14         35  
411 14         46 my $cluster1 = Set::Scalar->new(@labels1);
412 14         761 foreach my $node2 ( @postorder2 ) {
413 98         36345 my @labels2 = @{$cluster2{$node2} };
  98         241  
414 98         176 my $cluster2 = Set::Scalar->new(@labels2);
415 98 100       5173 if ($cluster1->is_properly_intersecting($cluster2)) {
416 3         1569 $incompat = 1; # true
417 3         8 push @nodes, $node1, $node2;
418             }
419             }
420             }
421 2         1107 return ($incompat, \@labels, \@nodes);
422             }
423              
424             1;