File Coverage

Bio/PhyloNetwork.pm
Criterion Covered Total %
statement 610 696 87.6
branch 123 164 75.0
condition 65 84 77.3
subroutine 70 76 92.1
pod 31 65 47.6
total 899 1085 82.8


line stmt bran cond sub pod time code
1             #
2             # Module for Bio::PhyloNetwork
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Gabriel Cardona
7             #
8             # Copyright Gabriel Cardona, Gabriel Valiente
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::PhyloNetwork - Module to compute with Phylogenetic Networks
17              
18             =head1 SYNOPSIS
19              
20             use Bio::PhyloNetwork;
21              
22             # Create a PhyloNetwork object from a eNewick string
23             my $net1=Bio::PhyloNetwork->new(
24             -eNewick=>'t0:((H1,(H2,l2)),H2); H1:((H3,l1)); H2:((H3,(l3,H1))); H3:(l4);'
25             );
26              
27             # Print all available data
28             print $net1;
29              
30             # Rebuild $net1 from its mu_data
31             my %mudata=$net1->mudata();
32             my $net2=Bio::PhyloNetwork->new(-mudata=>\%mudata,-numleaves=>4);
33             print $net2;
34             print "d=".$net1->mu_distance($net2)."\n";
35              
36             # Get another one and compute distance
37             my $net3=Bio::PhyloNetwork->new(
38             -eNewick=>'(l2,((l1,(H1,l4)),H1))r; (l3)H1;'
39             );
40             print "d=".$net1->mu_distance($net3)."\n";
41              
42             # ...and find an optimal alignment w.r.t. the Manhattan distance (default)
43             my ($weight,%alignment)=$net1->optimal_alignment($net3);
44             print "weight:$weight\n";
45             foreach my $node1 (keys %alignment) {
46             print "$node1 => ".$alignment{$node1}."\n";
47             }
48             # ...or the Hamming distance
49              
50             my ($weightH,%alignmentH)=$net1->optimal_alignment($net3,-metric=>'Hamming');
51             print "weight:$weightH\n";
52             foreach my $node1 (keys %alignmentH) {
53             print "$node1 => ".$alignmentH{$node1}."\n";
54             }
55              
56             # Test for time consistency of $net1
57             if ($net1->is_time_consistent) {
58             print "net1 is time consistent\n"
59             }
60             else {
61             print "net1 is not time consistent\n"
62             }
63              
64             # create a network from the list of edges
65             my $net4=Bio::PhyloNetwork->new(-edges=>
66             [qw(r s r t s u s c t c t v u b u l3 u b v b v l4 b l2 c l1)]);
67              
68             # Test for time consistency of $net3
69             if ($net4->is_time_consistent) {
70             print "net4 is time consistent\n"
71             }
72             else {
73             print "net4 is not time consistent\n"
74             }
75              
76             # And print all information on net4
77             print $net4;
78              
79             # Compute some tripartitions
80             my %triparts=$net1->tripartitions();
81              
82             # Now these are stored
83             print $net1;
84              
85             # And can compute the tripartition error
86             print "dtr=".$net1->tripartition_error($net3)."\n";
87              
88             =head1 DESCRIPTION
89              
90             =head2 Phylogenetic Networks
91              
92             This is a module to work with phylogenetic networks. Phylogenetic networks
93             have been studied over the last years as a richer model of the evolutionary
94             history of sets of organisms than phylogenetic trees, because they take not
95             only mutation events but also recombination and horizontal gene transfer
96             events into account.
97              
98             The natural model for describing the evolutionary
99             history of a set of sequences under recombination events is a DAG, hence
100             this package relies on the package Graph::Directed to represent the
101             underlying graph of a phylogenetic network. We refer the reader to [CRV1,CRV2]
102             for formal definitions related to phylogenetic networks.
103              
104             =head2 eNewick description
105              
106             With this package, phylogenetic networks can be given by its eNewick
107             string. This description appeared in other packages related to
108             phylogenetic networks (see [PhyloNet] and [NetGen]); in fact, these two
109             packages use different descriptions. The Bio::PhyloNetwork
110             package allows both of them, but uses the second one in its output.
111              
112             The first approach [PhyloNet] goes as follows: For each hybrid node H, say with
113             parents u_1,u_2,...,u_k and children v_1,v_2,...v_l: split H in k+1 different
114             nodes; let each of the first k copies be a child of one of the u_1,...,u_k
115             (one for each) and have no children (hence we will have k extra leaves);
116             as for the last copy, let it have no parents and have v_1,...,v_l be its
117             children. This way we get a forest; each of the trees will be rooted at either
118             one root of the phylogenetic network or a hybrid node of it; the set of leaves
119             (of the whole forest) will be the set of leaves of the original network
120             together with the set of hybrid nodes (each of them repeated as many times
121             as its in-degree). Then, the eNewick representation of the phylogenetic network
122             will be the Newick representation of all the trees in the obtained forest,
123             each of them with its root labeled.
124              
125             The second approach [NetGen] goes as follows: For each hybrid node H, say with
126             parents u_1,u_2,...,u_k and children v_1,v_2,...v_l: split H in k different
127             nodes; let the first copy be a child of u_1 and have all v_1,v_2,...v_l as
128             its children; let the other copies be child of u_2,...,u_k (one for each)
129             and have no children. This way, we get a tree whose set of leaves is the
130             set of leaves of the original network together with the set of hybrid nodes
131             (possibly repeated). Then the Newick string of the obtained tree (note that
132             some internal nodes will be labeled and some leaves will be repeated) is
133             the eNewick string of the phylogenetic network.
134              
135             For example, consider the network depicted below:
136              
137             r
138             / \
139             / \
140             U V
141             / \ / \
142             1 \ / 3
143             H
144             |
145             2
146              
147             If the first approach is taken, we get the forest:
148              
149             r
150             / \
151             / \
152             U V
153             / \ / \
154             1 H H 3
155             |
156             H
157             |
158             2
159              
160             Hence, the eNewick string is '((1,H),(H,3))r; (2)H;'.
161              
162             As for the second one, one gets the tree:
163              
164             r
165             / \
166             / \
167             U V
168             / \ / \
169             1 H | 3
170             H
171             |
172             2
173              
174             Hence, the eNewick string is '((1,H),((2)H,3))r;'.
175              
176             Note: when rooting a tree, this package allows the notations
177             '(subtree,subtree,...)root' as well as 'root:(subtree,subtree,...)', but
178             the first one is used when writing eNewick strings.
179              
180             =head2 Tree-child phylogenetic networks
181              
182             Tree-child (TC) phylogenetic networks are a special class of phylogenetic
183             networks for which a distance, called mu-distance, is defined [CRV2]
184             based on certain data (mu-data) associated to every node.
185             Moreover, this distance extends the
186             Robinson-Foulds on phylogenetic trees. This package allows testing for a
187             phylogenetic network if it is TC and computes mu-distances between networks
188             over the same set of leaves.
189              
190             Moreover, the mu-data allows one to define the optimal
191             (in some precise sense) alignment between networks
192             over the same set of leaves. This package also computes this optimal alignment.
193              
194             =head2 Tripartitions
195              
196             Although tripartitions (see [CRV1] and the references therein) do not allow
197             to define distances, this package outputs tripartitions and computes a weak
198             form of the tripartition error.
199              
200             =head2 Time-consistency
201              
202             Another useful property of Phylogenetic Networks that appears in the literature
203             is that of time-consistency or real-time hybrids [BSS]. Roughly speaking, a
204             network admits a temporal representation if it can be drawn in such a way
205             that tree arcs (those whose end is a tree node) are inclined downwards, while
206             hybridization arcs (those whose end is a hybrid node) are horizontal.
207             This package checks for time-consistency and, if so, a temporal representation
208             is provided.
209              
210             =head1 AUTHOR
211              
212             Gabriel Cardona, gabriel(dot)cardona(at)uib(dot)es
213             Gabriel Valiente, valiente(at)lsi(dot)upc(dot)edu
214              
215             =head1 SEE ALSO
216              
217             =over
218              
219             =item [CRV1]
220              
221             G. Cardona, F. Rossello, G. Valiente. Tripartitions do not always
222             discriminate phylogenetic networks. arXiv:0707.2376v1 [q-bio.PE]
223              
224             =item [CRV2]
225              
226             G. Cardona, F. Rossello, G. Valiente. A Distance Measure for
227             Tree-Child Phylogenetic Networks. Preprint.
228              
229             =item [NetGen]
230              
231             M.M. Morin, and B.M.E. Moret. NetGen: generating phylogenetic networks
232             with diploid hybrids. Bioinformatics 22 (2006), 1921-1923
233              
234             =item [PhyloNet]
235              
236             PhyloNet: "Phylogenetic Networks Toolkit".
237             http://bioinfo.cs.rice.edu/phylonet
238              
239             =item [BSS]
240              
241             M. Baroni, C. Semple, and M. Steel. Hybrids in Real
242             Time. Syst. Biol. 55(1):46-56, 2006
243              
244             =back
245              
246             =head1 APPENDIX
247              
248             The rest of the documentation details each of the object methods.
249              
250             =cut
251              
252             package Bio::PhyloNetwork;
253              
254 5     5   511 use strict;
  5         5  
  5         110  
255 5     5   14 use warnings;
  5         6  
  5         110  
256              
257 5     5   15 use base qw(Bio::Root::Root);
  5         3  
  5         1200  
258              
259 5     5   1056 use Bio::PhyloNetwork::muVector;
  5         9  
  5         109  
260 5     5   1380 use Graph::Directed;
  5         274865  
  5         104  
261 5     5   1191 use Bio::TreeIO;
  5         8  
  5         113  
262 5     5   20 use Bio::Tree::Node;
  5         5  
  5         82  
263 5     5   1673 use IO::String;
  5         6806  
  5         109  
264 5     5   1693 use Array::Compare;
  5         290509  
  5         140  
265 5     5   1685 use Algorithm::Munkres;
  5         5133  
  5         23891  
266              
267             # Creator
268              
269             =head2 new
270              
271             Title : new
272             Usage : my $obj = new Bio::PhyloNetwork();
273             Function: Creates a new Bio::PhyloNetwork object
274             Returns : Bio::PhyloNetwork
275             Args : none
276             OR
277             -eNewick => string
278             OR
279             -graph => Graph::Directed object
280             OR
281             -edges => reference to an array
282             OR
283             -tree => Bio::Tree::Tree object
284             OR
285             -mudata => reference to a hash,
286             -leaves => reference to an array
287             OR
288             -mudata => reference to a hash,
289             -numleaves => integer
290              
291             Returns a Bio::PhyloNetwork object, created according to the data given:
292              
293             =over 3
294              
295             =item new()
296              
297             creates an empty network.
298              
299             =item new(-eNewick =E $str)
300              
301             creates the network whose
302             Extended Newick representation (see description above) is the string $str.
303              
304             =item new(-graph =E $graph)
305              
306             creates the network with underlying
307             graph given by the Graph::Directed object $graph
308              
309             =item new(-tree =E $tree)
310              
311             creates a network as a copy of the
312             Bio::Tree::Tree object in $tree
313              
314             =item new(-mudata =E \%mudata, -leaves =E \@leaves)
315              
316             creates the network by reconstructing it from its mu-data stored in
317             \%mudata and with set of leaves in \@leaves.
318              
319             =item new(-mudata =E \%mudata, -numleaves =E $numleaves)
320              
321             creates the network by reconstructing it from its mu-data stored in
322             \%mudata and with set of leaves in ("l1".."l$numleaves").
323              
324             =back
325              
326             =cut
327              
328             sub new {
329 831     831 1 5854 my ($pkg,@args)=@_;
330 831         1753 my $self=$pkg->SUPER::new(@args);
331 831         3480 my ($eNewick,$edgesR,$leavesR,$numleaves,$graph,$tree,$mudataR)=
332             $self->_rearrange([qw(ENEWICK
333             EDGES
334             LEAVES
335             NUMLEAVES
336             GRAPH
337             TREE
338             MUDATA)],@args);
339 831         1763 bless($self,$pkg);
340              
341 831 100       1522 $self->build_from_eNewick($eNewick) if defined $eNewick;
342 831 50       1219 $self->build_from_edges(@$edgesR) if defined $edgesR;
343 831 100       1472 $self->build_from_graph($graph) if defined $graph;
344 831 100       1859 $self->build_from_tree($tree) if defined $tree;
345 831 50 66     5378 if ((! defined $leavesR) && (defined $numleaves)) {
346 0         0 my @leaves=map {"l$_"} (1..$numleaves);
  0         0  
347 0         0 $leavesR=\@leaves;
348             }
349 831 100 66     1628 $self->build_from_mudata($mudataR,$leavesR)
350             if ((defined $mudataR) && (defined $leavesR));
351 831         2029 return $self;
352             }
353              
354             # Builders
355              
356             sub build_from_edges {
357 0     0 0 0 my ($self,@edges)=@_;
358 0         0 my $graph=Graph::Directed->new();
359 0         0 $graph->add_edges(@edges);
360 0         0 $self->{graph}=$graph;
361 0         0 $self->recompute();
362 0         0 my $labels={};
363 0         0 foreach my $node ($self->nodes()) {
364 0         0 $labels->{$node}=$node;
365             }
366 0         0 $self->{labels}=$labels;
367             }
368              
369             sub build_from_graph {
370 842     842 0 858 my ($self,$graph)=@_;
371 842         1917 my $graphcp=$graph->copy();
372 842         701234 $self->{graph}=$graphcp;
373 842         1699 $self->recompute();
374 842         816 my $labels={};
375 842         1773 foreach my $node ($self->nodes()) {
376 6430         6139 $labels->{$node}=$node;
377             }
378 842         4683 $self->{labels}=$labels;
379             }
380              
381             my $_eN_index;
382              
383             sub build_from_eNewick {
384 647     647 0 626 my ($self,$string)=@_;
385 647         589 $_eN_index=0;
386 647         2512 my $graph=Graph::Directed->new();
387 647         73822 my $labels={};
388 647         3195 my @blocks=split(/; */,$string);
389 647         961 foreach my $block (@blocks) {
390 1010         1453 my ($rt,$str)=get_root_and_subtree($block);
391 1010         1525 my ($rtlbl,$rttype,$rtid,$rtlng)=get_label_type_id_length($rt);
392 1010         1449 process_block($graph,$labels,$block,$rtid);
393 1010         1951 $labels->{$rtid}=$rtlbl.'';
394             }
395 647         928 $self->{graph}=$graph;
396 647         612 $self->{labels}=$labels;
397 647         1068 $self->recompute();
398             }
399              
400             sub process_block {
401 1904     1904 0 1875 my ($graph,$labels,$block,$rtid)=@_;
402 1904         2324 my ($rt,$str)=get_root_and_subtree($block);
403 1904         2363 my @substrs=my_split($str);
404 1904         2062 foreach my $substr (@substrs) {
405 3292         3773 my ($subrt,$subblock)=get_root_and_subtree($substr);
406 3292         4561 my ($subrtlbl,$subrttype,$subrtid,$subrtlng)=
407             get_label_type_id_length($subrt);
408 3292 100       4666 if (! $subrtlng eq '') {
409 1972         3860 $graph->add_weighted_edges($rtid,$subrtid,$subrtlng);
410             }
411             else {
412 1320         2654 $graph->add_edges($rtid,$subrtid);
413             }
414 3292 100       397145 if (! $subrttype eq '') {
415 444         916 $graph->set_edge_attribute($rtid,$subrtid,'type',$subrttype);
416             }
417 3292         53018 $subrtlbl.='';
418             # if (! $subrtlbl eq '') {
419 3292 50 66     7258 if ((! defined $labels->{$subrtid})||($labels->{$subrtid} eq '')){
    0 0        
420 3292         3909 $labels->{$subrtid}=$subrtlbl;
421             } elsif (( $labels->{$subrtid} ne $subrtlbl )&&($subrtlbl ne '')) {
422             # error
423 0         0 die("Different labels for the same node (".$labels->{$subrtid}." and $subrtlbl)");
424             }
425             # }
426 3292 100       6792 if ($subblock ne "") {
427 894         1297 process_block($graph,$labels,$subblock,$subrtid);
428             }
429             }
430             }
431              
432             sub get_root_and_subtree {
433 6206     6206 0 5011 my ($block)=@_;
434 6206         5178 my ($rt,$str)=("","");
435             # ($rt,$str)=split(/:|=/,$block);
436 6206         9058 ($rt,$str)=split(/=/,$block);
437 6206 100       9036 if ($rt eq $block) {
438             # try to look for root label at the end
439 6202         5636 my $pos=length($rt)-1;
440 6202   100     17832 while ((substr($rt,$pos,1) ne ")") && ($pos >=0)) {
441 49862         109887 $pos--;
442             }
443 6202         7263 $rt=substr($block,$pos+1,length($block)-$pos);
444 6202         6588 $str=substr($block,0,$pos+1);
445             }
446 6206         6742 $rt=trim($rt);
447 6206         6560 $str=trim($str);
448 6206         9199 return ($rt,$str);
449             }
450              
451             sub get_label_type_id_length {
452 4302     4302 0 3455 my ($string) = @_;
453 4302         3345 $string.='';
454             # print "$string\n";
455 4302 100       6437 if (index($string,'#')==-1) {
456             # no hybrid
457 3642         4643 my ($label,$length)=split(':',$string);
458 3642         2784 $label.='';
459 3642         2401 my $id;
460 3642 100 66     10509 if ((! defined $label) || ($label eq '')) {
461             # create id
462 682         616 $_eN_index++;
463 682         657 $id="T$_eN_index";
464             } else {
465 2960         2453 $id=$label;
466             }
467 3642         6759 return ($label,'',$id,$length);
468             }
469             else {
470             # hybrid
471 660         953 my ($label,$string2)=split('#',$string);
472 660         732 my ($typeid,$length)=split(':',$string2);
473 660         540 my $type=$typeid;
474 660         1461 $type =~ s/\d//g;
475 660         566 my $id=$typeid;
476 660         1163 $id =~ s/\D//g;
477 660         1486 return ($label,$type,'#'.$id,$length);
478             }
479             }
480              
481             sub trim
482             {
483 12412     12412 0 9711 my ($string) = @_;
484 12412         13530 $string =~ s/^\s+//;
485 12412         11045 $string =~ s/\s+$//;
486 12412         12019 return $string;
487             }
488              
489             sub my_split {
490 1904     1904 0 1567 my ( $string ) = @_;
491 1904         1532 my $temp="";
492 1904         1336 my @substrings;
493 1904         1417 my $level=1;
494 1904         3541 for my $i ( 1 .. length( $string ) ) {
495 74268         47177 my $char=substr($string,$i,1);
496 74268 100       75385 if ($char eq "(") {
497 1123         822 $level++;
498             }
499 74268 100       75467 if ($char eq ")") {
500 2880 100       3358 if ($level==1) {
501 1757         1604 push @substrings, $temp;
502 1757         1333 $temp="";
503             }
504 2880         1863 $level--;
505             }
506 74268 100 100     89486 if (($char eq ",") && ($level==1)) {
507 1535         1436 push @substrings, $temp;
508 1535         1134 $temp="";
509 1535         1328 $char="";
510             }
511 74268         52600 $temp = $temp.$char;
512             }
513 1904         3905 return @substrings;
514             }
515              
516             sub build_from_mudata {
517 1     1 0 1 my ($self,$mus,$leavesR)=@_;
518 1         4 my $graph=Graph::Directed->new();
519 1         116 my @nodes=keys %{$mus};
  1         4  
520 1         2 my @leaves=@{$leavesR};
  1         2  
521              
522 1         2 my %seen;
523             my @internal;
524              
525 1         2 @seen{@leaves} = ();
526              
527 1         2 foreach my $node (@nodes) {
528 13 100       19 push(@internal, $node) unless exists $seen{$node};
529             }
530              
531 1         4 @internal=sort {$mus->{$b} <=> $mus->{$a} } @internal;
  19         33  
532 1         3 @nodes=(@internal,@leaves);
533 1         1 my $numnodes=@nodes;
534 1         4 for (my $i=0;$i<$numnodes;$i++) {
535 13         14 my $mu=$mus->{$nodes[$i]};
536 13         11 my $j=$i+1;
537 13   66     16 while ($mu->is_positive() && $j<$numnodes) {
538 78 100       102 if ($mu->geq_poset($mus->{$nodes[$j]})) {
539 15         30 $graph->add_edges(($nodes[$i],$nodes[$j]));
540 15         673 $mu = $mu - $mus->{$nodes[$j]};
541             }
542 78         110 $j++;
543             }
544             }
545 1         4 $self->build_from_graph($graph);
546             }
547              
548             # sub relabel_tree {
549             # my ($tree)=@_;
550             # my $i=1;
551             # my $j=1;
552             # my $root=$tree->get_root_node();
553             # foreach my $node ($tree->get_nodes()) {
554             # if ($node == $root) {
555             # $node->{'_id'}="r";
556             # }
557             # elsif (! $node->is_Leaf) {
558             # $node->{'_id'}="t$i";
559             # $i++;
560             # }
561             # else {
562             # if ($node->{'_id'} eq "") {
563             # $node->{'_id'}="l$j";
564             # $j++;
565             # }
566             # }
567             # }
568             # return $tree;
569             # }
570              
571             # sub build_subtree {
572             # my ($graph,$root)=@_;
573             # foreach my $child ($root->each_Descendent) {
574             # $graph->add_edge($root->id,$child->id);
575             # $graph=build_subtree($graph,$child);
576             # }
577             # return $graph;
578             # }
579              
580             sub build_from_tree {
581 493     493 0 474 my ($self,$tree)=@_;
582             # relabel_tree($tree);
583             # my $treeroot=$tree->get_root_node;
584             # my $graph=Graph::Directed->new();
585             # $graph=build_subtree($graph,$treeroot);
586             # $self->build_from_graph($graph);
587 493         355 my $str;
588 493         2100 my $io=IO::String->new($str);
589 493         16045 my $treeio=Bio::TreeIO->new(-format => 'newick', -fh => $io);
590 493         981 $treeio->write_tree($tree);
591             # print "intern: $str\n";
592 493         863 $self->build_from_eNewick($str);
593             }
594              
595             sub recompute {
596 1489     1489 0 1355 my ($self)=@_;
597             $self->throw("Graph is not DAG:".$self->{graph})
598 1489 50       3052 unless $self->{graph}->is_dag();
599 1489         1793919 my @leaves=$self->{graph}->successorless_vertices();
600 1489         234413 @leaves=sort @leaves;
601 1489         1494 my $numleaves=@leaves;
602 1489         3259 my @roots=$self->{graph}->predecessorless_vertices();
603 1489         225393 my $numroots=@roots;
604             #$self->throw("Graph is not rooted") unless ($numroots == 1);
605 1489         3218 my @nodes=$self->{graph}->vertices();
606 1489         46283 @nodes=sort @nodes;
607 1489         1465 my $numnodes=@nodes;
608 1489         1941 foreach my $node (@nodes) {
609 10147 100       14236 if (! defined $self->{labels}->{$node}) {
610 2546         3139 $self->{labels}->{$node}='';
611             }
612             }
613 1489         1860 $self->{leaves}=\@leaves;
614 1489         1819 $self->{numleaves}=$numleaves;
615 1489         1393 $self->{roots}=\@roots;
616 1489         1485 $self->{numroots}=$numroots;
617 1489         1830 $self->{nodes}=\@nodes;
618 1489         1720 $self->{numnodes}=$numnodes;
619 1489         1618 $self->{mudata}={};
620 1489         2728 $self->{h}={};
621 1489         2958 $self->compute_height();
622 1489         2355 $self->compute_mu();
623 1489         2996 return $self;
624             }
625              
626             # Hybridizing
627              
628             sub is_attackable {
629 3236     3236 0 3597 my ($self,$u1,$v1,$u2,$v2)=@_;
630 3236 100 100     3861 if ( $self->is_hybrid_node($v1) ||
      100        
      100        
      66        
      100        
631             $self->is_hybrid_node($v2) ||
632             $self->graph->is_reachable($v2,$u1) ||
633             (($u1 eq $u2)&&($v1 eq $v2)) ||
634 2099 100       59926 (! scalar grep {($_ ne $v2) && ($self->is_tree_node($_))}
635             $self->graph->successors($u2)))
636             {
637 2578         734890 return 0;
638             }
639 658         1803 return 1;
640             }
641              
642             sub do_attack {
643 658     658 0 957 my ($self,$u1,$v1,$u2,$v2,$lbl)=@_;
644 658         700 my $graph=$self->{graph};
645 658         1402 $graph->delete_edge($u1,$v1);
646 658         39388 $graph->delete_edge($u2,$v2);
647 658         32794 $graph->add_edge($u1,"T$lbl");
648 658         41721 $graph->add_edge("T$lbl",$v1);
649 658         25370 $graph->add_edge($u2,"#H$lbl");
650 658         35224 $graph->add_edge("#H$lbl",$v2);
651 658         24949 $graph->add_edge("T$lbl","#H$lbl");
652 658         24274 $self->build_from_graph($graph);
653             }
654              
655              
656             # Computation of mu-data
657              
658             sub compute_mu {
659 1489     1489 0 1254 my ($self)=@_;
660 1489         1452 my $graph=$self->{graph};
661 1489         1133 my $mudata=$self->{mudata};
662 1489         1353 my @leaves=@{$self->{leaves}};
  1489         2837  
663 1489         1403 my $numleaves=$self->{numleaves};
664 1489         2633 for (my $i=0;$i<$numleaves;$i++) {
665 4504         8621 my $vec=Bio::PhyloNetwork::muVector->new($numleaves);
666 4504         6903 $vec->[$i]=1;
667 4504         8986 $mudata->{$leaves[$i]}=$vec;
668             }
669 1489         1130 my $h=1;
670 1489         1231 while (my @nodes=grep {$self->{h}->{$_} == $h} @{$self->{nodes}} )
  49409         48340  
  6761         8024  
671             {
672 5272         4872 foreach my $u (@nodes) {
673 5643         8662 my $vec=Bio::PhyloNetwork::muVector->new($numleaves);
674 5643         9396 foreach my $son ($graph->successors($u)) {
675 9972         143043 $vec+=$mudata->{$son};
676             }
677 5643         8677 $mudata->{$u}=$vec;
678             }
679 5272         5298 $h++;
680             }
681             }
682              
683             sub compute_height {
684 1489     1489 0 1615 my ($self)=@_;
685 1489         1378 my $graph=$self->{graph};
686 1489         1145 my @leaves=@{$self->{leaves}};
  1489         2718  
687 1489         1657 foreach my $leaf (@leaves) {
688 4504         4539 $self->{h}->{$leaf}=0;
689             }
690 1489         1174 my $h=0;
691 1489 100       1212 while (my @nodes=grep {(defined $self->{h}->{$_})&&($self->{h}->{$_} == $h)}
  59556         132922  
692 8250         8782 @{$self->{nodes}} )
693             {
694 6761         6066 foreach my $node (@nodes) {
695 15899         40578 foreach my $parent ($graph->predecessors($node)) {
696 13281         230400 $self->{h}->{$parent}=$h+1;
697             }
698             }
699 6761         49864 $h++;
700             }
701             }
702              
703             # Tests
704              
705             =head2 is_leaf
706              
707             Title : is_leaf
708             Usage : my $b=$net->is_leaf($u)
709             Function: tests if $u is a leaf in $net
710             Returns : boolean
711             Args : scalar
712              
713             =cut
714              
715             sub is_leaf {
716 7299     7299 1 7061 my ($self,$node)=@_;
717 7299 100       12207 if ($self->{graph}->out_degree($node) == 0) {return 1;}
  2848         83992  
718 4451         409777 return 0;
719             }
720              
721             =head2 is_root
722              
723             Title : is_root
724             Usage : my $b=$net->is_root($u)
725             Function: tests if $u is the root of $net
726             Returns : boolean
727             Args : scalar
728              
729             =cut
730              
731             sub is_root {
732 1     1 1 1 my ($self,$node)=@_;
733 1 50       4 if ($self->{graph}->in_degree($node) == 0) {return 1;}
  1         40  
734 0         0 return 0;
735             }
736              
737             =head2 is_tree_node
738              
739             Title : is_tree_node
740             Usage : my $b=$net->is_tree_node($u)
741             Function: tests if $u is a tree node in $net
742             Returns : boolean
743             Args : scalar
744              
745             =cut
746              
747             sub is_tree_node {
748 1191     1191 1 1272 my ($self,$node)=@_;
749 1191 100       2276 if ($self->{graph}->in_degree($node) <= 1) {return 1;}
  850         58029  
750 341         35510 return 0;
751             }
752              
753             =head2 is_hybrid_node
754              
755             Title : is_hybrid_node
756             Usage : my $b=$net->is_hybrid_node($u)
757             Function: tests if $u is a hybrid node in $net
758             Returns : boolean
759             Args : scalar
760              
761             =cut
762              
763             sub is_hybrid_node {
764 15951     15951 1 12673 my ($self,$node)=@_;
765 15951 100       26326 if ($self->{graph}->in_degree($node) > 1) {return 1;}
  2981         307020  
766 12970         832176 return 0;
767             }
768              
769             sub has_tree_child {
770             # has_tree_child(g,u) returns 1 if u has a tree child in graph g
771             # and 0 otherwise
772 1505     1505 0 139508 my $g=shift(@_);
773 1505         1256 my $node=shift(@_);
774 1505         2449 my @Sons=$g->successors($node);
775 1505         49362 foreach my $son (@Sons) {
776 1947 100       44515 if ($g->in_degree($son)==1) {
777 1505         104651 return 1;
778             }
779             }
780 0         0 return 0;
781             }
782              
783             =head2 is_tree_child
784              
785             Title : is_tree_child
786             Usage : my $b=$net->is_tree_child()
787             Function: tests if $net is a Tree-Child phylogenetic network
788             Returns : boolean
789             Args : Bio::PhyloNetwork
790              
791             =cut
792              
793             sub is_tree_child {
794 9095     9095 1 7070 my ($self)=@_;
795 9095 100       13185 if (defined $self->{is_tree_child}) {
796 8794         13742 return $self->{is_tree_child};
797             }
798 301         363 $self->{is_tree_child}=0;
799 301         341 my $graph=$self->{graph};
800 301         283 foreach my $node (@{$self->{nodes}}) {
  301         602  
801 2443 50 66     24039 return 0 unless ($graph->out_degree($node)==0 ||
802             has_tree_child($graph,$node));
803             }
804 301         5759 $self->{is_tree_child}=1;
805 301         580 return 1;
806             }
807              
808             # Accessors
809              
810             =head2 nodes
811              
812             Title : nodes
813             Usage : my @nodes=$net->nodes()
814             Function: returns the set of nodes of $net
815             Returns : array
816             Args : none
817              
818             =cut
819              
820             sub nodes {
821 11703     11703 1 8329 my ($self)=@_;
822 11703         7141 return @{$self->{nodes}};
  11703         34512  
823             }
824              
825             =head2 leaves
826              
827             Title : leaves
828             Usage : my @leaves=$net->leaves()
829             Function: returns the set of leaves of $net
830             Returns : array
831             Args : none
832              
833             =cut
834              
835             sub leaves {
836 136     136 1 116 my ($self)=@_;
837 136         92 return @{$self->{leaves}};
  136         325  
838             }
839              
840             =head2 roots
841              
842             Title : roots
843             Usage : my @roots=$net->roots()
844             Function: returns the set of roots of $net
845             Returns : array
846             Args : none
847              
848             =cut
849              
850             sub roots {
851 280     280 1 240 my ($self)=@_;
852 280         229 return @{$self->{roots}};
  280         547  
853             }
854              
855             =head2 internal_nodes
856              
857             Title : internal_nodes
858             Usage : my @internal_nodes=$net->internal_nodes()
859             Function: returns the set of internal nodes of $net
860             Returns : array
861             Args : none
862              
863             =cut
864              
865             sub internal_nodes {
866 654     654 1 623 my ($self)=@_;
867 654         890 return grep {! $self->is_leaf($_)} $self->nodes();
  4870         5814  
868             }
869              
870             =head2 tree_nodes
871              
872             Title : tree_nodes
873             Usage : my @tree_nodes=$net->tree_nodes()
874             Function: returns the set of tree nodes of $net
875             Returns : array
876             Args : none
877              
878             =cut
879              
880             sub tree_nodes {
881 4     4 1 5 my ($self)=@_;
882 4         6 return grep {$self->is_tree_node($_)} $self->nodes();
  44         58  
883             }
884              
885             =head2 hybrid_nodes
886              
887             Title : hybrid_nodes
888             Usage : my @hybrid_nodes=$net->hybrid_nodes()
889             Function: returns the set of hybrid nodes of $net
890             Returns : array
891             Args : none
892              
893             =cut
894              
895             sub hybrid_nodes {
896 1095     1095 1 1014 my ($self)=@_;
897 1095         1438 return grep {$self->is_hybrid_node($_)} $self->nodes();
  7653         9926  
898             }
899              
900             =head2 graph
901              
902             Title : graph
903             Usage : my $graph=$net->graph()
904             Function: returns the underlying graph of $net
905             Returns : Graph::Directed
906             Args : none
907              
908             =cut
909              
910             sub graph {
911 6016     6016 1 947831 my ($self)=@_;
912 6016         12059 return $self->{graph};
913             }
914              
915             =head2 edges
916              
917             Title : edges
918             Usage : my @edges=$net->edges()
919             Function: returns the set of edges of $net
920             Returns : array
921             Args : none
922              
923             Each element in the array is an anonimous array whose first element is the
924             head of the edge and the second one is the tail.
925              
926             =cut
927              
928             sub edges {
929 36     36 1 40 my ($self)=@_;
930 36         105 return $self->{graph}->edges();
931             }
932              
933             =head2 tree_edges
934              
935             Title : tree_edges
936             Usage : my @tree_edges=$net->tree_edges()
937             Function: returns the set of tree edges of $net
938             (those whose tail is a tree node)
939             Returns : array
940             Args : none
941              
942             =cut
943              
944             sub tree_edges {
945 4     4 1 459 my ($self)=@_;
946 4         7 return grep {$self->is_tree_node($_->[1])} $self->edges();
  33         464  
947             }
948              
949             =head2 hybrid_edges
950              
951             Title : hybrid_edges
952             Usage : my @hybrid_edges=$net->hybrid_edges()
953             Function: returns the set of hybrid edges of $net
954             (those whose tail is a hybrid node)
955             Returns : array
956             Args : none
957              
958             =cut
959              
960             sub hybrid_edges {
961 4     4 1 5 my ($self)=@_;
962 4         9 return grep {$self->is_hybrid_node($_->[1])} $self->edges();
  33         453  
963             }
964              
965             =head2 explode
966              
967             Title : explode
968             Usage : my @trees=$net->explode()
969             Function: returns the representation of $net by a set of
970             Bio::Tree:Tree objects
971             Returns : array
972             Args : none
973              
974             =cut
975              
976             sub explode {
977 1     1 1 3 my ($self)=@_;
978 1         1 my @trees;
979 1         4 $self->explode_rec(\@trees);
980 1         52 return @trees;
981             }
982              
983             sub explode_rec {
984 15     15 0 16 my ($self,$trees)=@_;
985 15         29 my @h = $self->hybrid_nodes;
986 15 100       41 if (scalar @h) {
987 7         8 my $v = shift @h;
988 7         19 for my $u ($self->{graph}->predecessors($v)) {
989 14         747 $self->{graph}->delete_edge($u,$v);
990 14         975 $self->explode_rec($trees);
991 14         440 $self->{graph}->add_edge($u,$v);
992             }
993             } else {
994 8         21 my $io = IO::String->new($self->eNewick);
995 8         342 my $treeio = Bio::TreeIO->new(-format => 'newick', -fh => $io);
996 8         15 my $tree = $treeio->next_tree;
997 8         24 $tree->contract_linear_paths;
998 8         6 push @{$trees}, $tree;
  8         29  
999             }
1000             }
1001              
1002             =head2 mudata
1003              
1004             Title : mudata
1005             Usage : my %mudata=$net->mudata()
1006             Function: returns the representation of $net by its mu-data
1007             Returns : hash
1008             Args : none
1009              
1010             $net-Emudata() returns a hash with keys the nodes of $net and each value is a
1011             muVector object holding its mu-vector.
1012              
1013             =cut
1014              
1015             sub mudata {
1016 1     1 1 2 my ($self)=@_;
1017 1         2 return %{$self->{mudata}};
  1         8  
1018             }
1019              
1020             sub mudata_node {
1021 0     0 0 0 my ($self,$u)=@_;
1022 0         0 return $self->{mudata}{$u};
1023             }
1024              
1025             =head2 heights
1026              
1027             Title : heights
1028             Usage : my %heights=$net->heights()
1029             Function: returns the heights of the nodes of $net
1030             Returns : hash
1031             Args : none
1032              
1033             $net-Eheights() returns a hash with keys the nodes of $net and each value
1034             is its height.
1035              
1036             =cut
1037              
1038             sub heights {
1039 1     1 1 2 my ($self)=@_;
1040 1         2 return %{$self->{h}};
  1         7  
1041             }
1042              
1043             sub height_node {
1044 0     0 0 0 my ($self,$u)=@_;
1045 0         0 return $self->{h}{$u};
1046             }
1047              
1048             =head2 mu_distance
1049              
1050             Title : mu_distance
1051             Usage : my $dist=$net1->mu_distance($net2)
1052             Function: Computes the mu-distance between the networks $net1 and $net2 on
1053             the same set of leaves
1054             Returns : scalar
1055             Args : Bio::PhyloNetwork
1056              
1057             =cut
1058              
1059             sub mu_distance {
1060 4546     4546 1 65022 my ($net1,$net2)=@_;
1061 4546         6010 my @nodes1=$net1->nodes;
1062 4546         6174 my @nodes2=$net2->nodes;
1063 4546         81035 my $comp = Array::Compare->new;
1064             $net1->throw("Cannot compare phylogenetic networks on different set of leaves")
1065 4546 50       350339 unless $comp->compare($net1->{leaves},$net2->{leaves});
1066 4546 50       593882 $net1->warn("Not a tree-child phylogenetic network")
1067             unless $net1->is_tree_child();
1068 4546 50       5189 $net2->warn("Not a tree-child phylogenetic network")
1069             unless $net2->is_tree_child();
1070 4546         3153 my @leaves=@{$net1->{leaves}};
  4546         8257  
1071 4546         3457 my %matched1;
1072             my %matched2;
1073 4546         4818 OUTER: foreach my $node1 (@nodes1) {
1074 36890         28486 foreach my $node2 (@nodes2) {
1075 230165 100 66     747654 if (
      100        
1076             (! exists $matched1{$node1}) && (! exists $matched2{$node2}) &&
1077             ($net1->{mudata}{$node1} == $net2->{mudata}{$node2})
1078             ) {
1079 21218         23133 $matched1{$node1}=$node2;
1080 21218         17992 $matched2{$node2}=$node1;
1081 21218         24345 next OUTER;
1082             }
1083             }
1084             }
1085 4546         25784 return (scalar @nodes1)+(scalar @nodes2)-2*(scalar keys %matched1);
1086             }
1087              
1088             =head2 mu_distance_generalized
1089              
1090             Title : mu_distance_generalized
1091             Usage : my $dist=$net1->mu_distance($net2)
1092             Function: Computes the mu-distance between the topological restrictions of
1093             networks $net1 and $net2 on its common set of leaves
1094             Returns : scalar
1095             Args : Bio::PhyloNetwork
1096              
1097             =cut
1098              
1099             sub mu_distance_generalized {
1100 0     0 1 0 my ($net1,$net2)=@_;
1101 0         0 my ($netr1,$netr2)=$net1->topological_restriction($net2);
1102 0         0 return $netr1->mu_distance($netr2);
1103             }
1104              
1105             # mudata_string (code mu_data in a string; useful for isomorphism testing)
1106              
1107             sub mudata_string_node {
1108 2866     2866 0 3474 my ($self,$u)=@_;
1109 2866         4030 return $self->{mudata}->{$u}->display();
1110             }
1111              
1112             sub mudata_string {
1113 28276     28276 0 16643 my ($self)=@_;
1114 28276 100       64280 return $self->{mudata_string} if defined $self->{mudata_string};
1115 649         1015 my @internal=$self->internal_nodes;
1116 649         1192 my $mus=$self->{mudata};
1117 649         1829 @internal=sort {$mus->{$b} <=> $mus->{$a} } @internal;
  3794         6663  
1118 649         756 my $str="";
1119 649         690 foreach my $node (@internal) {
1120 2866         3177 $str=$str.$self->mudata_string_node($node);
1121             }
1122 649         921 $self->{mudata_string}=$str;
1123 649         1297 return $str;
1124             }
1125              
1126             sub is_mu_isomorphic {
1127 14138     14138 0 9730 my ($net1,$net2)=@_;
1128 14138         12368 return ($net1->mudata_string() eq $net2->mudata_string());
1129             }
1130              
1131             # tripartitions
1132              
1133             sub compute_tripartition_node {
1134 12     12 0 20 my ($self,$u)=@_;
1135             $self->warn("Cannot compute tripartitions on unrooted networks. Will assume one at random")
1136 12 50       28 unless ($self->{numroots} == 1);
1137 12         16 my $root=$self->{roots}->[0];
1138 12         13 my $graph=$self->{graph};
1139 12         24 my $graphPruned=$graph->copy();
1140 12         8061 $graphPruned->delete_vertex($u);
1141 12         1329 my $tripartition="";
1142 12         14 foreach my $leaf (@{$self->{leaves}}) {
  12         22  
1143 36         23 my $type;
1144 36 100       79 if ($graph->is_reachable($u,$leaf)) {
1145 19 100       3140 if ($graphPruned->is_reachable($root,$leaf)) {$type="B";}
  2         3105  
1146 17         29173 else {$type="A";}
1147             }
1148 17         4409 else {$type="C";}
1149 36         52 $tripartition .= $type;
1150             }
1151 12         136 $self->{tripartitions}->{$u}=$tripartition;
1152             }
1153              
1154             sub compute_tripartitions {
1155 2     2 0 3 my ($self)=@_;
1156 2         2 foreach my $node (@{$self->{nodes}}) {
  2         6  
1157 12         22 $self->compute_tripartition_node($node);
1158             }
1159             }
1160              
1161             =head2 tripartitions
1162              
1163             Title : tripartitions
1164             Usage : my %tripartitions=$net->tripartitions()
1165             Function: returns the set of tripartitions of $net
1166             Returns : hash
1167             Args : none
1168              
1169             $net-Etripartitions() returns a hash with keys the nodes of $net and each value
1170             is a string representing the tripartition of the leaves induced by the node.
1171             A string "BCA..." associated with a node u (e.g.) means, the first leaf is in
1172             the set B(u), the second one in C(u), the third one in A(u), and so on.
1173              
1174             =cut
1175              
1176             sub tripartitions {
1177 1     1 1 5 my ($self)=@_;
1178 1 50       6 $self->compute_tripartitions() unless defined $self->{tripartitions};
1179 1         2 return %{$self->{tripartitions}};
  1         8  
1180             }
1181              
1182             # to do: change to tri_distance and test for TC and time-cons
1183              
1184             sub tripartition_error {
1185 1     1 0 5 my ($net1,$net2)=@_;
1186 1         38 my $comp = Array::Compare->new;
1187             $net1->throw("Cannot compare phylogenetic networks on different set of leaves")
1188 1 50       132 unless $comp->compare($net1->{leaves},$net2->{leaves});
1189 1 50       174 $net1->warn("Not a tree-child phylogenetic network")
1190             unless $net1->is_tree_child();
1191 1 50       3 $net2->warn("Not a tree-child phylogenetic network")
1192             unless $net2->is_tree_child();
1193 1 50       5 $net1->warn("Not a time-consistent network")
1194             unless $net1->is_time_consistent();
1195 1 50       3 $net2->warn("Not a time-consistent network")
1196             unless $net2->is_time_consistent();
1197 1 50       5 $net1->compute_tripartitions() unless defined $net1->{tripartitions};
1198 1 50       5 $net2->compute_tripartitions() unless defined $net2->{tripartitions};
1199 1         5 my @edges1=$net1->{graph}->edges();
1200 1         96 my @edges2=$net2->{graph}->edges();
1201 1         54 my ($FN,$FP)=(0,0);
1202 1         2 foreach my $edge1 (@edges1) {
1203 7         2 my $matched=0;
1204 7         8 foreach my $edge2 (@edges2) {
1205 24 100       38 if ($net1->{tripartitions}->{$edge1->[1]} eq
1206             $net2->{tripartitions}->{$edge2->[1]}) {
1207 5         6 $matched=1;
1208 5         2 last;
1209             }
1210             }
1211 7 100       15 if (! $matched) {$FN++;}
  2         3  
1212             }
1213 1         3 foreach my $edge2 (@edges2) {
1214 4         5 my $matched=0;
1215 4         3 foreach my $edge1 (@edges1) {
1216 18 100       29 if ($net1->{tripartitions}->{$edge1->[1]} eq
1217             $net2->{tripartitions}->{$edge2->[1]}) {
1218 3         2 $matched=1;
1219 3         3 last;
1220             }
1221             }
1222 4 100       8 if (! $matched) {$FP++;}
  1         1  
1223             }
1224 1         16 return ($FN/(scalar @edges1)+$FP/(scalar @edges2))/2;
1225             }
1226              
1227             # Time-consistency
1228              
1229             # to do: add weak time consistency
1230              
1231             =head2 is_time_consistent
1232              
1233             Title : is_time_consistent
1234             Usage : my $b=$net->is_time_consistent()
1235             Function: tests if $net is (strong) time-consistent
1236             Returns : boolean
1237             Args : none
1238              
1239             =cut
1240              
1241             sub is_time_consistent {
1242 4     4 1 9 my ($self)=@_;
1243             $self->compute_temporal_representation()
1244 4 100       15 unless exists $self->{has_temporal_representation};
1245 4         14 return $self->{has_temporal_representation};
1246             }
1247              
1248             =head2 temporal_representation
1249              
1250             Title : temporal_representation
1251             Usage : my %time=$net->temporal_representation()
1252             Function: returns a hash containing a temporal representation of $net, or 0
1253             if $net is not time-consistent
1254             Returns : hash
1255             Args : none
1256              
1257             =cut
1258              
1259             sub temporal_representation {
1260 1     1 1 2 my ($self)=@_;
1261 1 50       2 if ($self->is_time_consistent) {
1262 1         1 return %{$self->{temporal_representation}};
  1         7  
1263             }
1264 0         0 return 0;
1265             }
1266              
1267             sub compute_temporal_representation {
1268 3     3 0 4 my ($self)=@_;
1269 3         9 my $quotient=Graph::Directed->new();
1270 3         348 my $classes=find_classes($self);
1271 3         2 my %repr;
1272 3         11 map {$repr{$_}=$classes->{$_}[0]} $self->nodes();
  19         25  
1273 3         7 foreach my $e ($self->tree_edges()) {
1274 14         451 $quotient->add_edge($repr{$e->[0]},$repr{$e->[1]});
1275             }
1276 3         107 my %temp;
1277 3         3 my $depth=0;
1278 3         7 while ($quotient->vertices()) {
1279 9 50       232 if (my @svs=$quotient->predecessorless_vertices()) {
1280 9         597 foreach my $sv (@svs) {
1281 15         20 $temp{$sv}=$depth;
1282             }
1283 9         21 $quotient->delete_vertices(@svs);
1284             } else {
1285 0         0 return 0;
1286             }
1287 9         1014 $depth++;
1288             }
1289 3         48 foreach my $node (@{$self->{nodes}}) {
  3         6  
1290 19         21 $temp{$node}=$temp{$repr{$node}}
1291             }
1292 3         5 $self->{temporal_representation}=\%temp;
1293 3         16 $self->{has_temporal_representation}=1;
1294             }
1295              
1296             sub find_classes {
1297 3     3 0 5 my ($self)=@_;
1298 3         4 my $classes={};
1299 3         7 map {$classes->{$_}=[$_]} $self->nodes();
  19         27  
1300 3         9 foreach my $e ($self->hybrid_edges()) {
1301 4         10 $classes=join_classes($classes,$e->[0],$e->[1]);
1302             }
1303 3         8 return $classes;
1304             }
1305              
1306             sub join_classes {
1307 4     4 0 5 my ($classes,$u,$v)=@_;
1308 4         4 my @clu=@{$classes->{$u}};
  4         7  
1309 4         4 my @clv=@{$classes->{$v}};
  4         7  
1310 4         7 my @cljoin=(@clu,@clv);
1311 4         4 map {$classes->{$_}=\@cljoin} @cljoin;
  10         13  
1312 4         9 return $classes;
1313             }
1314              
1315             # alignment
1316              
1317             =head2 contract_elementary
1318              
1319              
1320             Title : contract_elementary
1321             Usage : my ($contracted,$blocks)=$net->contract_elementary();
1322             Function: Returns the network $contracted, obtained by contracting elementary
1323             paths of $net into edges. The reference $blocks points to a hash
1324             where, for each node of $contracted, gives the corresponding nodes
1325             of $net that have been deleted.
1326             Returns : Bio::PhyloNetwork,reference to hash
1327             Args : none
1328              
1329             =cut
1330              
1331             sub contract_elementary {
1332 4     4 1 4 my ($self)=@_;
1333              
1334 4         11 my $contracted=$self->graph->copy();
1335 4         4429 my @nodes=$self->nodes();
1336 4         9 my $mus=$self->{mudata};
1337 4         6 my $hs=$self->{h};
1338 4         3 my %blocks;
1339 4         8 foreach my $u (@nodes) {
1340 44         48 $blocks{$u}=[$u];
1341             }
1342 4         10 my @elementary=grep { $contracted->out_degree($_) == 1} $self->tree_nodes();
  36         2067  
1343 4         325 @elementary=sort {$mus->{$b} <=> $mus->{$a} ||
1344 0 0       0 $hs->{$b} <=> $hs->{$a}} @elementary;
1345 4         17 foreach my $elem (@elementary) {
1346 0         0 my @children=$contracted->successors($elem);
1347 0         0 my $child=$children[0];
1348 0 0       0 if ($contracted->in_degree($elem) == 1) {
1349 0         0 my @parents=$contracted->predecessors($elem);
1350 0         0 my $parent=$parents[0];
1351 0         0 $contracted->add_edge($parent,$child);
1352             }
1353 0         0 $contracted->delete_vertex($elem);
1354 0         0 my @blch=@{$blocks{$child}};
  0         0  
1355 0         0 my @blem=@{$blocks{$elem}};
  0         0  
1356 0         0 $blocks{$child}=[@blem,@blch];
1357 0         0 delete $blocks{$elem};
1358             }
1359 4         20 my $contr=Bio::PhyloNetwork->new(-graph => $contracted);
1360 4         74 return $contr,\%blocks;
1361             }
1362              
1363             =head2 optimal_alignment
1364              
1365             Title : optimal_alignment
1366             Usage : my ($weight,$alignment,$wgts)=$net->optimal_alignment($net2)
1367             Function: returns the total weight of an optimal alignment,
1368             the alignment itself, and partial weights
1369             between the networks $net1 and $net2 on the same set of leaves.
1370             An optional argument allows one to use the Manhattan (default) or the
1371             Hamming distance between mu-vectors.
1372             Returns : scalar,reference to hash,reference to hash
1373             Args : Bio::PhyloNetwork,
1374             -metric => string (optional)
1375              
1376             Supported strings for the -metric parameter are 'Manhattan' or 'Hamming'.
1377              
1378             =cut
1379              
1380             sub optimal_alignment {
1381 2     2 1 1175 my ($net1,$net2,%params)=@_;
1382              
1383 2         7 my ($net1cont,$blocks1)=contract_elementary($net1);
1384 2         5 my ($net2cont,$blocks2)=contract_elementary($net2);
1385 2         9 my ($wc,$alignc,$weightc)=
1386             optimal_alignment_noelementary($net1cont,$net2cont,%params);
1387 2         4 my %alignment=();
1388 2         3 my $totalweigth=0;
1389 2         3 my %weigths=();
1390 2         8 foreach my $u1 (keys %$alignc) {
1391 18         13 my $u2=$alignc->{$u1};
1392 18         11 my @block1=@{$blocks1->{$u1}};
  18         30  
1393 18         11 my @block2=@{$blocks2->{$u2}};
  18         22  
1394 18   66     47 while (@block1 && @block2) {
1395 18         17 my $u1dc=pop @block1;
1396 18         17 my $u2dc=pop @block2;
1397 18         13 $alignment{$u1dc}=$u2dc;
1398 18         16 $weigths{$u1dc}=$weightc->{$u1};
1399 18         36 $totalweigth+=$weigths{$u1dc};
1400             }
1401             }
1402 2         27 return $totalweigth,\%alignment,\%weigths;
1403             }
1404              
1405             sub optimal_alignment_noelementary {
1406 2     2 0 4 my ($net1,$net2,%params)=@_;
1407              
1408 2         77 my $comp = Array::Compare->new;
1409             $net1->throw("Cannot align phylogenetic networks on different set of leaves")
1410 2 50       266 unless $comp->compare($net1->{leaves},$net2->{leaves});
1411 2         342 my $distance;
1412 2 100 66     14 if ((defined $params{-metric})and ($params{-metric} eq 'Hamming')) {
1413 1         1 $distance='Hamming';
1414             } else {
1415 1         2 $distance='Manhattan';
1416             }
1417 2         3 my $numleaves=$net1->{numleaves};
1418 2         8 my @nodes1=$net1->internal_nodes();
1419 2         7 my @nodes2=$net2->internal_nodes();
1420 2         4 my $numnodes1=@nodes1;
1421 2         3 my $numnodes2=@nodes2;
1422 2         3 my @matrix=();
1423 2         9 for (my $i=0;$i<$numnodes1;$i++) {
1424 18         22 my @row=();
1425 18         26 for (my $j=0;$j<$numnodes2;$j++) {
1426 90         142 push @row,weight($net1,$nodes1[$i],$net2,$nodes2[$j],$distance);
1427             }
1428 18         35 push @matrix,\@row;
1429             }
1430 2         3 my @alignment=();
1431 2         12 Algorithm::Munkres::assign(\@matrix,\@alignment);
1432 2         6512 my %alignmenthash;
1433             my %weighthash;
1434 2         3 my $totalw=0;
1435 2         3 foreach my $leaf (@{$net1->{leaves}}) {
  2         14  
1436 8         12 $alignmenthash{$leaf}=$leaf;
1437 8         11 $weighthash{$leaf}=0;
1438             }
1439 2         8 for (my $i=0;$i<$numnodes1;$i++) {
1440 18 100       32 if (defined $nodes2[$alignment[$i]]) {
1441 10         11 $alignmenthash{$nodes1[$i]}=$nodes2[$alignment[$i]];
1442 10         14 $weighthash{$nodes1[$i]}=$matrix[$i][$alignment[$i]];
1443 10         14 $totalw += $matrix[$i][$alignment[$i]];
1444             }
1445             }
1446 2         23 return $totalw,\%alignmenthash,\%weighthash;
1447             }
1448              
1449             =head2 optimal_alignment_generalized
1450              
1451             Title : optimal_alignment_generalized
1452             Usage : my ($weight,%alignment)=$net->optimal_alignment_generalized($net2)
1453             Function: returns the wieght of an optimal alignment, and the alignment itself,
1454             between the topological restriction of the networks $net1 and $net2
1455             on the set of common leaves.
1456             An optional argument allows one to use the Manhattan (default) or the
1457             Hamming distance between mu-vectors.
1458             Returns : scalar,hash
1459             Args : Bio::PhyloNetwork,
1460             -metric => string (optional)
1461              
1462             Supported strings for the -metric parameter are 'Manhattan' or 'Hamming'.
1463              
1464             =cut
1465              
1466             sub optimal_alignment_generalized {
1467 0     0 1 0 my ($net1,$net2,%params)=@_;
1468 0         0 my ($netr1,$netr2)=$net1->topological_restriction($net2);
1469 0         0 return $netr1->optimal_alignment($netr2,%params);
1470             }
1471              
1472             sub weight {
1473 90     90 0 97 my ($net1,$v1,$net2,$v2,$distance)=@_;
1474 90         49 my $w;
1475 90 50       124 if (! defined $distance) {
1476 0         0 $distance='Manhattan';
1477             }
1478 90 100       104 if ($distance eq 'Hamming') {
1479 45         91 $w=$net1->{mudata}->{$v1}->hamming($net2->{mudata}->{$v2});
1480             } else {
1481 45         94 $w=$net1->{mudata}->{$v1}->manhattan($net2->{mudata}->{$v2});
1482             }
1483 90 100 100     98 if (($net1->is_tree_node($v1) && $net2->is_hybrid_node($v2)) ||
      100        
      66        
1484             ($net2->is_tree_node($v2) && $net1->is_hybrid_node($v1))
1485             )
1486             {
1487 36         58 $w +=1/(2*$net1->{numleaves});
1488             }
1489 90         199 return $w;
1490             }
1491              
1492              
1493             =head2 topological_restriction
1494              
1495             Title : topological_restriction
1496             Usage : my ($netr1,$netr2)=$net1->topological_restriction($net2)
1497             Function: returns the topological restriction of $net1 and $net2 on its
1498             common set of leaves
1499             Returns : Bio::PhyloNetwork, Bio::PhyloNetwork
1500             Args : Bio::PhyloNetwork
1501              
1502             =cut
1503              
1504             sub topological_restriction {
1505 0     0 1 0 my ($net1,$net2)=@_;
1506              
1507 0         0 my @leaves1=$net1->leaves();
1508 0         0 my @leaves2=$net2->leaves();
1509 0         0 my $numleaves1=scalar @leaves1;
1510 0         0 my $numleaves2=scalar @leaves2;
1511 0         0 my %position1;
1512 0         0 for (my $i=0; $i<$numleaves1; $i++) {
1513 0         0 $position1{$leaves1[$i]}=$i;
1514             }
1515 0         0 my %position2;
1516 0         0 my @commonleaves=();
1517 0         0 for (my $j=0; $j<$numleaves2; $j++) {
1518 0 0       0 if (defined $position1{$leaves2[$j]}) {
1519 0         0 push @commonleaves,$leaves2[$j];
1520 0         0 $position2{$leaves2[$j]}=$j;
1521             }
1522             }
1523 0         0 my $graphred1=$net1->{graph}->copy();
1524 0         0 my $graphred2=$net2->{graph}->copy();
1525             OUTER1:
1526 0         0 foreach my $u ($graphred1->vertices()) {
1527 0         0 my $mu=$net1->mudata_node($u);
1528 0         0 foreach my $leaf (@commonleaves) {
1529 0 0       0 if ($mu->[$position1{$leaf}]>0) {
1530 0         0 next OUTER1;
1531             }
1532             }
1533 0         0 $graphred1->delete_vertex($u);
1534             }
1535             OUTER2:
1536 0         0 foreach my $u ($graphred2->vertices()) {
1537 0         0 my $mu=$net2->mudata_node($u);
1538 0         0 foreach my $leaf (@commonleaves) {
1539 0 0       0 if ($mu->[$position2{$leaf}]>0) {
1540 0         0 next OUTER2;
1541             }
1542             }
1543 0         0 $graphred2->delete_vertex($u);
1544             }
1545 0         0 my $netr1=Bio::PhyloNetwork->new(-graph => $graphred1);
1546 0         0 my $netr2=Bio::PhyloNetwork->new(-graph => $graphred2);
1547 0         0 return ($netr1,$netr2);
1548             }
1549              
1550             # Functions for eNewick representation
1551              
1552             =head2 eNewick
1553              
1554             Title : eNewick
1555             Usage : my $str=$net->eNewick()
1556             Function: returns the eNewick representation of $net without labeling
1557             internal tree nodes
1558             Returns : string
1559             Args : none
1560              
1561             =cut
1562              
1563             sub eNewick {
1564 144     144 1 1288 my ($self)=@_;
1565 144         154 my $str="";
1566 144         163 my $seen={};
1567 144         251 foreach my $root ($self->roots()) {
1568 144         313 $str=$str.$self->eNewick_aux($root,$seen,undef)."; ";
1569             }
1570 144         483 return $str;
1571             }
1572              
1573             sub eNewick_aux {
1574 1266     1266 0 1334 my ($self,$node,$seen,$parent)=@_;
1575 1266         965 my $str='';
1576 1266 100 100     1606 if ($self->is_leaf($node) ||
1577             (defined $seen->{$node}) )
1578             {
1579 606         858 $str=make_label($self,$parent,$node);
1580             }
1581             else {
1582 660         807 $seen->{$node}=1;
1583 660         1321 my @sons=$self->{graph}->successors($node);
1584 660         21234 $str="(";
1585 660         732 foreach my $son (@sons) {
1586 1122         1982 $str=$str.$self->eNewick_aux($son,$seen,$node).",";
1587             }
1588 660         667 chop($str);
1589 660         807 $str.=")".make_label($self,$parent,$node);
1590             }
1591 1266         2974 return $str;
1592             }
1593              
1594             sub make_label {
1595 1266     1266 0 1267 my ($self,$parent,$node)=@_;
1596 1266         1139 my $str='';
1597 1266 100       1840 if ($self->is_hybrid_node($node)) {
1598 300         458 my $lbl=$self->{labels}->{$node};
1599 300 100       909 if ($lbl =~ /#/) {
1600 294         321 $lbl='';
1601             }
1602 300         322 $str.=$lbl; #$self->{labels}->{$node};
1603 300         292 $str.='#';
1604 300 100 66     718 if ((defined $parent) &&
1605             ($self->graph->has_edge_attribute($parent,$node,'type'))) {
1606 6         301 $str.=$self->graph->get_edge_attribute($parent,$node,'type');
1607             }
1608 300         16079 $str.=substr $node,1;
1609             } else {
1610 966         1430 $str.=$self->{labels}->{$node};
1611             }
1612 1266 50 66     2870 if ((defined $parent) &&
1613             ($self->graph->has_edge_weight($parent,$node))) {
1614 0         0 $str.=":".$self->graph->get_edge_weight($parent,$node);
1615             }
1616 1266         115214 return $str;
1617             }
1618              
1619             =head2 eNewick_full
1620              
1621             Title : eNewick_full
1622             Usage : my $str=$net->eNewick_full()
1623             Function: returns the eNewick representation of $net labeling
1624             internal tree nodes
1625             Returns : string
1626             Args : none
1627              
1628             =cut
1629              
1630             sub eNewick_full {
1631 136     136 1 163 my ($self)=@_;
1632 136         139 my $str="";
1633 136         148 my $seen={};
1634 136         248 foreach my $root ($self->roots()) {
1635 136         268 $str=$str.$self->eNewick_full_aux($root,$seen,undef)."; ";
1636             }
1637 136         439 return $str;
1638             }
1639              
1640             sub eNewick_full_aux {
1641 1162     1162 0 1165 my ($self,$node,$seen,$parent)=@_;
1642 1162         859 my $str='';
1643 1162 100 100     1407 if ($self->is_leaf($node) ||
1644             (defined $seen->{$node}) )
1645             {
1646 574         735 $str=make_label_full($self,$parent,$node);
1647             }
1648             else {
1649 588         753 $seen->{$node}=1;
1650 588         1097 my @sons=$self->{graph}->successors($node);
1651 588         23516 $str="(";
1652 588         633 foreach my $son (@sons) {
1653 1026         1599 $str=$str.$self->eNewick_full_aux($son,$seen,$node).",";
1654             }
1655 588         712 chop($str);
1656 588         765 $str.=")".make_label_full($self,$parent,$node);
1657             }
1658 1162         2469 return $str;
1659             }
1660              
1661             sub make_label_full {
1662 1162     1162 0 1074 my ($self,$parent,$node)=@_;
1663 1162         993 my $str='';
1664 1162 100       1738 if ($self->is_hybrid_node($node)) {
1665 300         467 my $lbl=$self->{labels}->{$node};
1666 300 100       821 if ($lbl =~ /#/) {
1667 294         317 $lbl='';
1668             }
1669 300         288 $str.=$lbl; #$self->{labels}->{$node};
1670 300         259 $str.='#';
1671 300 100 66     759 if ((defined $parent) &&
1672             ($self->graph->has_edge_attribute($parent,$node,'type'))) {
1673 6         309 $str.=$self->graph->get_edge_attribute($parent,$node,'type');
1674             }
1675 300         15551 $str.=substr $node,1;
1676             } else {
1677 862 100 66     3278 if ((defined $self->{labels}->{$node})&&($self->{labels}->{$node} ne '')) {
1678 858         1113 $str.=$self->{labels}->{$node};
1679             }
1680             else {
1681 4         5 $str.=$node;
1682             }
1683             }
1684 1162 50 66     2525 if ((defined $parent) &&
1685             ($self->graph->has_edge_weight($parent,$node))) {
1686 0         0 $str.=":".$self->graph->get_edge_weight($parent,$node);
1687             }
1688 1162         55769 return $str;
1689             }
1690              
1691             # sub eNewick_full {
1692             # my ($self)=@_;
1693             # my $str="";
1694             # my $seen={};
1695             # foreach my $root ($self->roots()) {
1696             # $str=$str.$self->eNewick_full_aux($root,$seen,undef)."; ";
1697             # }
1698             # return $str;
1699             # }
1700              
1701             # sub eNewick_full_aux {
1702             # my ($self,$node,$seen,$parent)=@_;
1703             # my $str;
1704             # if ($self->is_leaf($node) ||
1705             # (defined $seen->{$node}) )
1706             # {
1707             # if ($self->is_hybrid_node($node)) {
1708             # my $tag=substr $node,1;
1709             # if ((defined $parent) &&
1710             # ($self->graph->has_edge_attribute($parent,$node,'type'))) {
1711             # $str='#'.$self->graph->get_edge_attribute($parent,$node,'type').$tag;
1712             # } else {
1713             # $str=$node;
1714             # }
1715             # } else {
1716             # $str=$node;
1717             # }
1718             # }
1719             # else {
1720             # $seen->{$node}=1;
1721             # my @sons=$self->{graph}->successors($node);
1722             # $str="(";
1723             # foreach my $son (@sons) {
1724             # $str=$str.$self->eNewick_full_aux($son,$seen,$node).",";
1725             # }
1726             # chop($str);
1727             # if ($self->is_hybrid_node($node)) {
1728             # my $tag=substr $node,1;
1729             # if ((defined $parent) &&
1730             # ($self->graph->has_edge_attribute($parent,$node,'type'))) {
1731             # $str.=')#'.$self->graph->get_edge_attribute($parent,$node,'type').$tag;
1732             # } else {
1733             # $str.=")$node";
1734             # }
1735             # } else {
1736             # $str.=")$node";
1737             # }
1738             # }
1739             # if ((defined $parent) &&
1740             # ($self->graph->has_edge_weight($parent,$node))) {
1741             # $str.=":".$self->graph->get_edge_weight($parent,$node);
1742             # }
1743             # return $str;
1744             # }
1745              
1746              
1747             # displaying data
1748              
1749 5     5   34 use overload '""' => \&display;
  5         8  
  5         39  
1750              
1751             =head2 display
1752              
1753             Title : display
1754             Usage : my $str=$net->display()
1755             Function: returns a string containing all the available information on $net
1756             Returns : string
1757             Args : none
1758              
1759             =cut
1760              
1761             sub display {
1762 135     135 1 323 my ($self)=@_;
1763 135         137 my $str="";
1764 135         174 my $graph=$self->{graph};
1765 135         231 my @leaves=$self->leaves();
1766 135         139 my @nodes=@{$self->{nodes}};
  135         285  
1767 135         332 $str.= "Leaves:\t@leaves\n";
1768 135         290 $str.= "Nodes:\t@nodes\n";
1769 135         380 $str.= "Graph:\t$graph\n";
1770 135         43284 $str.= "eNewick:\t".$self->eNewick()."\n";
1771 135         239 $str.= "Full eNewick:\t".$self->eNewick_full()."\n";
1772 135         209 $str.= "Mu-data and heights:\n";
1773 135         177 foreach my $node (@nodes) {
1774 999         936 $str.= "v=$node: ";
1775 999 50       1208 if (exists $self->{labels}->{$node}) {
1776 999         1127 $str.="\tlabel=".$self->{labels}->{$node}.",";
1777             } else {
1778 0         0 $str.="\tlabel=(none),";
1779             }
1780 999         2156 $str.= "\th=".$self->{h}->{$node}.", \tmu=".$self->{mudata}->{$node}."\n";
1781             }
1782 135 50       309 if (exists $self->{has_temporal_representation}) {
1783 0         0 $str.= "Temporal representation:\n";
1784 0 0       0 if ($self->{has_temporal_representation}) {
1785 0         0 foreach my $node (@nodes) {
1786 0         0 $str.= "v=$node; ";
1787 0         0 $str.= "\tt=".$self->{temporal_representation}->{$node}."\n";
1788             }
1789             } else {
1790 0         0 $str.= "Does not exist.\n";
1791             }
1792             }
1793 135 50       231 if (exists $self->{tripartitions}) {
1794 0         0 $str.= "Tripartitions:\n";
1795 0         0 foreach my $node (@nodes) {
1796 0         0 $str.= "v=$node; ";
1797 0         0 $str.= "\ttheta=".$self->{tripartitions}->{$node}."\n";
1798             }
1799             }
1800 135         405 return $str;
1801             }
1802              
1803             1;