File Coverage

Bio/PhyloNetwork.pm
Criterion Covered Total %
statement 610 696 87.6
branch 123 164 75.0
condition 67 84 79.7
subroutine 70 76 92.1
pod 31 65 47.6
total 901 1085 83.0


line stmt bran cond sub pod time code
1             #
2             # Module for Bio::PhyloNetwork
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Gabriel Cardona
7             #
8             # Copyright Gabriel Cardona, Gabriel Valiente
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::PhyloNetwork - Module to compute with Phylogenetic Networks
17              
18             =head1 SYNOPSIS
19              
20             use Bio::PhyloNetwork;
21              
22             # Create a PhyloNetwork object from a eNewick string
23             my $net1=Bio::PhyloNetwork->new(
24             -eNewick=>'t0:((H1,(H2,l2)),H2); H1:((H3,l1)); H2:((H3,(l3,H1))); H3:(l4);'
25             );
26              
27             # Print all available data
28             print $net1;
29              
30             # Rebuild $net1 from its mu_data
31             my %mudata=$net1->mudata();
32             my $net2=Bio::PhyloNetwork->new(-mudata=>\%mudata,-numleaves=>4);
33             print $net2;
34             print "d=".$net1->mu_distance($net2)."\n";
35              
36             # Get another one and compute distance
37             my $net3=Bio::PhyloNetwork->new(
38             -eNewick=>'(l2,((l1,(H1,l4)),H1))r; (l3)H1;'
39             );
40             print "d=".$net1->mu_distance($net3)."\n";
41              
42             # ...and find an optimal alignment w.r.t. the Manhattan distance (default)
43             my ($weight,%alignment)=$net1->optimal_alignment($net3);
44             print "weight:$weight\n";
45             foreach my $node1 (keys %alignment) {
46             print "$node1 => ".$alignment{$node1}."\n";
47             }
48             # ...or the Hamming distance
49              
50             my ($weightH,%alignmentH)=$net1->optimal_alignment($net3,-metric=>'Hamming');
51             print "weight:$weightH\n";
52             foreach my $node1 (keys %alignmentH) {
53             print "$node1 => ".$alignmentH{$node1}."\n";
54             }
55              
56             # Test for time consistency of $net1
57             if ($net1->is_time_consistent) {
58             print "net1 is time consistent\n"
59             }
60             else {
61             print "net1 is not time consistent\n"
62             }
63              
64             # create a network from the list of edges
65             my $net4=Bio::PhyloNetwork->new(-edges=>
66             [qw(r s r t s u s c t c t v u b u l3 u b v b v l4 b l2 c l1)]);
67              
68             # Test for time consistency of $net3
69             if ($net4->is_time_consistent) {
70             print "net4 is time consistent\n"
71             }
72             else {
73             print "net4 is not time consistent\n"
74             }
75              
76             # And print all information on net4
77             print $net4;
78              
79             # Compute some tripartitions
80             my %triparts=$net1->tripartitions();
81              
82             # Now these are stored
83             print $net1;
84              
85             # And can compute the tripartition error
86             print "dtr=".$net1->tripartition_error($net3)."\n";
87              
88             =head1 DESCRIPTION
89              
90             =head2 Phylogenetic Networks
91              
92             This is a module to work with phylogenetic networks. Phylogenetic networks
93             have been studied over the last years as a richer model of the evolutionary
94             history of sets of organisms than phylogenetic trees, because they take not
95             only mutation events but also recombination and horizontal gene transfer
96             events into account.
97              
98             The natural model for describing the evolutionary
99             history of a set of sequences under recombination events is a DAG, hence
100             this package relies on the package Graph::Directed to represent the
101             underlying graph of a phylogenetic network. We refer the reader to [CRV1,CRV2]
102             for formal definitions related to phylogenetic networks.
103              
104             =head2 eNewick description
105              
106             With this package, phylogenetic networks can be given by its eNewick
107             string. This description appeared in other packages related to
108             phylogenetic networks (see [PhyloNet] and [NetGen]); in fact, these two
109             packages use different descriptions. The Bio::PhyloNetwork
110             package allows both of them, but uses the second one in its output.
111              
112             The first approach [PhyloNet] goes as follows: For each hybrid node H, say with
113             parents u_1,u_2,...,u_k and children v_1,v_2,...v_l: split H in k+1 different
114             nodes; let each of the first k copies be a child of one of the u_1,...,u_k
115             (one for each) and have no children (hence we will have k extra leaves);
116             as for the last copy, let it have no parents and have v_1,...,v_l be its
117             children. This way we get a forest; each of the trees will be rooted at either
118             one root of the phylogenetic network or a hybrid node of it; the set of leaves
119             (of the whole forest) will be the set of leaves of the original network
120             together with the set of hybrid nodes (each of them repeated as many times
121             as its in-degree). Then, the eNewick representation of the phylogenetic network
122             will be the Newick representation of all the trees in the obtained forest,
123             each of them with its root labeled.
124              
125             The second approach [NetGen] goes as follows: For each hybrid node H, say with
126             parents u_1,u_2,...,u_k and children v_1,v_2,...v_l: split H in k different
127             nodes; let the first copy be a child of u_1 and have all v_1,v_2,...v_l as
128             its children; let the other copies be child of u_2,...,u_k (one for each)
129             and have no children. This way, we get a tree whose set of leaves is the
130             set of leaves of the original network together with the set of hybrid nodes
131             (possibly repeated). Then the Newick string of the obtained tree (note that
132             some internal nodes will be labeled and some leaves will be repeated) is
133             the eNewick string of the phylogenetic network.
134              
135             For example, consider the network depicted below:
136              
137             r
138             / \
139             / \
140             U V
141             / \ / \
142             1 \ / 3
143             H
144             |
145             2
146              
147             If the first approach is taken, we get the forest:
148              
149             r
150             / \
151             / \
152             U V
153             / \ / \
154             1 H H 3
155             |
156             H
157             |
158             2
159              
160             Hence, the eNewick string is '((1,H),(H,3))r; (2)H;'.
161              
162             As for the second one, one gets the tree:
163              
164             r
165             / \
166             / \
167             U V
168             / \ / \
169             1 H | 3
170             H
171             |
172             2
173              
174             Hence, the eNewick string is '((1,H),((2)H,3))r;'.
175              
176             Note: when rooting a tree, this package allows the notations
177             '(subtree,subtree,...)root' as well as 'root:(subtree,subtree,...)', but
178             the first one is used when writing eNewick strings.
179              
180             =head2 Tree-child phylogenetic networks
181              
182             Tree-child (TC) phylogenetic networks are a special class of phylogenetic
183             networks for which a distance, called mu-distance, is defined [CRV2]
184             based on certain data (mu-data) associated to every node.
185             Moreover, this distance extends the
186             Robinson-Foulds on phylogenetic trees. This package allows testing for a
187             phylogenetic network if it is TC and computes mu-distances between networks
188             over the same set of leaves.
189              
190             Moreover, the mu-data allows one to define the optimal
191             (in some precise sense) alignment between networks
192             over the same set of leaves. This package also computes this optimal alignment.
193              
194             =head2 Tripartitions
195              
196             Although tripartitions (see [CRV1] and the references therein) do not allow
197             to define distances, this package outputs tripartitions and computes a weak
198             form of the tripartition error.
199              
200             =head2 Time-consistency
201              
202             Another useful property of Phylogenetic Networks that appears in the literature
203             is that of time-consistency or real-time hybrids [BSS]. Roughly speaking, a
204             network admits a temporal representation if it can be drawn in such a way
205             that tree arcs (those whose end is a tree node) are inclined downwards, while
206             hybridization arcs (those whose end is a hybrid node) are horizontal.
207             This package checks for time-consistency and, if so, a temporal representation
208             is provided.
209              
210             =head1 AUTHOR
211              
212             Gabriel Cardona, gabriel(dot)cardona(at)uib(dot)es
213             Gabriel Valiente, valiente(at)lsi(dot)upc(dot)edu
214              
215             =head1 SEE ALSO
216              
217             =over
218              
219             =item [CRV1]
220              
221             G. Cardona, F. Rossello, G. Valiente. Tripartitions do not always
222             discriminate phylogenetic networks. arXiv:0707.2376v1 [q-bio.PE]
223              
224             =item [CRV2]
225              
226             G. Cardona, F. Rossello, G. Valiente. A Distance Measure for
227             Tree-Child Phylogenetic Networks. Preprint.
228              
229             =item [NetGen]
230              
231             M.M. Morin, and B.M.E. Moret. NetGen: generating phylogenetic networks
232             with diploid hybrids. Bioinformatics 22 (2006), 1921-1923
233              
234             =item [PhyloNet]
235              
236             PhyloNet: "Phylogenetic Networks Toolkit".
237             http://bioinfo.cs.rice.edu/phylonet
238              
239             =item [BSS]
240              
241             M. Baroni, C. Semple, and M. Steel. Hybrids in Real
242             Time. Syst. Biol. 55(1):46-56, 2006
243              
244             =back
245              
246             =head1 APPENDIX
247              
248             The rest of the documentation details each of the object methods.
249              
250             =cut
251              
252             package Bio::PhyloNetwork;
253              
254 5     5   646 use strict;
  5         9  
  5         139  
255 5     5   22 use warnings;
  5         9  
  5         144  
256              
257 5     5   25 use base qw(Bio::Root::Root);
  5         67  
  5         1323  
258              
259 5     5   1189 use Bio::PhyloNetwork::muVector;
  5         14  
  5         139  
260 5     5   1062 use Graph::Directed;
  5         365583  
  5         142  
261 5     5   1506 use Bio::TreeIO;
  5         21  
  5         186  
262 5     5   40 use Bio::Tree::Node;
  5         11  
  5         115  
263 5     5   1704 use IO::String;
  5         9863  
  5         180  
264 5     5   1821 use Array::Compare;
  5         548723  
  5         154  
265 5     5   1548 use Algorithm::Munkres;
  5         6245  
  5         27661  
266              
267             # Creator
268              
269             =head2 new
270              
271             Title : new
272             Usage : my $obj = new Bio::PhyloNetwork();
273             Function: Creates a new Bio::PhyloNetwork object
274             Returns : Bio::PhyloNetwork
275             Args : none
276             OR
277             -eNewick => string
278             OR
279             -graph => Graph::Directed object
280             OR
281             -edges => reference to an array
282             OR
283             -tree => Bio::Tree::Tree object
284             OR
285             -mudata => reference to a hash,
286             -leaves => reference to an array
287             OR
288             -mudata => reference to a hash,
289             -numleaves => integer
290              
291             Returns a Bio::PhyloNetwork object, created according to the data given:
292              
293             =over 3
294              
295             =item new()
296              
297             creates an empty network.
298              
299             =item new(-eNewick =E $str)
300              
301             creates the network whose
302             Extended Newick representation (see description above) is the string $str.
303              
304             =item new(-graph =E $graph)
305              
306             creates the network with underlying
307             graph given by the Graph::Directed object $graph
308              
309             =item new(-tree =E $tree)
310              
311             creates a network as a copy of the
312             Bio::Tree::Tree object in $tree
313              
314             =item new(-mudata =E \%mudata, -leaves =E \@leaves)
315              
316             creates the network by reconstructing it from its mu-data stored in
317             \%mudata and with set of leaves in \@leaves.
318              
319             =item new(-mudata =E \%mudata, -numleaves =E $numleaves)
320              
321             creates the network by reconstructing it from its mu-data stored in
322             \%mudata and with set of leaves in ("l1".."l$numleaves").
323              
324             =back
325              
326             =cut
327              
328             sub new {
329 817     817 1 8805 my ($pkg,@args)=@_;
330 817         2904 my $self=$pkg->SUPER::new(@args);
331 817         6871 my ($eNewick,$edgesR,$leavesR,$numleaves,$graph,$tree,$mudataR)=
332             $self->_rearrange([qw(ENEWICK
333             EDGES
334             LEAVES
335             NUMLEAVES
336             GRAPH
337             TREE
338             MUDATA)],@args);
339 817         2408 bless($self,$pkg);
340              
341 817 100       2672 $self->build_from_eNewick($eNewick) if defined $eNewick;
342 817 50       2437 $self->build_from_edges(@$edgesR) if defined $edgesR;
343 817 100       2205 $self->build_from_graph($graph) if defined $graph;
344 817 100       3495 $self->build_from_tree($tree) if defined $tree;
345 817 50 66     10934 if ((! defined $leavesR) && (defined $numleaves)) {
346 0         0 my @leaves=map {"l$_"} (1..$numleaves);
  0         0  
347 0         0 $leavesR=\@leaves;
348             }
349 817 100 66     2708 $self->build_from_mudata($mudataR,$leavesR)
350             if ((defined $mudataR) && (defined $leavesR));
351 817         4503 return $self;
352             }
353              
354             # Builders
355              
356             sub build_from_edges {
357 0     0 0 0 my ($self,@edges)=@_;
358 0         0 my $graph=Graph::Directed->new();
359 0         0 $graph->add_edges(@edges);
360 0         0 $self->{graph}=$graph;
361 0         0 $self->recompute();
362 0         0 my $labels={};
363 0         0 foreach my $node ($self->nodes()) {
364 0         0 $labels->{$node}=$node;
365             }
366 0         0 $self->{labels}=$labels;
367             }
368              
369             sub build_from_graph {
370 829     829 0 1939 my ($self,$graph)=@_;
371 829         3470 my $graphcp=$graph->copy();
372 829         1347382 $self->{graph}=$graphcp;
373 829         3430 $self->recompute();
374 829         1860 my $labels={};
375 829         2899 foreach my $node ($self->nodes()) {
376 6327         9603 $labels->{$node}=$node;
377             }
378 829         6890 $self->{labels}=$labels;
379             }
380              
381             my $_eN_index;
382              
383             sub build_from_eNewick {
384 633     633 0 1672 my ($self,$string)=@_;
385 633         1124 $_eN_index=0;
386 633         4959 my $graph=Graph::Directed->new();
387 633         164247 my $labels={};
388 633         5176 my @blocks=split(/; */,$string);
389 633         1692 foreach my $block (@blocks) {
390 996         3266 my ($rt,$str)=get_root_and_subtree($block);
391 996         2484 my ($rtlbl,$rttype,$rtid,$rtlng)=get_label_type_id_length($rt);
392 996         3098 process_block($graph,$labels,$block,$rtid);
393 996         3354 $labels->{$rtid}=$rtlbl.'';
394             }
395 633         2007 $self->{graph}=$graph;
396 633         1902 $self->{labels}=$labels;
397 633         2251 $self->recompute();
398             }
399              
400             sub process_block {
401 1876     1876 0 3834 my ($graph,$labels,$block,$rtid)=@_;
402 1876         2878 my ($rt,$str)=get_root_and_subtree($block);
403 1876         4251 my @substrs=my_split($str);
404 1876         3321 foreach my $substr (@substrs) {
405 3236         5181 my ($subrt,$subblock)=get_root_and_subtree($substr);
406 3236         5698 my ($subrtlbl,$subrttype,$subrtid,$subrtlng)=
407             get_label_type_id_length($subrt);
408 3236 100       6682 if (! $subrtlng eq '') {
409 1916         5939 $graph->add_weighted_edges($rtid,$subrtid,$subrtlng);
410             }
411             else {
412 1320         3653 $graph->add_edges($rtid,$subrtid);
413             }
414 3236 100       813441 if (! $subrttype eq '') {
415 444         1350 $graph->set_edge_attribute($rtid,$subrtid,'type',$subrttype);
416             }
417 3236         99424 $subrtlbl.='';
418             # if (! $subrtlbl eq '') {
419 3236 50 66     9615 if ((! defined $labels->{$subrtid})||($labels->{$subrtid} eq '')){
    0 0        
420 3236         6154 $labels->{$subrtid}=$subrtlbl;
421             } elsif (( $labels->{$subrtid} ne $subrtlbl )&&($subrtlbl ne '')) {
422             # error
423 0         0 die("Different labels for the same node (".$labels->{$subrtid}." and $subrtlbl)");
424             }
425             # }
426 3236 100       9589 if ($subblock ne "") {
427 880         2361 process_block($graph,$labels,$subblock,$subrtid);
428             }
429             }
430             }
431              
432             sub get_root_and_subtree {
433 6108     6108 0 8799 my ($block)=@_;
434 6108         8564 my ($rt,$str)=("","");
435             # ($rt,$str)=split(/:|=/,$block);
436 6108         14012 ($rt,$str)=split(/=/,$block);
437 6108 100       12141 if ($rt eq $block) {
438             # try to look for root label at the end
439 6104         8326 my $pos=length($rt)-1;
440 6104   100     19713 while ((substr($rt,$pos,1) ne ")") && ($pos >=0)) {
441 48616         109492 $pos--;
442             }
443 6104         10048 $rt=substr($block,$pos+1,length($block)-$pos);
444 6104         10076 $str=substr($block,0,$pos+1);
445             }
446 6108         9745 $rt=trim($rt);
447 6108         9476 $str=trim($str);
448 6108         15030 return ($rt,$str);
449             }
450              
451             sub get_label_type_id_length {
452 4232     4232 0 6320 my ($string) = @_;
453 4232         5506 $string.='';
454             # print "$string\n";
455 4232 100       10256 if (index($string,'#')==-1) {
456             # no hybrid
457 3572         8402 my ($label,$length)=split(':',$string);
458 3572         5223 $label.='';
459 3572         4200 my $id;
460 3572 100 66     11030 if ((! defined $label) || ($label eq '')) {
461             # create id
462 682         950 $_eN_index++;
463 682         1056 $id="T$_eN_index";
464             } else {
465 2890         4048 $id=$label;
466             }
467 3572         12230 return ($label,'',$id,$length);
468             }
469             else {
470             # hybrid
471 660         1645 my ($label,$string2)=split('#',$string);
472 660         1425 my ($typeid,$length)=split(':',$string2);
473 660         902 my $type=$typeid;
474 660         2115 $type =~ s/\d//g;
475 660         949 my $id=$typeid;
476 660         1669 $id =~ s/\D//g;
477 660         2365 return ($label,$type,'#'.$id,$length);
478             }
479             }
480              
481             sub trim
482             {
483 12216     12216 0 16799 my ($string) = @_;
484 12216         22364 $string =~ s/^\s+//;
485 12216         17050 $string =~ s/\s+$//;
486 12216         18650 return $string;
487             }
488              
489             sub my_split {
490 1876     1876 0 3019 my ( $string ) = @_;
491 1876         2486 my $temp="";
492 1876         2252 my @substrings;
493 1876         2280 my $level=1;
494 1876         5090 for my $i ( 1 .. length( $string ) ) {
495 72516         76839 my $char=substr($string,$i,1);
496 72516 100       92807 if ($char eq "(") {
497 1109         1187 $level++;
498             }
499 72516 100       90112 if ($char eq ")") {
500 2838 100       4399 if ($level==1) {
501 1729         2733 push @substrings, $temp;
502 1729         2154 $temp="";
503             }
504 2838         2849 $level--;
505             }
506 72516 100 100     100599 if (($char eq ",") && ($level==1)) {
507 1507         2429 push @substrings, $temp;
508 1507         2240 $temp="";
509 1507         1991 $char="";
510             }
511 72516         81188 $temp = $temp.$char;
512             }
513 1876         4974 return @substrings;
514             }
515              
516             sub build_from_mudata {
517 1     1 0 4 my ($self,$mus,$leavesR)=@_;
518 1         9 my $graph=Graph::Directed->new();
519 1         372 my @nodes=keys %{$mus};
  1         8  
520 1         4 my @leaves=@{$leavesR};
  1         6  
521              
522 1         3 my %seen;
523             my @internal;
524              
525 1         6 @seen{@leaves} = ();
526              
527 1         4 foreach my $node (@nodes) {
528 13 100       44 push(@internal, $node) unless exists $seen{$node};
529             }
530              
531 1         9 @internal=sort {$mus->{$b} <=> $mus->{$a} } @internal;
  20         92  
532 1         9 @nodes=(@internal,@leaves);
533 1         4 my $numnodes=@nodes;
534 1         6 for (my $i=0;$i<$numnodes;$i++) {
535 13         42 my $mu=$mus->{$nodes[$i]};
536 13         28 my $j=$i+1;
537 13   66     43 while ($mu->is_positive() && $j<$numnodes) {
538 78 100       336 if ($mu->geq_poset($mus->{$nodes[$j]})) {
539 15         75 $graph->add_edges(($nodes[$i],$nodes[$j]));
540 15         2213 $mu = $mu - $mus->{$nodes[$j]};
541             }
542 78         276 $j++;
543             }
544             }
545 1         8 $self->build_from_graph($graph);
546             }
547              
548             # sub relabel_tree {
549             # my ($tree)=@_;
550             # my $i=1;
551             # my $j=1;
552             # my $root=$tree->get_root_node();
553             # foreach my $node ($tree->get_nodes()) {
554             # if ($node == $root) {
555             # $node->{'_id'}="r";
556             # }
557             # elsif (! $node->is_Leaf) {
558             # $node->{'_id'}="t$i";
559             # $i++;
560             # }
561             # else {
562             # if ($node->{'_id'} eq "") {
563             # $node->{'_id'}="l$j";
564             # $j++;
565             # }
566             # }
567             # }
568             # return $tree;
569             # }
570              
571             # sub build_subtree {
572             # my ($graph,$root)=@_;
573             # foreach my $child ($root->each_Descendent) {
574             # $graph->add_edge($root->id,$child->id);
575             # $graph=build_subtree($graph,$child);
576             # }
577             # return $graph;
578             # }
579              
580             sub build_from_tree {
581 479     479 0 1345 my ($self,$tree)=@_;
582             # relabel_tree($tree);
583             # my $treeroot=$tree->get_root_node;
584             # my $graph=Graph::Directed->new();
585             # $graph=build_subtree($graph,$treeroot);
586             # $self->build_from_graph($graph);
587 479         816 my $str;
588 479         4559 my $io=IO::String->new($str);
589 479         38807 my $treeio=Bio::TreeIO->new(-format => 'newick', -fh => $io);
590 479         1708 $treeio->write_tree($tree);
591             # print "intern: $str\n";
592 479         1990 $self->build_from_eNewick($str);
593             }
594              
595             sub recompute {
596 1462     1462 0 3491 my ($self)=@_;
597             $self->throw("Graph is not DAG:".$self->{graph})
598 1462 50       6321 unless $self->{graph}->is_dag();
599 1462         3532683 my @leaves=$self->{graph}->successorless_vertices();
600 1462         469432 @leaves=sort @leaves;
601 1462         3043 my $numleaves=@leaves;
602 1462         6519 my @roots=$self->{graph}->predecessorless_vertices();
603 1462         459628 my $numroots=@roots;
604             #$self->throw("Graph is not rooted") unless ($numroots == 1);
605 1462         5218 my @nodes=$self->{graph}->vertices();
606 1462         86499 @nodes=sort @nodes;
607 1462         2779 my $numnodes=@nodes;
608 1462         3334 foreach my $node (@nodes) {
609 9974 100       18877 if (! defined $self->{labels}->{$node}) {
610 2520         4790 $self->{labels}->{$node}='';
611             }
612             }
613 1462         4441 $self->{leaves}=\@leaves;
614 1462         2753 $self->{numleaves}=$numleaves;
615 1462         3531 $self->{roots}=\@roots;
616 1462         3541 $self->{numroots}=$numroots;
617 1462         4712 $self->{nodes}=\@nodes;
618 1462         3179 $self->{numnodes}=$numnodes;
619 1462         7068 $self->{mudata}={};
620 1462         3427 $self->{h}={};
621 1462         6015 $self->compute_height();
622 1462         5275 $self->compute_mu();
623 1462         6961 return $self;
624             }
625              
626             # Hybridizing
627              
628             sub is_attackable {
629 2968     2968 0 6124 my ($self,$u1,$v1,$u2,$v2)=@_;
630 2968 100 100     6471 if ( $self->is_hybrid_node($v1) ||
      100        
      100        
      100        
      100        
631             $self->is_hybrid_node($v2) ||
632             $self->graph->is_reachable($v2,$u1) ||
633             (($u1 eq $u2)&&($v1 eq $v2)) ||
634 2020 100       116969 (! scalar grep {($_ ne $v2) && ($self->is_tree_node($_))}
635             $self->graph->successors($u2)))
636             {
637 2323         1333171 return 0;
638             }
639 645         2726 return 1;
640             }
641              
642             sub do_attack {
643 645     645 0 2294 my ($self,$u1,$v1,$u2,$v2,$lbl)=@_;
644 645         1406 my $graph=$self->{graph};
645 645         2640 $graph->delete_edge($u1,$v1);
646 645         84402 $graph->delete_edge($u2,$v2);
647 645         61274 $graph->add_edge($u1,"T$lbl");
648 645         87239 $graph->add_edge("T$lbl",$v1);
649 645         50857 $graph->add_edge($u2,"#H$lbl");
650 645         70636 $graph->add_edge("#H$lbl",$v2);
651 645         50244 $graph->add_edge("T$lbl","#H$lbl");
652 645         50455 $self->build_from_graph($graph);
653             }
654              
655              
656             # Computation of mu-data
657              
658             sub compute_mu {
659 1462     1462 0 3232 my ($self)=@_;
660 1462         2685 my $graph=$self->{graph};
661 1462         2550 my $mudata=$self->{mudata};
662 1462         1934 my @leaves=@{$self->{leaves}};
  1462         4225  
663 1462         3344 my $numleaves=$self->{numleaves};
664 1462         4650 for (my $i=0;$i<$numleaves;$i++) {
665 4423         15432 my $vec=Bio::PhyloNetwork::muVector->new($numleaves);
666 4423         11841 $vec->[$i]=1;
667 4423         13151 $mudata->{$leaves[$i]}=$vec;
668             }
669 1462         2679 my $h=1;
670 1462         3008 while (my @nodes=grep {$self->{h}->{$_} == $h} @{$self->{nodes}} )
  48394         84036  
  6616         12291  
671             {
672 5154         7792 foreach my $u (@nodes) {
673 5551         10628 my $vec=Bio::PhyloNetwork::muVector->new($numleaves);
674 5551         12809 foreach my $son ($graph->successors($u)) {
675 9807         261331 $vec+=$mudata->{$son};
676             }
677 5551         12564 $mudata->{$u}=$vec;
678             }
679 5154         7790 $h++;
680             }
681             }
682              
683             sub compute_height {
684 1462     1462 0 3145 my ($self)=@_;
685 1462         2873 my $graph=$self->{graph};
686 1462         2381 my @leaves=@{$self->{leaves}};
  1462         5192  
687 1462         3279 foreach my $leaf (@leaves) {
688 4423         7763 $self->{h}->{$leaf}=0;
689             }
690 1462         2606 my $h=0;
691 1462 100       2897 while (my @nodes=grep {(defined $self->{h}->{$_})&&($self->{h}->{$_} == $h)}
  58368         163403  
692 8078         13945 @{$self->{nodes}} )
693             {
694 6616         10098 foreach my $node (@nodes) {
695 15600         64041 foreach my $parent ($graph->predecessors($node)) {
696 13067         443936 $self->{h}->{$parent}=$h+1;
697             }
698             }
699 6616         92314 $h++;
700             }
701             }
702              
703             # Tests
704              
705             =head2 is_leaf
706              
707             Title : is_leaf
708             Usage : my $b=$net->is_leaf($u)
709             Function: tests if $u is a leaf in $net
710             Returns : boolean
711             Args : scalar
712              
713             =cut
714              
715             sub is_leaf {
716 7203     7203 1 11312 my ($self,$node)=@_;
717 7203 100       15178 if ($self->{graph}->out_degree($node) == 0) {return 1;}
  2806         149921  
718 4397         734639 return 0;
719             }
720              
721             =head2 is_root
722              
723             Title : is_root
724             Usage : my $b=$net->is_root($u)
725             Function: tests if $u is the root of $net
726             Returns : boolean
727             Args : scalar
728              
729             =cut
730              
731             sub is_root {
732 1     1 1 4 my ($self,$node)=@_;
733 1 50       7 if ($self->{graph}->in_degree($node) == 0) {return 1;}
  1         99  
734 0         0 return 0;
735             }
736              
737             =head2 is_tree_node
738              
739             Title : is_tree_node
740             Usage : my $b=$net->is_tree_node($u)
741             Function: tests if $u is a tree node in $net
742             Returns : boolean
743             Args : scalar
744              
745             =cut
746              
747             sub is_tree_node {
748 1163     1163 1 2721 my ($self,$node)=@_;
749 1163 100       3532 if ($self->{graph}->in_degree($node) <= 1) {return 1;}
  837         114081  
750 326         65524 return 0;
751             }
752              
753             =head2 is_hybrid_node
754              
755             Title : is_hybrid_node
756             Usage : my $b=$net->is_hybrid_node($u)
757             Function: tests if $u is a hybrid node in $net
758             Returns : boolean
759             Args : scalar
760              
761             =cut
762              
763             sub is_hybrid_node {
764 15481     15481 1 21768 my ($self,$node)=@_;
765 15481 100       29075 if ($self->{graph}->in_degree($node) > 1) {return 1;}
  2856         491648  
766 12625         1387310 return 0;
767             }
768              
769             sub has_tree_child {
770             # has_tree_child(g,u) returns 1 if u has a tree child in graph g
771             # and 0 otherwise
772 1505     1505 0 243635 my $g=shift(@_);
773 1505         1910 my $node=shift(@_);
774 1505         3099 my @Sons=$g->successors($node);
775 1505         87308 foreach my $son (@Sons) {
776 1922 100       72793 if ($g->in_degree($son)==1) {
777 1505         180630 return 1;
778             }
779             }
780 0         0 return 0;
781             }
782              
783             =head2 is_tree_child
784              
785             Title : is_tree_child
786             Usage : my $b=$net->is_tree_child()
787             Function: tests if $net is a Tree-Child phylogenetic network
788             Returns : boolean
789             Args : Bio::PhyloNetwork
790              
791             =cut
792              
793             sub is_tree_child {
794 9095     9095 1 11944 my ($self)=@_;
795 9095 100       16205 if (defined $self->{is_tree_child}) {
796 8794         15631 return $self->{is_tree_child};
797             }
798 301         563 $self->{is_tree_child}=0;
799 301         606 my $graph=$self->{graph};
800 301         368 foreach my $node (@{$self->{nodes}}) {
  301         706  
801 2443 50 66     40355 return 0 unless ($graph->out_degree($node)==0 ||
802             has_tree_child($graph,$node));
803             }
804 301         10925 $self->{is_tree_child}=1;
805 301         667 return 1;
806             }
807              
808             # Accessors
809              
810             =head2 nodes
811              
812             Title : nodes
813             Usage : my @nodes=$net->nodes()
814             Function: returns the set of nodes of $net
815             Returns : array
816             Args : none
817              
818             =cut
819              
820             sub nodes {
821 11676     11676 1 14804 my ($self)=@_;
822 11676         12247 return @{$self->{nodes}};
  11676         42532  
823             }
824              
825             =head2 leaves
826              
827             Title : leaves
828             Usage : my @leaves=$net->leaves()
829             Function: returns the set of leaves of $net
830             Returns : array
831             Args : none
832              
833             =cut
834              
835             sub leaves {
836 136     136 1 200 my ($self)=@_;
837 136         193 return @{$self->{leaves}};
  136         382  
838             }
839              
840             =head2 roots
841              
842             Title : roots
843             Usage : my @roots=$net->roots()
844             Function: returns the set of roots of $net
845             Returns : array
846             Args : none
847              
848             =cut
849              
850             sub roots {
851 280     280 1 431 my ($self)=@_;
852 280         339 return @{$self->{roots}};
  280         692  
853             }
854              
855             =head2 internal_nodes
856              
857             Title : internal_nodes
858             Usage : my @internal_nodes=$net->internal_nodes()
859             Function: returns the set of internal nodes of $net
860             Returns : array
861             Args : none
862              
863             =cut
864              
865             sub internal_nodes {
866 640     640 1 1330 my ($self)=@_;
867 640         1682 return grep {! $self->is_leaf($_)} $self->nodes();
  4774         8616  
868             }
869              
870             =head2 tree_nodes
871              
872             Title : tree_nodes
873             Usage : my @tree_nodes=$net->tree_nodes()
874             Function: returns the set of tree nodes of $net
875             Returns : array
876             Args : none
877              
878             =cut
879              
880             sub tree_nodes {
881 4     4 1 9 my ($self)=@_;
882 4         11 return grep {$self->is_tree_node($_)} $self->nodes();
  44         108  
883             }
884              
885             =head2 hybrid_nodes
886              
887             Title : hybrid_nodes
888             Usage : my @hybrid_nodes=$net->hybrid_nodes()
889             Function: returns the set of hybrid nodes of $net
890             Returns : array
891             Args : none
892              
893             =cut
894              
895             sub hybrid_nodes {
896 1095     1095 1 1466 my ($self)=@_;
897 1095         1859 return grep {$self->is_hybrid_node($_)} $self->nodes();
  7653         10327  
898             }
899              
900             =head2 graph
901              
902             Title : graph
903             Usage : my $graph=$net->graph()
904             Function: returns the underlying graph of $net
905             Returns : Graph::Directed
906             Args : none
907              
908             =cut
909              
910             sub graph {
911 5822     5822 1 2128438 my ($self)=@_;
912 5822         15925 return $self->{graph};
913             }
914              
915             =head2 edges
916              
917             Title : edges
918             Usage : my @edges=$net->edges()
919             Function: returns the set of edges of $net
920             Returns : array
921             Args : none
922              
923             Each element in the array is an anonimous array whose first element is the
924             head of the edge and the second one is the tail.
925              
926             =cut
927              
928             sub edges {
929 36     36 1 60 my ($self)=@_;
930 36         106 return $self->{graph}->edges();
931             }
932              
933             =head2 tree_edges
934              
935             Title : tree_edges
936             Usage : my @tree_edges=$net->tree_edges()
937             Function: returns the set of tree edges of $net
938             (those whose tail is a tree node)
939             Returns : array
940             Args : none
941              
942             =cut
943              
944             sub tree_edges {
945 4     4 1 834 my ($self)=@_;
946 4         10 return grep {$self->is_tree_node($_->[1])} $self->edges();
  33         961  
947             }
948              
949             =head2 hybrid_edges
950              
951             Title : hybrid_edges
952             Usage : my @hybrid_edges=$net->hybrid_edges()
953             Function: returns the set of hybrid edges of $net
954             (those whose tail is a hybrid node)
955             Returns : array
956             Args : none
957              
958             =cut
959              
960             sub hybrid_edges {
961 4     4 1 9 my ($self)=@_;
962 4         13 return grep {$self->is_hybrid_node($_->[1])} $self->edges();
  33         980  
963             }
964              
965             =head2 explode
966              
967             Title : explode
968             Usage : my @trees=$net->explode()
969             Function: returns the representation of $net by a set of
970             Bio::Tree:Tree objects
971             Returns : array
972             Args : none
973              
974             =cut
975              
976             sub explode {
977 1     1 1 4 my ($self)=@_;
978 1         3 my @trees;
979 1         7 $self->explode_rec(\@trees);
980 1         80 return @trees;
981             }
982              
983             sub explode_rec {
984 15     15 0 39 my ($self,$trees)=@_;
985 15         57 my @h = $self->hybrid_nodes;
986 15 100       57 if (scalar @h) {
987 7         15 my $v = shift @h;
988 7         30 for my $u ($self->{graph}->predecessors($v)) {
989 14         1944 $self->{graph}->delete_edge($u,$v);
990 14         2221 $self->explode_rec($trees);
991 14         1070 $self->{graph}->add_edge($u,$v);
992             }
993             } else {
994 8         34 my $io = IO::String->new($self->eNewick);
995 8         666 my $treeio = Bio::TreeIO->new(-format => 'newick', -fh => $io);
996 8         33 my $tree = $treeio->next_tree;
997 8         51 $tree->contract_linear_paths;
998 8         20 push @{$trees}, $tree;
  8         58  
999             }
1000             }
1001              
1002             =head2 mudata
1003              
1004             Title : mudata
1005             Usage : my %mudata=$net->mudata()
1006             Function: returns the representation of $net by its mu-data
1007             Returns : hash
1008             Args : none
1009              
1010             $net-Emudata() returns a hash with keys the nodes of $net and each value is a
1011             muVector object holding its mu-vector.
1012              
1013             =cut
1014              
1015             sub mudata {
1016 1     1 1 4 my ($self)=@_;
1017 1         3 return %{$self->{mudata}};
  1         14  
1018             }
1019              
1020             sub mudata_node {
1021 0     0 0 0 my ($self,$u)=@_;
1022 0         0 return $self->{mudata}{$u};
1023             }
1024              
1025             =head2 heights
1026              
1027             Title : heights
1028             Usage : my %heights=$net->heights()
1029             Function: returns the heights of the nodes of $net
1030             Returns : hash
1031             Args : none
1032              
1033             $net-Eheights() returns a hash with keys the nodes of $net and each value
1034             is its height.
1035              
1036             =cut
1037              
1038             sub heights {
1039 1     1 1 4 my ($self)=@_;
1040 1         3 return %{$self->{h}};
  1         16  
1041             }
1042              
1043             sub height_node {
1044 0     0 0 0 my ($self,$u)=@_;
1045 0         0 return $self->{h}{$u};
1046             }
1047              
1048             =head2 mu_distance
1049              
1050             Title : mu_distance
1051             Usage : my $dist=$net1->mu_distance($net2)
1052             Function: Computes the mu-distance between the networks $net1 and $net2 on
1053             the same set of leaves
1054             Returns : scalar
1055             Args : Bio::PhyloNetwork
1056              
1057             =cut
1058              
1059             sub mu_distance {
1060 4546     4546 1 79878 my ($net1,$net2)=@_;
1061 4546         7380 my @nodes1=$net1->nodes;
1062 4546         7742 my @nodes2=$net2->nodes;
1063 4546         92546 my $comp = Array::Compare->new;
1064             $net1->throw("Cannot compare phylogenetic networks on different set of leaves")
1065 4546 50       504209 unless $comp->compare($net1->{leaves},$net2->{leaves});
1066 4546 50       773934 $net1->warn("Not a tree-child phylogenetic network")
1067             unless $net1->is_tree_child();
1068 4546 50       6622 $net2->warn("Not a tree-child phylogenetic network")
1069             unless $net2->is_tree_child();
1070 4546         5052 my @leaves=@{$net1->{leaves}};
  4546         10316  
1071 4546         5853 my %matched1;
1072             my %matched2;
1073 4546         6404 OUTER: foreach my $node1 (@nodes1) {
1074 36890         41971 foreach my $node2 (@nodes2) {
1075 230165 100 66     737475 if (
      100        
1076             (! exists $matched1{$node1}) && (! exists $matched2{$node2}) &&
1077             ($net1->{mudata}{$node1} == $net2->{mudata}{$node2})
1078             ) {
1079 21218         33036 $matched1{$node1}=$node2;
1080 21218         26746 $matched2{$node2}=$node1;
1081 21218         32275 next OUTER;
1082             }
1083             }
1084             }
1085 4546         30115 return (scalar @nodes1)+(scalar @nodes2)-2*(scalar keys %matched1);
1086             }
1087              
1088             =head2 mu_distance_generalized
1089              
1090             Title : mu_distance_generalized
1091             Usage : my $dist=$net1->mu_distance($net2)
1092             Function: Computes the mu-distance between the topological restrictions of
1093             networks $net1 and $net2 on its common set of leaves
1094             Returns : scalar
1095             Args : Bio::PhyloNetwork
1096              
1097             =cut
1098              
1099             sub mu_distance_generalized {
1100 0     0 1 0 my ($net1,$net2)=@_;
1101 0         0 my ($netr1,$netr2)=$net1->topological_restriction($net2);
1102 0         0 return $netr1->mu_distance($netr2);
1103             }
1104              
1105             # mudata_string (code mu_data in a string; useful for isomorphism testing)
1106              
1107             sub mudata_string_node {
1108 2812     2812 0 4594 my ($self,$u)=@_;
1109 2812         5933 return $self->{mudata}->{$u}->display();
1110             }
1111              
1112             sub mudata_string {
1113 24906     24906 0 26973 my ($self)=@_;
1114 24906 100       75823 return $self->{mudata_string} if defined $self->{mudata_string};
1115 635         2181 my @internal=$self->internal_nodes;
1116 635         1951 my $mus=$self->{mudata};
1117 635         4186 @internal=sort {$mus->{$b} <=> $mus->{$a} } @internal;
  3760         10665  
1118 635         1517 my $str="";
1119 635         1508 foreach my $node (@internal) {
1120 2812         5447 $str=$str.$self->mudata_string_node($node);
1121             }
1122 635         1588 $self->{mudata_string}=$str;
1123 635         1932 return $str;
1124             }
1125              
1126             sub is_mu_isomorphic {
1127 12453     12453 0 17372 my ($net1,$net2)=@_;
1128 12453         15391 return ($net1->mudata_string() eq $net2->mudata_string());
1129             }
1130              
1131             # tripartitions
1132              
1133             sub compute_tripartition_node {
1134 12     12 0 27 my ($self,$u)=@_;
1135             $self->warn("Cannot compute tripartitions on unrooted networks. Will assume one at random")
1136 12 50       36 unless ($self->{numroots} == 1);
1137 12         23 my $root=$self->{roots}->[0];
1138 12         24 my $graph=$self->{graph};
1139 12         43 my $graphPruned=$graph->copy();
1140 12         16265 $graphPruned->delete_vertex($u);
1141 12         2558 my $tripartition="";
1142 12         17 foreach my $leaf (@{$self->{leaves}}) {
  12         30  
1143 36         44 my $type;
1144 36 100       90 if ($graph->is_reachable($u,$leaf)) {
1145 19 100       5489 if ($graphPruned->is_reachable($root,$leaf)) {$type="B";}
  2         7679  
1146 17         55086 else {$type="A";}
1147             }
1148 17         7552 else {$type="C";}
1149 36         96 $tripartition .= $type;
1150             }
1151 12         171 $self->{tripartitions}->{$u}=$tripartition;
1152             }
1153              
1154             sub compute_tripartitions {
1155 2     2 0 5 my ($self)=@_;
1156 2         4 foreach my $node (@{$self->{nodes}}) {
  2         4  
1157 12         38 $self->compute_tripartition_node($node);
1158             }
1159             }
1160              
1161             =head2 tripartitions
1162              
1163             Title : tripartitions
1164             Usage : my %tripartitions=$net->tripartitions()
1165             Function: returns the set of tripartitions of $net
1166             Returns : hash
1167             Args : none
1168              
1169             $net-Etripartitions() returns a hash with keys the nodes of $net and each value
1170             is a string representing the tripartition of the leaves induced by the node.
1171             A string "BCA..." associated with a node u (e.g.) means, the first leaf is in
1172             the set B(u), the second one in C(u), the third one in A(u), and so on.
1173              
1174             =cut
1175              
1176             sub tripartitions {
1177 1     1 1 6 my ($self)=@_;
1178 1 50       5 $self->compute_tripartitions() unless defined $self->{tripartitions};
1179 1         3 return %{$self->{tripartitions}};
  1         10  
1180             }
1181              
1182             # to do: change to tri_distance and test for TC and time-cons
1183              
1184             sub tripartition_error {
1185 1     1 0 8 my ($net1,$net2)=@_;
1186 1         39 my $comp = Array::Compare->new;
1187             $net1->throw("Cannot compare phylogenetic networks on different set of leaves")
1188 1 50       140 unless $comp->compare($net1->{leaves},$net2->{leaves});
1189 1 50       196 $net1->warn("Not a tree-child phylogenetic network")
1190             unless $net1->is_tree_child();
1191 1 50       4 $net2->warn("Not a tree-child phylogenetic network")
1192             unless $net2->is_tree_child();
1193 1 50       4 $net1->warn("Not a time-consistent network")
1194             unless $net1->is_time_consistent();
1195 1 50       5 $net2->warn("Not a time-consistent network")
1196             unless $net2->is_time_consistent();
1197 1 50       22 $net1->compute_tripartitions() unless defined $net1->{tripartitions};
1198 1 50       6 $net2->compute_tripartitions() unless defined $net2->{tripartitions};
1199 1         5 my @edges1=$net1->{graph}->edges();
1200 1         150 my @edges2=$net2->{graph}->edges();
1201 1         88 my ($FN,$FP)=(0,0);
1202 1         2 foreach my $edge1 (@edges1) {
1203 7         24 my $matched=0;
1204 7         8 foreach my $edge2 (@edges2) {
1205 23 100       42 if ($net1->{tripartitions}->{$edge1->[1]} eq
1206             $net2->{tripartitions}->{$edge2->[1]}) {
1207 5         5 $matched=1;
1208 5         7 last;
1209             }
1210             }
1211 7 100       10 if (! $matched) {$FN++;}
  2         3  
1212             }
1213 1         2 foreach my $edge2 (@edges2) {
1214 4         6 my $matched=0;
1215 4         7 foreach my $edge1 (@edges1) {
1216 17 100       42 if ($net1->{tripartitions}->{$edge1->[1]} eq
1217             $net2->{tripartitions}->{$edge2->[1]}) {
1218 3         5 $matched=1;
1219 3         6 last;
1220             }
1221             }
1222 4 100       11 if (! $matched) {$FP++;}
  1         3  
1223             }
1224 1         20 return ($FN/(scalar @edges1)+$FP/(scalar @edges2))/2;
1225             }
1226              
1227             # Time-consistency
1228              
1229             # to do: add weak time consistency
1230              
1231             =head2 is_time_consistent
1232              
1233             Title : is_time_consistent
1234             Usage : my $b=$net->is_time_consistent()
1235             Function: tests if $net is (strong) time-consistent
1236             Returns : boolean
1237             Args : none
1238              
1239             =cut
1240              
1241             sub is_time_consistent {
1242 4     4 1 13 my ($self)=@_;
1243             $self->compute_temporal_representation()
1244 4 100       18 unless exists $self->{has_temporal_representation};
1245 4         29 return $self->{has_temporal_representation};
1246             }
1247              
1248             =head2 temporal_representation
1249              
1250             Title : temporal_representation
1251             Usage : my %time=$net->temporal_representation()
1252             Function: returns a hash containing a temporal representation of $net, or 0
1253             if $net is not time-consistent
1254             Returns : hash
1255             Args : none
1256              
1257             =cut
1258              
1259             sub temporal_representation {
1260 1     1 1 3 my ($self)=@_;
1261 1 50       5 if ($self->is_time_consistent) {
1262 1         2 return %{$self->{temporal_representation}};
  1         11  
1263             }
1264 0         0 return 0;
1265             }
1266              
1267             sub compute_temporal_representation {
1268 3     3 0 6 my ($self)=@_;
1269 3         9 my $quotient=Graph::Directed->new();
1270 3         561 my $classes=find_classes($self);
1271 3         5 my %repr;
1272 3         7 map {$repr{$_}=$classes->{$_}[0]} $self->nodes();
  19         34  
1273 3         9 foreach my $e ($self->tree_edges()) {
1274 14         796 $quotient->add_edge($repr{$e->[0]},$repr{$e->[1]});
1275             }
1276 3         199 my %temp;
1277 3         4 my $depth=0;
1278 3         8 while ($quotient->vertices()) {
1279 9 50       367 if (my @svs=$quotient->predecessorless_vertices()) {
1280 9         1064 foreach my $sv (@svs) {
1281 15         21 $temp{$sv}=$depth;
1282             }
1283 9         27 $quotient->delete_vertices(@svs);
1284             } else {
1285 0         0 return 0;
1286             }
1287 9         1855 $depth++;
1288             }
1289 3         80 foreach my $node (@{$self->{nodes}}) {
  3         6  
1290 19         26 $temp{$node}=$temp{$repr{$node}}
1291             }
1292 3         7 $self->{temporal_representation}=\%temp;
1293 3         19 $self->{has_temporal_representation}=1;
1294             }
1295              
1296             sub find_classes {
1297 3     3 0 9 my ($self)=@_;
1298 3         5 my $classes={};
1299 3         7 map {$classes->{$_}=[$_]} $self->nodes();
  19         65  
1300 3         10 foreach my $e ($self->hybrid_edges()) {
1301 4         9 $classes=join_classes($classes,$e->[0],$e->[1]);
1302             }
1303 3         8 return $classes;
1304             }
1305              
1306             sub join_classes {
1307 4     4 0 8 my ($classes,$u,$v)=@_;
1308 4         4 my @clu=@{$classes->{$u}};
  4         8  
1309 4         6 my @clv=@{$classes->{$v}};
  4         5  
1310 4         9 my @cljoin=(@clu,@clv);
1311 4         6 map {$classes->{$_}=\@cljoin} @cljoin;
  10         16  
1312 4         9 return $classes;
1313             }
1314              
1315             # alignment
1316              
1317             =head2 contract_elementary
1318              
1319              
1320             Title : contract_elementary
1321             Usage : my ($contracted,$blocks)=$net->contract_elementary();
1322             Function: Returns the network $contracted, obtained by contracting elementary
1323             paths of $net into edges. The reference $blocks points to a hash
1324             where, for each node of $contracted, gives the corresponding nodes
1325             of $net that have been deleted.
1326             Returns : Bio::PhyloNetwork,reference to hash
1327             Args : none
1328              
1329             =cut
1330              
1331             sub contract_elementary {
1332 4     4 1 10 my ($self)=@_;
1333              
1334 4         18 my $contracted=$self->graph->copy();
1335 4         9197 my @nodes=$self->nodes();
1336 4         14 my $mus=$self->{mudata};
1337 4         9 my $hs=$self->{h};
1338 4         8 my %blocks;
1339 4         11 foreach my $u (@nodes) {
1340 44         80 $blocks{$u}=[$u];
1341             }
1342 4         14 my @elementary=grep { $contracted->out_degree($_) == 1} $self->tree_nodes();
  36         5077  
1343 4         809 @elementary=sort {$mus->{$b} <=> $mus->{$a} ||
1344 0 0       0 $hs->{$b} <=> $hs->{$a}} @elementary;
1345 4         12 foreach my $elem (@elementary) {
1346 0         0 my @children=$contracted->successors($elem);
1347 0         0 my $child=$children[0];
1348 0 0       0 if ($contracted->in_degree($elem) == 1) {
1349 0         0 my @parents=$contracted->predecessors($elem);
1350 0         0 my $parent=$parents[0];
1351 0         0 $contracted->add_edge($parent,$child);
1352             }
1353 0         0 $contracted->delete_vertex($elem);
1354 0         0 my @blch=@{$blocks{$child}};
  0         0  
1355 0         0 my @blem=@{$blocks{$elem}};
  0         0  
1356 0         0 $blocks{$child}=[@blem,@blch];
1357 0         0 delete $blocks{$elem};
1358             }
1359 4         22 my $contr=Bio::PhyloNetwork->new(-graph => $contracted);
1360 4         118 return $contr,\%blocks;
1361             }
1362              
1363             =head2 optimal_alignment
1364              
1365             Title : optimal_alignment
1366             Usage : my ($weight,$alignment,$wgts)=$net->optimal_alignment($net2)
1367             Function: returns the total weight of an optimal alignment,
1368             the alignment itself, and partial weights
1369             between the networks $net1 and $net2 on the same set of leaves.
1370             An optional argument allows one to use the Manhattan (default) or the
1371             Hamming distance between mu-vectors.
1372             Returns : scalar,reference to hash,reference to hash
1373             Args : Bio::PhyloNetwork,
1374             -metric => string (optional)
1375              
1376             Supported strings for the -metric parameter are 'Manhattan' or 'Hamming'.
1377              
1378             =cut
1379              
1380             sub optimal_alignment {
1381 2     2 1 1205 my ($net1,$net2,%params)=@_;
1382              
1383 2         10 my ($net1cont,$blocks1)=contract_elementary($net1);
1384 2         10 my ($net2cont,$blocks2)=contract_elementary($net2);
1385 2         10 my ($wc,$alignc,$weightc)=
1386             optimal_alignment_noelementary($net1cont,$net2cont,%params);
1387 2         4 my %alignment=();
1388 2         4 my $totalweigth=0;
1389 2         4 my %weigths=();
1390 2         13 foreach my $u1 (keys %$alignc) {
1391 18         25 my $u2=$alignc->{$u1};
1392 18         16 my @block1=@{$blocks1->{$u1}};
  18         37  
1393 18         20 my @block2=@{$blocks2->{$u2}};
  18         33  
1394 18   66     50 while (@block1 && @block2) {
1395 18         48 my $u1dc=pop @block1;
1396 18         19 my $u2dc=pop @block2;
1397 18         29 $alignment{$u1dc}=$u2dc;
1398 18         25 $weigths{$u1dc}=$weightc->{$u1};
1399 18         40 $totalweigth+=$weigths{$u1dc};
1400             }
1401             }
1402 2         28 return $totalweigth,\%alignment,\%weigths;
1403             }
1404              
1405             sub optimal_alignment_noelementary {
1406 2     2 0 5 my ($net1,$net2,%params)=@_;
1407              
1408 2         86 my $comp = Array::Compare->new;
1409             $net1->throw("Cannot align phylogenetic networks on different set of leaves")
1410 2 50       285 unless $comp->compare($net1->{leaves},$net2->{leaves});
1411 2         544 my $distance;
1412 2 100 66     14 if ((defined $params{-metric})and ($params{-metric} eq 'Hamming')) {
1413 1         3 $distance='Hamming';
1414             } else {
1415 1         3 $distance='Manhattan';
1416             }
1417 2         5 my $numleaves=$net1->{numleaves};
1418 2         6 my @nodes1=$net1->internal_nodes();
1419 2         5 my @nodes2=$net2->internal_nodes();
1420 2         5 my $numnodes1=@nodes1;
1421 2         4 my $numnodes2=@nodes2;
1422 2         4 my @matrix=();
1423 2         7 for (my $i=0;$i<$numnodes1;$i++) {
1424 18         26 my @row=();
1425 18         29 for (my $j=0;$j<$numnodes2;$j++) {
1426 90         189 push @row,weight($net1,$nodes1[$i],$net2,$nodes2[$j],$distance);
1427             }
1428 18         46 push @matrix,\@row;
1429             }
1430 2         5 my @alignment=();
1431 2         13 Algorithm::Munkres::assign(\@matrix,\@alignment);
1432 2         11245 my %alignmenthash;
1433             my %weighthash;
1434 2         4 my $totalw=0;
1435 2         4 foreach my $leaf (@{$net1->{leaves}}) {
  2         8  
1436 8         20 $alignmenthash{$leaf}=$leaf;
1437 8         14 $weighthash{$leaf}=0;
1438             }
1439 2         9 for (my $i=0;$i<$numnodes1;$i++) {
1440 18 100       38 if (defined $nodes2[$alignment[$i]]) {
1441 10         25 $alignmenthash{$nodes1[$i]}=$nodes2[$alignment[$i]];
1442 10         23 $weighthash{$nodes1[$i]}=$matrix[$i][$alignment[$i]];
1443 10         22 $totalw += $matrix[$i][$alignment[$i]];
1444             }
1445             }
1446 2         28 return $totalw,\%alignmenthash,\%weighthash;
1447             }
1448              
1449             =head2 optimal_alignment_generalized
1450              
1451             Title : optimal_alignment_generalized
1452             Usage : my ($weight,%alignment)=$net->optimal_alignment_generalized($net2)
1453             Function: returns the wieght of an optimal alignment, and the alignment itself,
1454             between the topological restriction of the networks $net1 and $net2
1455             on the set of common leaves.
1456             An optional argument allows one to use the Manhattan (default) or the
1457             Hamming distance between mu-vectors.
1458             Returns : scalar,hash
1459             Args : Bio::PhyloNetwork,
1460             -metric => string (optional)
1461              
1462             Supported strings for the -metric parameter are 'Manhattan' or 'Hamming'.
1463              
1464             =cut
1465              
1466             sub optimal_alignment_generalized {
1467 0     0 1 0 my ($net1,$net2,%params)=@_;
1468 0         0 my ($netr1,$netr2)=$net1->topological_restriction($net2);
1469 0         0 return $netr1->optimal_alignment($netr2,%params);
1470             }
1471              
1472             sub weight {
1473 90     90 0 162 my ($net1,$v1,$net2,$v2,$distance)=@_;
1474 90         101 my $w;
1475 90 50       143 if (! defined $distance) {
1476 0         0 $distance='Manhattan';
1477             }
1478 90 100       130 if ($distance eq 'Hamming') {
1479 45         143 $w=$net1->{mudata}->{$v1}->hamming($net2->{mudata}->{$v2});
1480             } else {
1481 45         105 $w=$net1->{mudata}->{$v1}->manhattan($net2->{mudata}->{$v2});
1482             }
1483 90 100 100     159 if (($net1->is_tree_node($v1) && $net2->is_hybrid_node($v2)) ||
      100        
      100        
1484             ($net2->is_tree_node($v2) && $net1->is_hybrid_node($v1))
1485             )
1486             {
1487 36         91 $w +=1/(2*$net1->{numleaves});
1488             }
1489 90         251 return $w;
1490             }
1491              
1492              
1493             =head2 topological_restriction
1494              
1495             Title : topological_restriction
1496             Usage : my ($netr1,$netr2)=$net1->topological_restriction($net2)
1497             Function: returns the topological restriction of $net1 and $net2 on its
1498             common set of leaves
1499             Returns : Bio::PhyloNetwork, Bio::PhyloNetwork
1500             Args : Bio::PhyloNetwork
1501              
1502             =cut
1503              
1504             sub topological_restriction {
1505 0     0 1 0 my ($net1,$net2)=@_;
1506              
1507 0         0 my @leaves1=$net1->leaves();
1508 0         0 my @leaves2=$net2->leaves();
1509 0         0 my $numleaves1=scalar @leaves1;
1510 0         0 my $numleaves2=scalar @leaves2;
1511 0         0 my %position1;
1512 0         0 for (my $i=0; $i<$numleaves1; $i++) {
1513 0         0 $position1{$leaves1[$i]}=$i;
1514             }
1515 0         0 my %position2;
1516 0         0 my @commonleaves=();
1517 0         0 for (my $j=0; $j<$numleaves2; $j++) {
1518 0 0       0 if (defined $position1{$leaves2[$j]}) {
1519 0         0 push @commonleaves,$leaves2[$j];
1520 0         0 $position2{$leaves2[$j]}=$j;
1521             }
1522             }
1523 0         0 my $graphred1=$net1->{graph}->copy();
1524 0         0 my $graphred2=$net2->{graph}->copy();
1525             OUTER1:
1526 0         0 foreach my $u ($graphred1->vertices()) {
1527 0         0 my $mu=$net1->mudata_node($u);
1528 0         0 foreach my $leaf (@commonleaves) {
1529 0 0       0 if ($mu->[$position1{$leaf}]>0) {
1530 0         0 next OUTER1;
1531             }
1532             }
1533 0         0 $graphred1->delete_vertex($u);
1534             }
1535             OUTER2:
1536 0         0 foreach my $u ($graphred2->vertices()) {
1537 0         0 my $mu=$net2->mudata_node($u);
1538 0         0 foreach my $leaf (@commonleaves) {
1539 0 0       0 if ($mu->[$position2{$leaf}]>0) {
1540 0         0 next OUTER2;
1541             }
1542             }
1543 0         0 $graphred2->delete_vertex($u);
1544             }
1545 0         0 my $netr1=Bio::PhyloNetwork->new(-graph => $graphred1);
1546 0         0 my $netr2=Bio::PhyloNetwork->new(-graph => $graphred2);
1547 0         0 return ($netr1,$netr2);
1548             }
1549              
1550             # Functions for eNewick representation
1551              
1552             =head2 eNewick
1553              
1554             Title : eNewick
1555             Usage : my $str=$net->eNewick()
1556             Function: returns the eNewick representation of $net without labeling
1557             internal tree nodes
1558             Returns : string
1559             Args : none
1560              
1561             =cut
1562              
1563             sub eNewick {
1564 144     144 1 1598 my ($self)=@_;
1565 144         233 my $str="";
1566 144         230 my $seen={};
1567 144         326 foreach my $root ($self->roots()) {
1568 144         379 $str=$str.$self->eNewick_aux($root,$seen,undef)."; ";
1569             }
1570 144         592 return $str;
1571             }
1572              
1573             sub eNewick_aux {
1574 1266     1266 0 1997 my ($self,$node,$seen,$parent)=@_;
1575 1266         1514 my $str='';
1576 1266 100 100     1978 if ($self->is_leaf($node) ||
1577             (defined $seen->{$node}) )
1578             {
1579 606         1042 $str=make_label($self,$parent,$node);
1580             }
1581             else {
1582 660         1068 $seen->{$node}=1;
1583 660         1467 my @sons=$self->{graph}->successors($node);
1584 660         37114 $str="(";
1585 660         936 foreach my $son (@sons) {
1586 1122         2111 $str=$str.$self->eNewick_aux($son,$seen,$node).",";
1587             }
1588 660         1014 chop($str);
1589 660         930 $str.=")".make_label($self,$parent,$node);
1590             }
1591 1266         3659 return $str;
1592             }
1593              
1594             sub make_label {
1595 1266     1266 0 1959 my ($self,$parent,$node)=@_;
1596 1266         1456 my $str='';
1597 1266 100       1940 if ($self->is_hybrid_node($node)) {
1598 300         575 my $lbl=$self->{labels}->{$node};
1599 300 100       958 if ($lbl =~ /#/) {
1600 294         349 $lbl='';
1601             }
1602 300         378 $str.=$lbl; #$self->{labels}->{$node};
1603 300         333 $str.='#';
1604 300 100 66     749 if ((defined $parent) &&
1605             ($self->graph->has_edge_attribute($parent,$node,'type'))) {
1606 6         558 $str.=$self->graph->get_edge_attribute($parent,$node,'type');
1607             }
1608 300         23369 $str.=substr $node,1;
1609             } else {
1610 966         1868 $str.=$self->{labels}->{$node};
1611             }
1612 1266 50 66     3170 if ((defined $parent) &&
1613             ($self->graph->has_edge_weight($parent,$node))) {
1614 0         0 $str.=":".$self->graph->get_edge_weight($parent,$node);
1615             }
1616 1266         188072 return $str;
1617             }
1618              
1619             =head2 eNewick_full
1620              
1621             Title : eNewick_full
1622             Usage : my $str=$net->eNewick_full()
1623             Function: returns the eNewick representation of $net labeling
1624             internal tree nodes
1625             Returns : string
1626             Args : none
1627              
1628             =cut
1629              
1630             sub eNewick_full {
1631 136     136 1 212 my ($self)=@_;
1632 136         221 my $str="";
1633 136         194 my $seen={};
1634 136         283 foreach my $root ($self->roots()) {
1635 136         319 $str=$str.$self->eNewick_full_aux($root,$seen,undef)."; ";
1636             }
1637 136         518 return $str;
1638             }
1639              
1640             sub eNewick_full_aux {
1641 1162     1162 0 1735 my ($self,$node,$seen,$parent)=@_;
1642 1162         1382 my $str='';
1643 1162 100 100     1720 if ($self->is_leaf($node) ||
1644             (defined $seen->{$node}) )
1645             {
1646 574         1030 $str=make_label_full($self,$parent,$node);
1647             }
1648             else {
1649 588         909 $seen->{$node}=1;
1650 588         1175 my @sons=$self->{graph}->successors($node);
1651 588         38405 $str="(";
1652 588         745 foreach my $son (@sons) {
1653 1026         1741 $str=$str.$self->eNewick_full_aux($son,$seen,$node).",";
1654             }
1655 588         1083 chop($str);
1656 588         852 $str.=")".make_label_full($self,$parent,$node);
1657             }
1658 1162         2958 return $str;
1659             }
1660              
1661             sub make_label_full {
1662 1162     1162 0 1898 my ($self,$parent,$node)=@_;
1663 1162         1263 my $str='';
1664 1162 100       1682 if ($self->is_hybrid_node($node)) {
1665 300         530 my $lbl=$self->{labels}->{$node};
1666 300 100       854 if ($lbl =~ /#/) {
1667 294         388 $lbl='';
1668             }
1669 300         377 $str.=$lbl; #$self->{labels}->{$node};
1670 300         315 $str.='#';
1671 300 100 66     743 if ((defined $parent) &&
1672             ($self->graph->has_edge_attribute($parent,$node,'type'))) {
1673 6         502 $str.=$self->graph->get_edge_attribute($parent,$node,'type');
1674             }
1675 300         22809 $str.=substr $node,1;
1676             } else {
1677 862 100 66     3072 if ((defined $self->{labels}->{$node})&&($self->{labels}->{$node} ne '')) {
1678 858         1391 $str.=$self->{labels}->{$node};
1679             }
1680             else {
1681 4         6 $str.=$node;
1682             }
1683             }
1684 1162 50 66     2588 if ((defined $parent) &&
1685             ($self->graph->has_edge_weight($parent,$node))) {
1686 0         0 $str.=":".$self->graph->get_edge_weight($parent,$node);
1687             }
1688 1162         86962 return $str;
1689             }
1690              
1691             # sub eNewick_full {
1692             # my ($self)=@_;
1693             # my $str="";
1694             # my $seen={};
1695             # foreach my $root ($self->roots()) {
1696             # $str=$str.$self->eNewick_full_aux($root,$seen,undef)."; ";
1697             # }
1698             # return $str;
1699             # }
1700              
1701             # sub eNewick_full_aux {
1702             # my ($self,$node,$seen,$parent)=@_;
1703             # my $str;
1704             # if ($self->is_leaf($node) ||
1705             # (defined $seen->{$node}) )
1706             # {
1707             # if ($self->is_hybrid_node($node)) {
1708             # my $tag=substr $node,1;
1709             # if ((defined $parent) &&
1710             # ($self->graph->has_edge_attribute($parent,$node,'type'))) {
1711             # $str='#'.$self->graph->get_edge_attribute($parent,$node,'type').$tag;
1712             # } else {
1713             # $str=$node;
1714             # }
1715             # } else {
1716             # $str=$node;
1717             # }
1718             # }
1719             # else {
1720             # $seen->{$node}=1;
1721             # my @sons=$self->{graph}->successors($node);
1722             # $str="(";
1723             # foreach my $son (@sons) {
1724             # $str=$str.$self->eNewick_full_aux($son,$seen,$node).",";
1725             # }
1726             # chop($str);
1727             # if ($self->is_hybrid_node($node)) {
1728             # my $tag=substr $node,1;
1729             # if ((defined $parent) &&
1730             # ($self->graph->has_edge_attribute($parent,$node,'type'))) {
1731             # $str.=')#'.$self->graph->get_edge_attribute($parent,$node,'type').$tag;
1732             # } else {
1733             # $str.=")$node";
1734             # }
1735             # } else {
1736             # $str.=")$node";
1737             # }
1738             # }
1739             # if ((defined $parent) &&
1740             # ($self->graph->has_edge_weight($parent,$node))) {
1741             # $str.=":".$self->graph->get_edge_weight($parent,$node);
1742             # }
1743             # return $str;
1744             # }
1745              
1746              
1747             # displaying data
1748              
1749 5     5   69 use overload '""' => \&display;
  5         11  
  5         67  
1750              
1751             =head2 display
1752              
1753             Title : display
1754             Usage : my $str=$net->display()
1755             Function: returns a string containing all the available information on $net
1756             Returns : string
1757             Args : none
1758              
1759             =cut
1760              
1761             sub display {
1762 135     135 1 448 my ($self)=@_;
1763 135         207 my $str="";
1764 135         228 my $graph=$self->{graph};
1765 135         345 my @leaves=$self->leaves();
1766 135         246 my @nodes=@{$self->{nodes}};
  135         351  
1767 135         453 $str.= "Leaves:\t@leaves\n";
1768 135         391 $str.= "Nodes:\t@nodes\n";
1769 135         511 $str.= "Graph:\t$graph\n";
1770 135         73357 $str.= "eNewick:\t".$self->eNewick()."\n";
1771 135         356 $str.= "Full eNewick:\t".$self->eNewick_full()."\n";
1772 135         283 $str.= "Mu-data and heights:\n";
1773 135         257 foreach my $node (@nodes) {
1774 999         1349 $str.= "v=$node: ";
1775 999 50       1402 if (exists $self->{labels}->{$node}) {
1776 999         1334 $str.="\tlabel=".$self->{labels}->{$node}.",";
1777             } else {
1778 0         0 $str.="\tlabel=(none),";
1779             }
1780 999         2682 $str.= "\th=".$self->{h}->{$node}.", \tmu=".$self->{mudata}->{$node}."\n";
1781             }
1782 135 50       352 if (exists $self->{has_temporal_representation}) {
1783 0         0 $str.= "Temporal representation:\n";
1784 0 0       0 if ($self->{has_temporal_representation}) {
1785 0         0 foreach my $node (@nodes) {
1786 0         0 $str.= "v=$node; ";
1787 0         0 $str.= "\tt=".$self->{temporal_representation}->{$node}."\n";
1788             }
1789             } else {
1790 0         0 $str.= "Does not exist.\n";
1791             }
1792             }
1793 135 50       294 if (exists $self->{tripartitions}) {
1794 0         0 $str.= "Tripartitions:\n";
1795 0         0 foreach my $node (@nodes) {
1796 0         0 $str.= "v=$node; ";
1797 0         0 $str.= "\ttheta=".$self->{tripartitions}->{$node}."\n";
1798             }
1799             }
1800 135         539 return $str;
1801             }
1802              
1803             1;