File Coverage

Bio/PhyloNetwork.pm
Criterion Covered Total %
statement 610 696 87.6
branch 123 164 75.0
condition 65 84 77.3
subroutine 70 76 92.1
pod 31 65 47.6
total 899 1085 82.8


line stmt bran cond sub pod time code
1             #
2             # Module for Bio::PhyloNetwork
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Gabriel Cardona
7             #
8             # Copyright Gabriel Cardona, Gabriel Valiente
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::PhyloNetwork - Module to compute with Phylogenetic Networks
17              
18             =head1 SYNOPSIS
19              
20             use Bio::PhyloNetwork;
21              
22             # Create a PhyloNetwork object from a eNewick string
23             my $net1=Bio::PhyloNetwork->new(
24             -eNewick=>'t0:((H1,(H2,l2)),H2); H1:((H3,l1)); H2:((H3,(l3,H1))); H3:(l4);'
25             );
26              
27             # Print all available data
28             print $net1;
29              
30             # Rebuild $net1 from its mu_data
31             my %mudata=$net1->mudata();
32             my $net2=Bio::PhyloNetwork->new(-mudata=>\%mudata,-numleaves=>4);
33             print $net2;
34             print "d=".$net1->mu_distance($net2)."\n";
35              
36             # Get another one and compute distance
37             my $net3=Bio::PhyloNetwork->new(
38             -eNewick=>'(l2,((l1,(H1,l4)),H1))r; (l3)H1;'
39             );
40             print "d=".$net1->mu_distance($net3)."\n";
41              
42             # ...and find an optimal alignment w.r.t. the Manhattan distance (default)
43             my ($weight,%alignment)=$net1->optimal_alignment($net3);
44             print "weight:$weight\n";
45             foreach my $node1 (keys %alignment) {
46             print "$node1 => ".$alignment{$node1}."\n";
47             }
48             # ...or the Hamming distance
49              
50             my ($weightH,%alignmentH)=$net1->optimal_alignment($net3,-metric=>'Hamming');
51             print "weight:$weightH\n";
52             foreach my $node1 (keys %alignmentH) {
53             print "$node1 => ".$alignmentH{$node1}."\n";
54             }
55              
56             # Test for time consistency of $net1
57             if ($net1->is_time_consistent) {
58             print "net1 is time consistent\n"
59             }
60             else {
61             print "net1 is not time consistent\n"
62             }
63              
64             # create a network from the list of edges
65             my $net4=Bio::PhyloNetwork->new(-edges=>
66             [qw(r s r t s u s c t c t v u b u l3 u b v b v l4 b l2 c l1)]);
67              
68             # Test for time consistency of $net3
69             if ($net4->is_time_consistent) {
70             print "net4 is time consistent\n"
71             }
72             else {
73             print "net4 is not time consistent\n"
74             }
75              
76             # And print all information on net4
77             print $net4;
78              
79             # Compute some tripartitions
80             my %triparts=$net1->tripartitions();
81              
82             # Now these are stored
83             print $net1;
84              
85             # And can compute the tripartition error
86             print "dtr=".$net1->tripartition_error($net3)."\n";
87              
88             =head1 DESCRIPTION
89              
90             =head2 Phylogenetic Networks
91              
92             This is a module to work with phylogenetic networks. Phylogenetic networks
93             have been studied over the last years as a richer model of the evolutionary
94             history of sets of organisms than phylogenetic trees, because they take not
95             only mutation events but also recombination and horizontal gene transfer
96             events into account.
97              
98             The natural model for describing the evolutionary
99             history of a set of sequences under recombination events is a DAG, hence
100             this package relies on the package Graph::Directed to represent the
101             underlying graph of a phylogenetic network. We refer the reader to [CRV1,CRV2]
102             for formal definitions related to phylogenetic networks.
103              
104             =head2 eNewick description
105              
106             With this package, phylogenetic networks can be given by its eNewick
107             string. This description appeared in other packages related to
108             phylogenetic networks (see [PhyloNet] and [NetGen]); in fact, these two
109             packages use different descriptions. The Bio::PhyloNetwork
110             package allows both of them, but uses the second one in its output.
111              
112             The first approach [PhyloNet] goes as follows: For each hybrid node H, say with
113             parents u_1,u_2,...,u_k and children v_1,v_2,...v_l: split H in k+1 different
114             nodes; let each of the first k copies be a child of one of the u_1,...,u_k
115             (one for each) and have no children (hence we will have k extra leaves);
116             as for the last copy, let it have no parents and have v_1,...,v_l be its
117             children. This way we get a forest; each of the trees will be rooted at either
118             one root of the phylogenetic network or a hybrid node of it; the set of leaves
119             (of the whole forest) will be the set of leaves of the original network
120             together with the set of hybrid nodes (each of them repeated as many times
121             as its in-degree). Then, the eNewick representation of the phylogenetic network
122             will be the Newick representation of all the trees in the obtained forest,
123             each of them with its root labeled.
124              
125             The second approach [NetGen] goes as follows: For each hybrid node H, say with
126             parents u_1,u_2,...,u_k and children v_1,v_2,...v_l: split H in k different
127             nodes; let the first copy be a child of u_1 and have all v_1,v_2,...v_l as
128             its children; let the other copies be child of u_2,...,u_k (one for each)
129             and have no children. This way, we get a tree whose set of leaves is the
130             set of leaves of the original network together with the set of hybrid nodes
131             (possibly repeated). Then the Newick string of the obtained tree (note that
132             some internal nodes will be labeled and some leaves will be repeated) is
133             the eNewick string of the phylogenetic network.
134              
135             For example, consider the network depicted below:
136              
137             r
138             / \
139             / \
140             U V
141             / \ / \
142             1 \ / 3
143             H
144             |
145             2
146              
147             If the first approach is taken, we get the forest:
148              
149             r
150             / \
151             / \
152             U V
153             / \ / \
154             1 H H 3
155             |
156             H
157             |
158             2
159              
160             Hence, the eNewick string is '((1,H),(H,3))r; (2)H;'.
161              
162             As for the second one, one gets the tree:
163              
164             r
165             / \
166             / \
167             U V
168             / \ / \
169             1 H | 3
170             H
171             |
172             2
173              
174             Hence, the eNewick string is '((1,H),((2)H,3))r;'.
175              
176             Note: when rooting a tree, this package allows the notations
177             '(subtree,subtree,...)root' as well as 'root:(subtree,subtree,...)', but
178             the first one is used when writing eNewick strings.
179              
180             =head2 Tree-child phylogenetic networks
181              
182             Tree-child (TC) phylogenetic networks are a special class of phylogenetic
183             networks for which a distance, called mu-distance, is defined [CRV2]
184             based on certain data (mu-data) associated to every node.
185             Moreover, this distance extends the
186             Robinson-Foulds on phylogenetic trees. This package allows testing for a
187             phylogenetic network if it is TC and computes mu-distances between networks
188             over the same set of leaves.
189              
190             Moreover, the mu-data allows one to define the optimal
191             (in some precise sense) alignment between networks
192             over the same set of leaves. This package also computes this optimal alignment.
193              
194             =head2 Tripartitions
195              
196             Although tripartitions (see [CRV1] and the references therein) do not allow
197             to define distances, this package outputs tripartitions and computes a weak
198             form of the tripartition error.
199              
200             =head2 Time-consistency
201              
202             Another useful property of Phylogenetic Networks that appears in the literature
203             is that of time-consistency or real-time hybrids [BSS]. Roughly speaking, a
204             network admits a temporal representation if it can be drawn in such a way
205             that tree arcs (those whose end is a tree node) are inclined downwards, while
206             hybridization arcs (those whose end is a hybrid node) are horizontal.
207             This package checks for time-consistency and, if so, a temporal representation
208             is provided.
209              
210             =head1 AUTHOR
211              
212             Gabriel Cardona, gabriel(dot)cardona(at)uib(dot)es
213             Gabriel Valiente, valiente(at)lsi(dot)upc(dot)edu
214              
215             =head1 SEE ALSO
216              
217             =over
218              
219             =item [CRV1]
220              
221             G. Cardona, F. Rossello, G. Valiente. Tripartitions do not always
222             discriminate phylogenetic networks. arXiv:0707.2376v1 [q-bio.PE]
223              
224             =item [CRV2]
225              
226             G. Cardona, F. Rossello, G. Valiente. A Distance Measure for
227             Tree-Child Phylogenetic Networks. Preprint.
228              
229             =item [NetGen]
230              
231             M.M. Morin, and B.M.E. Moret. NetGen: generating phylogenetic networks
232             with diploid hybrids. Bioinformatics 22 (2006), 1921-1923
233              
234             =item [PhyloNet]
235              
236             PhyloNet: "Phylogenetic Networks Toolkit".
237             http://bioinfo.cs.rice.edu/phylonet
238              
239             =item [BSS]
240              
241             M. Baroni, C. Semple, and M. Steel. Hybrids in Real
242             Time. Syst. Biol. 55(1):46-56, 2006
243              
244             =back
245              
246             =head1 APPENDIX
247              
248             The rest of the documentation details each of the object methods.
249              
250             =cut
251              
252             package Bio::PhyloNetwork;
253              
254 5     5   495 use strict;
  5         5  
  5         114  
255 5     5   13 use warnings;
  5         5  
  5         112  
256              
257 5     5   12 use base qw(Bio::Root::Root);
  5         7  
  5         1202  
258              
259 5     5   1126 use Bio::PhyloNetwork::muVector;
  5         7  
  5         104  
260 5     5   1309 use Graph::Directed;
  5         276476  
  5         107  
261 5     5   1239 use Bio::TreeIO;
  5         9  
  5         124  
262 5     5   22 use Bio::Tree::Node;
  5         6  
  5         90  
263 5     5   1677 use IO::String;
  5         6685  
  5         116  
264 5     5   1692 use Array::Compare;
  5         289523  
  5         141  
265 5     5   1722 use Algorithm::Munkres;
  5         4927  
  5         23897  
266              
267             # Creator
268              
269             =head2 new
270              
271             Title : new
272             Usage : my $obj = new Bio::PhyloNetwork();
273             Function: Creates a new Bio::PhyloNetwork object
274             Returns : Bio::PhyloNetwork
275             Args : none
276             OR
277             -eNewick => string
278             OR
279             -graph => Graph::Directed object
280             OR
281             -edges => reference to an array
282             OR
283             -tree => Bio::Tree::Tree object
284             OR
285             -mudata => reference to a hash,
286             -leaves => reference to an array
287             OR
288             -mudata => reference to a hash,
289             -numleaves => integer
290              
291             Returns a Bio::PhyloNetwork object, created according to the data given:
292              
293             =over 3
294              
295             =item new()
296              
297             creates an empty network.
298              
299             =item new(-eNewick =E $str)
300              
301             creates the network whose
302             Extended Newick representation (see description above) is the string $str.
303              
304             =item new(-graph =E $graph)
305              
306             creates the network with underlying
307             graph given by the Graph::Directed object $graph
308              
309             =item new(-tree =E $tree)
310              
311             creates a network as a copy of the
312             Bio::Tree::Tree object in $tree
313              
314             =item new(-mudata =E \%mudata, -leaves =E \@leaves)
315              
316             creates the network by reconstructing it from its mu-data stored in
317             \%mudata and with set of leaves in \@leaves.
318              
319             =item new(-mudata =E \%mudata, -numleaves =E $numleaves)
320              
321             creates the network by reconstructing it from its mu-data stored in
322             \%mudata and with set of leaves in ("l1".."l$numleaves").
323              
324             =back
325              
326             =cut
327              
328             sub new {
329 1366     1366 1 8307 my ($pkg,@args)=@_;
330 1366         3592 my $self=$pkg->SUPER::new(@args);
331 1366         8216 my ($eNewick,$edgesR,$leavesR,$numleaves,$graph,$tree,$mudataR)=
332             $self->_rearrange([qw(ENEWICK
333             EDGES
334             LEAVES
335             NUMLEAVES
336             GRAPH
337             TREE
338             MUDATA)],@args);
339 1366         3399 bless($self,$pkg);
340              
341 1366 100       2954 $self->build_from_eNewick($eNewick) if defined $eNewick;
342 1366 50       2272 $self->build_from_edges(@$edgesR) if defined $edgesR;
343 1366 100       2686 $self->build_from_graph($graph) if defined $graph;
344 1366 100       4106 $self->build_from_tree($tree) if defined $tree;
345 1366 50 66     14688 if ((! defined $leavesR) && (defined $numleaves)) {
346 0         0 my @leaves=map {"l$_"} (1..$numleaves);
  0         0  
347 0         0 $leavesR=\@leaves;
348             }
349 1366 100 66     3899 $self->build_from_mudata($mudataR,$leavesR)
350             if ((defined $mudataR) && (defined $leavesR));
351 1366         5268 return $self;
352             }
353              
354             # Builders
355              
356             sub build_from_edges {
357 0     0 0 0 my ($self,@edges)=@_;
358 0         0 my $graph=Graph::Directed->new();
359 0         0 $graph->add_edges(@edges);
360 0         0 $self->{graph}=$graph;
361 0         0 $self->recompute();
362 0         0 my $labels={};
363 0         0 foreach my $node ($self->nodes()) {
364 0         0 $labels->{$node}=$node;
365             }
366 0         0 $self->{labels}=$labels;
367             }
368              
369             sub build_from_graph {
370 1357     1357 0 1697 my ($self,$graph)=@_;
371 1357         4090 my $graphcp=$graph->copy();
372 1357         1294592 $self->{graph}=$graphcp;
373 1357         3506 $self->recompute();
374 1357         1925 my $labels={};
375 1357         4082 foreach my $node ($self->nodes()) {
376 10345         11446 $labels->{$node}=$node;
377             }
378 1357         6268 $self->{labels}=$labels;
379             }
380              
381             my $_eN_index;
382              
383             sub build_from_eNewick {
384 1182     1182 0 1548 my ($self,$string)=@_;
385 1182         1942 $_eN_index=0;
386 1182         7533 my $graph=Graph::Directed->new();
387 1182         173337 my $labels={};
388 1182         8317 my @blocks=split(/; */,$string);
389 1182         2443 foreach my $block (@blocks) {
390 1545         3382 my ($rt,$str)=get_root_and_subtree($block);
391 1545         3580 my ($rtlbl,$rttype,$rtid,$rtlng)=get_label_type_id_length($rt);
392 1545         3036 process_block($graph,$labels,$block,$rtid);
393 1545         4162 $labels->{$rtid}=$rtlbl.'';
394             }
395 1182         2187 $self->{graph}=$graph;
396 1182         1738 $self->{labels}=$labels;
397 1182         2598 $self->recompute();
398             }
399              
400             sub process_block {
401 2974     2974 0 3754 my ($graph,$labels,$block,$rtid)=@_;
402 2974         4158 my ($rt,$str)=get_root_and_subtree($block);
403 2974         4471 my @substrs=my_split($str);
404 2974         4543 foreach my $substr (@substrs) {
405 5432         7458 my ($subrt,$subblock)=get_root_and_subtree($substr);
406 5432         8160 my ($subrtlbl,$subrttype,$subrtid,$subrtlng)=
407             get_label_type_id_length($subrt);
408 5432 100       9155 if (! $subrtlng eq '') {
409 4112         10588 $graph->add_weighted_edges($rtid,$subrtid,$subrtlng);
410             }
411             else {
412 1320         2740 $graph->add_edges($rtid,$subrtid);
413             }
414 5432 100       885752 if (! $subrttype eq '') {
415 444         1037 $graph->set_edge_attribute($rtid,$subrtid,'type',$subrttype);
416             }
417 5432         57020 $subrtlbl.='';
418             # if (! $subrtlbl eq '') {
419 5432 50 66     13431 if ((! defined $labels->{$subrtid})||($labels->{$subrtid} eq '')){
    0 0        
420 5432         7387 $labels->{$subrtid}=$subrtlbl;
421             } elsif (( $labels->{$subrtid} ne $subrtlbl )&&($subrtlbl ne '')) {
422             # error
423 0         0 die("Different labels for the same node (".$labels->{$subrtid}." and $subrtlbl)");
424             }
425             # }
426 5432 100       13116 if ($subblock ne "") {
427 1429         2983 process_block($graph,$labels,$subblock,$subrtid);
428             }
429             }
430             }
431              
432             sub get_root_and_subtree {
433 9951     9951 0 8583 my ($block)=@_;
434 9951         9542 my ($rt,$str)=("","");
435             # ($rt,$str)=split(/:|=/,$block);
436 9951         16003 ($rt,$str)=split(/=/,$block);
437 9951 100       16632 if ($rt eq $block) {
438             # try to look for root label at the end
439 9947         10102 my $pos=length($rt)-1;
440 9947   100     33932 while ((substr($rt,$pos,1) ne ")") && ($pos >=0)) {
441 99747         233663 $pos--;
442             }
443 9947         12616 $rt=substr($block,$pos+1,length($block)-$pos);
444 9947         13728 $str=substr($block,0,$pos+1);
445             }
446 9951         11570 $rt=trim($rt);
447 9951         10447 $str=trim($str);
448 9951         18984 return ($rt,$str);
449             }
450              
451             sub get_label_type_id_length {
452 6977     6977 0 5614 my ($string) = @_;
453 6977         5987 $string.='';
454             # print "$string\n";
455 6977 100       12185 if (index($string,'#')==-1) {
456             # no hybrid
457 6317         9494 my ($label,$length)=split(':',$string);
458 6317         6114 $label.='';
459 6317         4249 my $id;
460 6317 100 66     19031 if ((! defined $label) || ($label eq '')) {
461             # create id
462 682         613 $_eN_index++;
463 682         726 $id="T$_eN_index";
464             } else {
465 5635         6644 $id=$label;
466             }
467 6317         14430 return ($label,'',$id,$length);
468             }
469             else {
470             # hybrid
471 660         995 my ($label,$string2)=split('#',$string);
472 660         766 my ($typeid,$length)=split(':',$string2);
473 660         658 my $type=$typeid;
474 660         1544 $type =~ s/\d//g;
475 660         517 my $id=$typeid;
476 660         1223 $id =~ s/\D//g;
477 660         1486 return ($label,$type,'#'.$id,$length);
478             }
479             }
480              
481             sub trim
482             {
483 19902     19902 0 16287 my ($string) = @_;
484 19902         24376 $string =~ s/^\s+//;
485 19902         19943 $string =~ s/\s+$//;
486 19902         20753 return $string;
487             }
488              
489             sub my_split {
490 2974     2974 0 2684 my ( $string ) = @_;
491 2974         2543 my $temp="";
492 2974         2252 my @substrings;
493 2974         2238 my $level=1;
494 2974         6146 for my $i ( 1 .. length( $string ) ) {
495 145256         97246 my $char=substr($string,$i,1);
496 145256 100       154745 if ($char eq "(") {
497 1664         1773 $level++;
498             }
499 145256 100       149340 if ($char eq ")") {
500 4491 100       7124 if ($level==1) {
501 2827         2858 push @substrings, $temp;
502 2827         2422 $temp="";
503             }
504 4491         3432 $level--;
505             }
506 145256 100 100     192227 if (($char eq ",") && ($level==1)) {
507 2605         3054 push @substrings, $temp;
508 2605         2565 $temp="";
509 2605         2404 $char="";
510             }
511 145256         107165 $temp = $temp.$char;
512             }
513 2974         7548 return @substrings;
514             }
515              
516             sub build_from_mudata {
517 1     1 0 2 my ($self,$mus,$leavesR)=@_;
518 1         5 my $graph=Graph::Directed->new();
519 1         179 my @nodes=keys %{$mus};
  1         6  
520 1         2 my @leaves=@{$leavesR};
  1         3  
521              
522 1         1 my %seen;
523             my @internal;
524              
525 1         3 @seen{@leaves} = ();
526              
527 1         3 foreach my $node (@nodes) {
528 13 100       18 push(@internal, $node) unless exists $seen{$node};
529             }
530              
531 1         7 @internal=sort {$mus->{$b} <=> $mus->{$a} } @internal;
  21         30  
532 1         3 @nodes=(@internal,@leaves);
533 1         3 my $numnodes=@nodes;
534 1         4 for (my $i=0;$i<$numnodes;$i++) {
535 13         13 my $mu=$mus->{$nodes[$i]};
536 13         11 my $j=$i+1;
537 13   66     18 while ($mu->is_positive() && $j<$numnodes) {
538 78 100       113 if ($mu->geq_poset($mus->{$nodes[$j]})) {
539 15         30 $graph->add_edges(($nodes[$i],$nodes[$j]));
540 15         999 $mu = $mu - $mus->{$nodes[$j]};
541             }
542 78         110 $j++;
543             }
544             }
545 1         5 $self->build_from_graph($graph);
546             }
547              
548             # sub relabel_tree {
549             # my ($tree)=@_;
550             # my $i=1;
551             # my $j=1;
552             # my $root=$tree->get_root_node();
553             # foreach my $node ($tree->get_nodes()) {
554             # if ($node == $root) {
555             # $node->{'_id'}="r";
556             # }
557             # elsif (! $node->is_Leaf) {
558             # $node->{'_id'}="t$i";
559             # $i++;
560             # }
561             # else {
562             # if ($node->{'_id'} eq "") {
563             # $node->{'_id'}="l$j";
564             # $j++;
565             # }
566             # }
567             # }
568             # return $tree;
569             # }
570              
571             # sub build_subtree {
572             # my ($graph,$root)=@_;
573             # foreach my $child ($root->each_Descendent) {
574             # $graph->add_edge($root->id,$child->id);
575             # $graph=build_subtree($graph,$child);
576             # }
577             # return $graph;
578             # }
579              
580             sub build_from_tree {
581 1028     1028 0 1217 my ($self,$tree)=@_;
582             # relabel_tree($tree);
583             # my $treeroot=$tree->get_root_node;
584             # my $graph=Graph::Directed->new();
585             # $graph=build_subtree($graph,$treeroot);
586             # $self->build_from_graph($graph);
587 1028         998 my $str;
588 1028         7708 my $io=IO::String->new($str);
589 1028         56297 my $treeio=Bio::TreeIO->new(-format => 'newick', -fh => $io);
590 1028         3216 $treeio->write_tree($tree);
591             # print "intern: $str\n";
592 1028         2890 $self->build_from_eNewick($str);
593             }
594              
595             sub recompute {
596 2539     2539 0 2826 my ($self)=@_;
597             $self->throw("Graph is not DAG:".$self->{graph})
598 2539 50       7316 unless $self->{graph}->is_dag();
599 2539         3350605 my @leaves=$self->{graph}->successorless_vertices();
600 2539         428884 @leaves=sort @leaves;
601 2539         3291 my $numleaves=@leaves;
602 2539         7859 my @roots=$self->{graph}->predecessorless_vertices();
603 2539         421599 my $numroots=@roots;
604             #$self->throw("Graph is not rooted") unless ($numroots == 1);
605 2539         6965 my @nodes=$self->{graph}->vertices();
606 2539         92519 @nodes=sort @nodes;
607 2539         3024 my $numnodes=@nodes;
608 2539         4564 foreach my $node (@nodes) {
609 16737 100       27487 if (! defined $self->{labels}->{$node}) {
610 3576         5722 $self->{labels}->{$node}='';
611             }
612             }
613 2539         4324 $self->{leaves}=\@leaves;
614 2539         3968 $self->{numleaves}=$numleaves;
615 2539         3102 $self->{roots}=\@roots;
616 2539         3547 $self->{numroots}=$numroots;
617 2539         4560 $self->{nodes}=\@nodes;
618 2539         3483 $self->{numnodes}=$numnodes;
619 2539         3412 $self->{mudata}={};
620 2539         6540 $self->{h}={};
621 2539         7150 $self->compute_height();
622 2539         5425 $self->compute_mu();
623 2539         8405 return $self;
624             }
625              
626             # Hybridizing
627              
628             sub is_attackable {
629 5021     5021 0 5867 my ($self,$u1,$v1,$u2,$v2)=@_;
630 5021 100 100     7573 if ( $self->is_hybrid_node($v1) ||
      100        
      100        
      66        
      100        
631             $self->is_hybrid_node($v2) ||
632             $self->graph->is_reachable($v2,$u1) ||
633             (($u1 eq $u2)&&($v1 eq $v2)) ||
634 3525 100       118680 (! scalar grep {($_ ne $v2) && ($self->is_tree_node($_))}
635             $self->graph->successors($u2)))
636             {
637 3848         1524135 return 0;
638             }
639 1173         4400 return 1;
640             }
641              
642             sub do_attack {
643 1173     1173 0 2341 my ($self,$u1,$v1,$u2,$v2,$lbl)=@_;
644 1173         1687 my $graph=$self->{graph};
645 1173         4479 $graph->delete_edge($u1,$v1);
646 1173         97682 $graph->delete_edge($u2,$v2);
647 1173         64560 $graph->add_edge($u1,"T$lbl");
648 1173         89798 $graph->add_edge("T$lbl",$v1);
649 1173         52719 $graph->add_edge($u2,"#H$lbl");
650 1173         71520 $graph->add_edge("#H$lbl",$v2);
651 1173         52653 $graph->add_edge("T$lbl","#H$lbl");
652 1173         50418 $self->build_from_graph($graph);
653             }
654              
655              
656             # Computation of mu-data
657              
658             sub compute_mu {
659 2539     2539 0 3021 my ($self)=@_;
660 2539         3110 my $graph=$self->{graph};
661 2539         3131 my $mudata=$self->{mudata};
662 2539         2909 my @leaves=@{$self->{leaves}};
  2539         5264  
663 2539         3374 my $numleaves=$self->{numleaves};
664 2539         6574 for (my $i=0;$i<$numleaves;$i++) {
665 7654         19898 my $vec=Bio::PhyloNetwork::muVector->new($numleaves);
666 7654         17529 $vec->[$i]=1;
667 7654         18819 $mudata->{$leaves[$i]}=$vec;
668             }
669 2539         2471 my $h=1;
670 2539         2532 while (my @nodes=grep {$self->{h}->{$_} == $h} @{$self->{nodes}} )
  78201         83166  
  11049         16196  
671             {
672 8510         9225 foreach my $u (@nodes) {
673 9083         16411 my $vec=Bio::PhyloNetwork::muVector->new($numleaves);
674 9083         16701 foreach my $son ($graph->successors($u)) {
675 16182         252376 $vec+=$mudata->{$son};
676             }
677 9083         15108 $mudata->{$u}=$vec;
678             }
679 8510         9390 $h++;
680             }
681             }
682              
683             sub compute_height {
684 2539     2539 0 3238 my ($self)=@_;
685 2539         3327 my $graph=$self->{graph};
686 2539         2621 my @leaves=@{$self->{leaves}};
  2539         5567  
687 2539         4715 foreach my $leaf (@leaves) {
688 7654         9624 $self->{h}->{$leaf}=0;
689             }
690 2539         2779 my $h=0;
691 2539 100       3111 while (my @nodes=grep {(defined $self->{h}->{$_})&&($self->{h}->{$_} == $h)}
  94938         221383  
692 13588         17000 @{$self->{nodes}} )
693             {
694 11049         11064 foreach my $node (@nodes) {
695 25529         64807 foreach my $parent ($graph->predecessors($node)) {
696 21004         395779 $self->{h}->{$parent}=$h+1;
697             }
698             }
699 11049         94270 $h++;
700             }
701             }
702              
703             # Tests
704              
705             =head2 is_leaf
706              
707             Title : is_leaf
708             Usage : my $b=$net->is_leaf($u)
709             Function: tests if $u is a leaf in $net
710             Returns : boolean
711             Args : scalar
712              
713             =cut
714              
715             sub is_leaf {
716 11004     11004 1 10414 my ($self,$node)=@_;
717 11004 100       19643 if ($self->{graph}->out_degree($node) == 0) {return 1;}
  4453         136069  
718 6551         649906 return 0;
719             }
720              
721             =head2 is_root
722              
723             Title : is_root
724             Usage : my $b=$net->is_root($u)
725             Function: tests if $u is the root of $net
726             Returns : boolean
727             Args : scalar
728              
729             =cut
730              
731             sub is_root {
732 1     1 1 2 my ($self,$node)=@_;
733 1 50       4 if ($self->{graph}->in_degree($node) == 0) {return 1;}
  1         41  
734 0         0 return 0;
735             }
736              
737             =head2 is_tree_node
738              
739             Title : is_tree_node
740             Usage : my $b=$net->is_tree_node($u)
741             Function: tests if $u is a tree node in $net
742             Returns : boolean
743             Args : scalar
744              
745             =cut
746              
747             sub is_tree_node {
748 1859     1859 1 2383 my ($self,$node)=@_;
749 1859 100       4523 if ($self->{graph}->in_degree($node) <= 1) {return 1;}
  1365         109767  
750 494         56997 return 0;
751             }
752              
753             =head2 is_hybrid_node
754              
755             Title : is_hybrid_node
756             Usage : my $b=$net->is_hybrid_node($u)
757             Function: tests if $u is a hybrid node in $net
758             Returns : boolean
759             Args : scalar
760              
761             =cut
762              
763             sub is_hybrid_node {
764 19212     19212 1 17279 my ($self,$node)=@_;
765 19212 100       33513 if ($self->{graph}->in_degree($node) > 1) {return 1;}
  3536         367525  
766 15676         1062442 return 0;
767             }
768              
769             sub has_tree_child {
770             # has_tree_child(g,u) returns 1 if u has a tree child in graph g
771             # and 0 otherwise
772 1505     1505 0 149795 my $g=shift(@_);
773 1505         1395 my $node=shift(@_);
774 1505         2712 my @Sons=$g->successors($node);
775 1505         52177 foreach my $son (@Sons) {
776 1923 100       42498 if ($g->in_degree($son)==1) {
777 1505         109328 return 1;
778             }
779             }
780 0         0 return 0;
781             }
782              
783             =head2 is_tree_child
784              
785             Title : is_tree_child
786             Usage : my $b=$net->is_tree_child()
787             Function: tests if $net is a Tree-Child phylogenetic network
788             Returns : boolean
789             Args : Bio::PhyloNetwork
790              
791             =cut
792              
793             sub is_tree_child {
794 9095     9095 1 7280 my ($self)=@_;
795 9095 100       13527 if (defined $self->{is_tree_child}) {
796 8794         13778 return $self->{is_tree_child};
797             }
798 301         467 $self->{is_tree_child}=0;
799 301         401 my $graph=$self->{graph};
800 301         330 foreach my $node (@{$self->{nodes}}) {
  301         666  
801 2443 50 66     25431 return 0 unless ($graph->out_degree($node)==0 ||
802             has_tree_child($graph,$node));
803             }
804 301         6217 $self->{is_tree_child}=1;
805 301         625 return 1;
806             }
807              
808             # Accessors
809              
810             =head2 nodes
811              
812             Title : nodes
813             Usage : my @nodes=$net->nodes()
814             Function: returns the set of nodes of $net
815             Returns : array
816             Args : none
817              
818             =cut
819              
820             sub nodes {
821 12753     12753 1 12758 my ($self)=@_;
822 12753         8588 return @{$self->{nodes}};
  12753         45845  
823             }
824              
825             =head2 leaves
826              
827             Title : leaves
828             Usage : my @leaves=$net->leaves()
829             Function: returns the set of leaves of $net
830             Returns : array
831             Args : none
832              
833             =cut
834              
835             sub leaves {
836 136     136 1 140 my ($self)=@_;
837 136         111 return @{$self->{leaves}};
  136         310  
838             }
839              
840             =head2 roots
841              
842             Title : roots
843             Usage : my @roots=$net->roots()
844             Function: returns the set of roots of $net
845             Returns : array
846             Args : none
847              
848             =cut
849              
850             sub roots {
851 280     280 1 239 my ($self)=@_;
852 280         247 return @{$self->{roots}};
  280         583  
853             }
854              
855             =head2 internal_nodes
856              
857             Title : internal_nodes
858             Usage : my @internal_nodes=$net->internal_nodes()
859             Function: returns the set of internal nodes of $net
860             Returns : array
861             Args : none
862              
863             =cut
864              
865             sub internal_nodes {
866 1189     1189 1 1613 my ($self)=@_;
867 1189         2986 return grep {! $self->is_leaf($_)} $self->nodes();
  8575         12848  
868             }
869              
870             =head2 tree_nodes
871              
872             Title : tree_nodes
873             Usage : my @tree_nodes=$net->tree_nodes()
874             Function: returns the set of tree nodes of $net
875             Returns : array
876             Args : none
877              
878             =cut
879              
880             sub tree_nodes {
881 4     4 1 27 my ($self)=@_;
882 4         9 return grep {$self->is_tree_node($_)} $self->nodes();
  44         63  
883             }
884              
885             =head2 hybrid_nodes
886              
887             Title : hybrid_nodes
888             Usage : my @hybrid_nodes=$net->hybrid_nodes()
889             Function: returns the set of hybrid nodes of $net
890             Returns : array
891             Args : none
892              
893             =cut
894              
895             sub hybrid_nodes {
896 1095     1095 1 1022 my ($self)=@_;
897 1095         1495 return grep {$self->is_hybrid_node($_)} $self->nodes();
  7653         8924  
898             }
899              
900             =head2 graph
901              
902             Title : graph
903             Usage : my $graph=$net->graph()
904             Function: returns the underlying graph of $net
905             Returns : Graph::Directed
906             Args : none
907              
908             =cut
909              
910             sub graph {
911 8004     8004 1 2244025 my ($self)=@_;
912 8004         18897 return $self->{graph};
913             }
914              
915             =head2 edges
916              
917             Title : edges
918             Usage : my @edges=$net->edges()
919             Function: returns the set of edges of $net
920             Returns : array
921             Args : none
922              
923             Each element in the array is an anonimous array whose first element is the
924             head of the edge and the second one is the tail.
925              
926             =cut
927              
928             sub edges {
929 36     36 1 38 my ($self)=@_;
930 36         88 return $self->{graph}->edges();
931             }
932              
933             =head2 tree_edges
934              
935             Title : tree_edges
936             Usage : my @tree_edges=$net->tree_edges()
937             Function: returns the set of tree edges of $net
938             (those whose tail is a tree node)
939             Returns : array
940             Args : none
941              
942             =cut
943              
944             sub tree_edges {
945 4     4 1 544 my ($self)=@_;
946 4         11 return grep {$self->is_tree_node($_->[1])} $self->edges();
  33         496  
947             }
948              
949             =head2 hybrid_edges
950              
951             Title : hybrid_edges
952             Usage : my @hybrid_edges=$net->hybrid_edges()
953             Function: returns the set of hybrid edges of $net
954             (those whose tail is a hybrid node)
955             Returns : array
956             Args : none
957              
958             =cut
959              
960             sub hybrid_edges {
961 4     4 1 4 my ($self)=@_;
962 4         9 return grep {$self->is_hybrid_node($_->[1])} $self->edges();
  33         451  
963             }
964              
965             =head2 explode
966              
967             Title : explode
968             Usage : my @trees=$net->explode()
969             Function: returns the representation of $net by a set of
970             Bio::Tree:Tree objects
971             Returns : array
972             Args : none
973              
974             =cut
975              
976             sub explode {
977 1     1 1 3 my ($self)=@_;
978 1         1 my @trees;
979 1         4 $self->explode_rec(\@trees);
980 1         56 return @trees;
981             }
982              
983             sub explode_rec {
984 15     15 0 20 my ($self,$trees)=@_;
985 15         41 my @h = $self->hybrid_nodes;
986 15 100       47 if (scalar @h) {
987 7         10 my $v = shift @h;
988 7         24 for my $u ($self->{graph}->predecessors($v)) {
989 14         1037 $self->{graph}->delete_edge($u,$v);
990 14         1329 $self->explode_rec($trees);
991 14         708 $self->{graph}->add_edge($u,$v);
992             }
993             } else {
994 8         34 my $io = IO::String->new($self->eNewick);
995 8         493 my $treeio = Bio::TreeIO->new(-format => 'newick', -fh => $io);
996 8         23 my $tree = $treeio->next_tree;
997 8         40 $tree->contract_linear_paths;
998 8         9 push @{$trees}, $tree;
  8         88  
999             }
1000             }
1001              
1002             =head2 mudata
1003              
1004             Title : mudata
1005             Usage : my %mudata=$net->mudata()
1006             Function: returns the representation of $net by its mu-data
1007             Returns : hash
1008             Args : none
1009              
1010             $net-Emudata() returns a hash with keys the nodes of $net and each value is a
1011             muVector object holding its mu-vector.
1012              
1013             =cut
1014              
1015             sub mudata {
1016 1     1 1 3 my ($self)=@_;
1017 1         1 return %{$self->{mudata}};
  1         10  
1018             }
1019              
1020             sub mudata_node {
1021 0     0 0 0 my ($self,$u)=@_;
1022 0         0 return $self->{mudata}{$u};
1023             }
1024              
1025             =head2 heights
1026              
1027             Title : heights
1028             Usage : my %heights=$net->heights()
1029             Function: returns the heights of the nodes of $net
1030             Returns : hash
1031             Args : none
1032              
1033             $net-Eheights() returns a hash with keys the nodes of $net and each value
1034             is its height.
1035              
1036             =cut
1037              
1038             sub heights {
1039 1     1 1 2 my ($self)=@_;
1040 1         1 return %{$self->{h}};
  1         10  
1041             }
1042              
1043             sub height_node {
1044 0     0 0 0 my ($self,$u)=@_;
1045 0         0 return $self->{h}{$u};
1046             }
1047              
1048             =head2 mu_distance
1049              
1050             Title : mu_distance
1051             Usage : my $dist=$net1->mu_distance($net2)
1052             Function: Computes the mu-distance between the networks $net1 and $net2 on
1053             the same set of leaves
1054             Returns : scalar
1055             Args : Bio::PhyloNetwork
1056              
1057             =cut
1058              
1059             sub mu_distance {
1060 4546     4546 1 99618 my ($net1,$net2)=@_;
1061 4546         6581 my @nodes1=$net1->nodes;
1062 4546         6337 my @nodes2=$net2->nodes;
1063 4546         84950 my $comp = Array::Compare->new;
1064             $net1->throw("Cannot compare phylogenetic networks on different set of leaves")
1065 4546 50       361485 unless $comp->compare($net1->{leaves},$net2->{leaves});
1066 4546 50       610816 $net1->warn("Not a tree-child phylogenetic network")
1067             unless $net1->is_tree_child();
1068 4546 50       5265 $net2->warn("Not a tree-child phylogenetic network")
1069             unless $net2->is_tree_child();
1070 4546         3406 my @leaves=@{$net1->{leaves}};
  4546         8435  
1071 4546         3446 my %matched1;
1072             my %matched2;
1073 4546         4917 OUTER: foreach my $node1 (@nodes1) {
1074 36890         29502 foreach my $node2 (@nodes2) {
1075 230165 100 66     754857 if (
      100        
1076             (! exists $matched1{$node1}) && (! exists $matched2{$node2}) &&
1077             ($net1->{mudata}{$node1} == $net2->{mudata}{$node2})
1078             ) {
1079 21218         24601 $matched1{$node1}=$node2;
1080 21218         18295 $matched2{$node2}=$node1;
1081 21218         25838 next OUTER;
1082             }
1083             }
1084             }
1085 4546         27034 return (scalar @nodes1)+(scalar @nodes2)-2*(scalar keys %matched1);
1086             }
1087              
1088             =head2 mu_distance_generalized
1089              
1090             Title : mu_distance_generalized
1091             Usage : my $dist=$net1->mu_distance($net2)
1092             Function: Computes the mu-distance between the topological restrictions of
1093             networks $net1 and $net2 on its common set of leaves
1094             Returns : scalar
1095             Args : Bio::PhyloNetwork
1096              
1097             =cut
1098              
1099             sub mu_distance_generalized {
1100 0     0 1 0 my ($net1,$net2)=@_;
1101 0         0 my ($netr1,$netr2)=$net1->topological_restriction($net2);
1102 0         0 return $netr1->mu_distance($netr2);
1103             }
1104              
1105             # mudata_string (code mu_data in a string; useful for isomorphism testing)
1106              
1107             sub mudata_string_node {
1108 4966     4966 0 4133 my ($self,$u)=@_;
1109 4966         8144 return $self->{mudata}->{$u}->display();
1110             }
1111              
1112             sub mudata_string {
1113 48116     48116 0 29504 my ($self)=@_;
1114 48116 100       141788 return $self->{mudata_string} if defined $self->{mudata_string};
1115 1184         3166 my @internal=$self->internal_nodes;
1116 1184         2785 my $mus=$self->{mudata};
1117 1184         5532 @internal=sort {$mus->{$b} <=> $mus->{$a} } @internal;
  6396         13305  
1118 1184         1899 my $str="";
1119 1184         1466 foreach my $node (@internal) {
1120 4966         6407 $str=$str.$self->mudata_string_node($node);
1121             }
1122 1184         2132 $self->{mudata_string}=$str;
1123 1184         3095 return $str;
1124             }
1125              
1126             sub is_mu_isomorphic {
1127 24058     24058 0 21030 my ($net1,$net2)=@_;
1128 24058         22953 return ($net1->mudata_string() eq $net2->mudata_string());
1129             }
1130              
1131             # tripartitions
1132              
1133             sub compute_tripartition_node {
1134 12     12 0 15 my ($self,$u)=@_;
1135             $self->warn("Cannot compute tripartitions on unrooted networks. Will assume one at random")
1136 12 50       34 unless ($self->{numroots} == 1);
1137 12         14 my $root=$self->{roots}->[0];
1138 12         12 my $graph=$self->{graph};
1139 12         27 my $graphPruned=$graph->copy();
1140 12         8501 $graphPruned->delete_vertex($u);
1141 12         1409 my $tripartition="";
1142 12         18 foreach my $leaf (@{$self->{leaves}}) {
  12         22  
1143 36         20 my $type;
1144 36 100       69 if ($graph->is_reachable($u,$leaf)) {
1145 19 100       3151 if ($graphPruned->is_reachable($root,$leaf)) {$type="B";}
  2         3063  
1146 17         29814 else {$type="A";}
1147             }
1148 17         5075 else {$type="C";}
1149 36         69 $tripartition .= $type;
1150             }
1151 12         134 $self->{tripartitions}->{$u}=$tripartition;
1152             }
1153              
1154             sub compute_tripartitions {
1155 2     2 0 3 my ($self)=@_;
1156 2         3 foreach my $node (@{$self->{nodes}}) {
  2         8  
1157 12         21 $self->compute_tripartition_node($node);
1158             }
1159             }
1160              
1161             =head2 tripartitions
1162              
1163             Title : tripartitions
1164             Usage : my %tripartitions=$net->tripartitions()
1165             Function: returns the set of tripartitions of $net
1166             Returns : hash
1167             Args : none
1168              
1169             $net-Etripartitions() returns a hash with keys the nodes of $net and each value
1170             is a string representing the tripartition of the leaves induced by the node.
1171             A string "BCA..." associated with a node u (e.g.) means, the first leaf is in
1172             the set B(u), the second one in C(u), the third one in A(u), and so on.
1173              
1174             =cut
1175              
1176             sub tripartitions {
1177 1     1 1 9 my ($self)=@_;
1178 1 50       9 $self->compute_tripartitions() unless defined $self->{tripartitions};
1179 1         2 return %{$self->{tripartitions}};
  1         11  
1180             }
1181              
1182             # to do: change to tri_distance and test for TC and time-cons
1183              
1184             sub tripartition_error {
1185 1     1 0 6 my ($net1,$net2)=@_;
1186 1         40 my $comp = Array::Compare->new;
1187             $net1->throw("Cannot compare phylogenetic networks on different set of leaves")
1188 1 50       127 unless $comp->compare($net1->{leaves},$net2->{leaves});
1189 1 50       234 $net1->warn("Not a tree-child phylogenetic network")
1190             unless $net1->is_tree_child();
1191 1 50       5 $net2->warn("Not a tree-child phylogenetic network")
1192             unless $net2->is_tree_child();
1193 1 50       5 $net1->warn("Not a time-consistent network")
1194             unless $net1->is_time_consistent();
1195 1 50       4 $net2->warn("Not a time-consistent network")
1196             unless $net2->is_time_consistent();
1197 1 50       4 $net1->compute_tripartitions() unless defined $net1->{tripartitions};
1198 1 50       6 $net2->compute_tripartitions() unless defined $net2->{tripartitions};
1199 1         5 my @edges1=$net1->{graph}->edges();
1200 1         94 my @edges2=$net2->{graph}->edges();
1201 1         53 my ($FN,$FP)=(0,0);
1202 1         2 foreach my $edge1 (@edges1) {
1203 7         7 my $matched=0;
1204 7         4 foreach my $edge2 (@edges2) {
1205 24 100       41 if ($net1->{tripartitions}->{$edge1->[1]} eq
1206             $net2->{tripartitions}->{$edge2->[1]}) {
1207 5         2 $matched=1;
1208 5         6 last;
1209             }
1210             }
1211 7 100       8 if (! $matched) {$FN++;}
  2         3  
1212             }
1213 1         3 foreach my $edge2 (@edges2) {
1214 4         4 my $matched=0;
1215 4         2 foreach my $edge1 (@edges1) {
1216 18 100       29 if ($net1->{tripartitions}->{$edge1->[1]} eq
1217             $net2->{tripartitions}->{$edge2->[1]}) {
1218 3         1 $matched=1;
1219 3         4 last;
1220             }
1221             }
1222 4 100       6 if (! $matched) {$FP++;}
  1         2  
1223             }
1224 1         18 return ($FN/(scalar @edges1)+$FP/(scalar @edges2))/2;
1225             }
1226              
1227             # Time-consistency
1228              
1229             # to do: add weak time consistency
1230              
1231             =head2 is_time_consistent
1232              
1233             Title : is_time_consistent
1234             Usage : my $b=$net->is_time_consistent()
1235             Function: tests if $net is (strong) time-consistent
1236             Returns : boolean
1237             Args : none
1238              
1239             =cut
1240              
1241             sub is_time_consistent {
1242 4     4 1 11 my ($self)=@_;
1243             $self->compute_temporal_representation()
1244 4 100       17 unless exists $self->{has_temporal_representation};
1245 4         16 return $self->{has_temporal_representation};
1246             }
1247              
1248             =head2 temporal_representation
1249              
1250             Title : temporal_representation
1251             Usage : my %time=$net->temporal_representation()
1252             Function: returns a hash containing a temporal representation of $net, or 0
1253             if $net is not time-consistent
1254             Returns : hash
1255             Args : none
1256              
1257             =cut
1258              
1259             sub temporal_representation {
1260 1     1 1 3 my ($self)=@_;
1261 1 50       2 if ($self->is_time_consistent) {
1262 1         2 return %{$self->{temporal_representation}};
  1         6  
1263             }
1264 0         0 return 0;
1265             }
1266              
1267             sub compute_temporal_representation {
1268 3     3 0 4 my ($self)=@_;
1269 3         9 my $quotient=Graph::Directed->new();
1270 3         330 my $classes=find_classes($self);
1271 3         3 my %repr;
1272 3         8 map {$repr{$_}=$classes->{$_}[0]} $self->nodes();
  19         24  
1273 3         9 foreach my $e ($self->tree_edges()) {
1274 14         440 $quotient->add_edge($repr{$e->[0]},$repr{$e->[1]});
1275             }
1276 3         113 my %temp;
1277 3         15 my $depth=0;
1278 3         9 while ($quotient->vertices()) {
1279 9 50       236 if (my @svs=$quotient->predecessorless_vertices()) {
1280 9         651 foreach my $sv (@svs) {
1281 15         21 $temp{$sv}=$depth;
1282             }
1283 9         21 $quotient->delete_vertices(@svs);
1284             } else {
1285 0         0 return 0;
1286             }
1287 9         1102 $depth++;
1288             }
1289 3         47 foreach my $node (@{$self->{nodes}}) {
  3         8  
1290 19         19 $temp{$node}=$temp{$repr{$node}}
1291             }
1292 3         5 $self->{temporal_representation}=\%temp;
1293 3         17 $self->{has_temporal_representation}=1;
1294             }
1295              
1296             sub find_classes {
1297 3     3 0 5 my ($self)=@_;
1298 3         3 my $classes={};
1299 3         9 map {$classes->{$_}=[$_]} $self->nodes();
  19         27  
1300 3         16 foreach my $e ($self->hybrid_edges()) {
1301 4         13 $classes=join_classes($classes,$e->[0],$e->[1]);
1302             }
1303 3         7 return $classes;
1304             }
1305              
1306             sub join_classes {
1307 4     4 0 7 my ($classes,$u,$v)=@_;
1308 4         4 my @clu=@{$classes->{$u}};
  4         7  
1309 4         3 my @clv=@{$classes->{$v}};
  4         6  
1310 4         8 my @cljoin=(@clu,@clv);
1311 4         4 map {$classes->{$_}=\@cljoin} @cljoin;
  10         14  
1312 4         7 return $classes;
1313             }
1314              
1315             # alignment
1316              
1317             =head2 contract_elementary
1318              
1319              
1320             Title : contract_elementary
1321             Usage : my ($contracted,$blocks)=$net->contract_elementary();
1322             Function: Returns the network $contracted, obtained by contracting elementary
1323             paths of $net into edges. The reference $blocks points to a hash
1324             where, for each node of $contracted, gives the corresponding nodes
1325             of $net that have been deleted.
1326             Returns : Bio::PhyloNetwork,reference to hash
1327             Args : none
1328              
1329             =cut
1330              
1331             sub contract_elementary {
1332 4     4 1 4 my ($self)=@_;
1333              
1334 4         10 my $contracted=$self->graph->copy();
1335 4         4672 my @nodes=$self->nodes();
1336 4         9 my $mus=$self->{mudata};
1337 4         6 my $hs=$self->{h};
1338 4         4 my %blocks;
1339 4         8 foreach my $u (@nodes) {
1340 44         50 $blocks{$u}=[$u];
1341             }
1342 4         10 my @elementary=grep { $contracted->out_degree($_) == 1} $self->tree_nodes();
  36         2095  
1343 4         351 @elementary=sort {$mus->{$b} <=> $mus->{$a} ||
1344 0 0       0 $hs->{$b} <=> $hs->{$a}} @elementary;
1345 4         6 foreach my $elem (@elementary) {
1346 0         0 my @children=$contracted->successors($elem);
1347 0         0 my $child=$children[0];
1348 0 0       0 if ($contracted->in_degree($elem) == 1) {
1349 0         0 my @parents=$contracted->predecessors($elem);
1350 0         0 my $parent=$parents[0];
1351 0         0 $contracted->add_edge($parent,$child);
1352             }
1353 0         0 $contracted->delete_vertex($elem);
1354 0         0 my @blch=@{$blocks{$child}};
  0         0  
1355 0         0 my @blem=@{$blocks{$elem}};
  0         0  
1356 0         0 $blocks{$child}=[@blem,@blch];
1357 0         0 delete $blocks{$elem};
1358             }
1359 4         18 my $contr=Bio::PhyloNetwork->new(-graph => $contracted);
1360 4         70 return $contr,\%blocks;
1361             }
1362              
1363             =head2 optimal_alignment
1364              
1365             Title : optimal_alignment
1366             Usage : my ($weight,$alignment,$wgts)=$net->optimal_alignment($net2)
1367             Function: returns the total weight of an optimal alignment,
1368             the alignment itself, and partial weights
1369             between the networks $net1 and $net2 on the same set of leaves.
1370             An optional argument allows one to use the Manhattan (default) or the
1371             Hamming distance between mu-vectors.
1372             Returns : scalar,reference to hash,reference to hash
1373             Args : Bio::PhyloNetwork,
1374             -metric => string (optional)
1375              
1376             Supported strings for the -metric parameter are 'Manhattan' or 'Hamming'.
1377              
1378             =cut
1379              
1380             sub optimal_alignment {
1381 2     2 1 1222 my ($net1,$net2,%params)=@_;
1382              
1383 2         7 my ($net1cont,$blocks1)=contract_elementary($net1);
1384 2         5 my ($net2cont,$blocks2)=contract_elementary($net2);
1385 2         9 my ($wc,$alignc,$weightc)=
1386             optimal_alignment_noelementary($net1cont,$net2cont,%params);
1387 2         5 my %alignment=();
1388 2         3 my $totalweigth=0;
1389 2         3 my %weigths=();
1390 2         11 foreach my $u1 (keys %$alignc) {
1391 18         13 my $u2=$alignc->{$u1};
1392 18         13 my @block1=@{$blocks1->{$u1}};
  18         29  
1393 18         10 my @block2=@{$blocks2->{$u2}};
  18         50  
1394 18   66     46 while (@block1 && @block2) {
1395 18         20 my $u1dc=pop @block1;
1396 18         13 my $u2dc=pop @block2;
1397 18         32 $alignment{$u1dc}=$u2dc;
1398 18         18 $weigths{$u1dc}=$weightc->{$u1};
1399 18         39 $totalweigth+=$weigths{$u1dc};
1400             }
1401             }
1402 2         33 return $totalweigth,\%alignment,\%weigths;
1403             }
1404              
1405             sub optimal_alignment_noelementary {
1406 2     2 0 7 my ($net1,$net2,%params)=@_;
1407              
1408 2         90 my $comp = Array::Compare->new;
1409             $net1->throw("Cannot align phylogenetic networks on different set of leaves")
1410 2 50       248 unless $comp->compare($net1->{leaves},$net2->{leaves});
1411 2         348 my $distance;
1412 2 100 66     13 if ((defined $params{-metric})and ($params{-metric} eq 'Hamming')) {
1413 1         2 $distance='Hamming';
1414             } else {
1415 1         4 $distance='Manhattan';
1416             }
1417 2         4 my $numleaves=$net1->{numleaves};
1418 2         11 my @nodes1=$net1->internal_nodes();
1419 2         8 my @nodes2=$net2->internal_nodes();
1420 2         4 my $numnodes1=@nodes1;
1421 2         3 my $numnodes2=@nodes2;
1422 2         4 my @matrix=();
1423 2         6 for (my $i=0;$i<$numnodes1;$i++) {
1424 18         17 my @row=();
1425 18         26 for (my $j=0;$j<$numnodes2;$j++) {
1426 90         129 push @row,weight($net1,$nodes1[$i],$net2,$nodes2[$j],$distance);
1427             }
1428 18         39 push @matrix,\@row;
1429             }
1430 2         3 my @alignment=();
1431 2         29 Algorithm::Munkres::assign(\@matrix,\@alignment);
1432 2         6592 my %alignmenthash;
1433             my %weighthash;
1434 2         4 my $totalw=0;
1435 2         2 foreach my $leaf (@{$net1->{leaves}}) {
  2         6  
1436 8         12 $alignmenthash{$leaf}=$leaf;
1437 8         11 $weighthash{$leaf}=0;
1438             }
1439 2         9 for (my $i=0;$i<$numnodes1;$i++) {
1440 18 100       36 if (defined $nodes2[$alignment[$i]]) {
1441 10         18 $alignmenthash{$nodes1[$i]}=$nodes2[$alignment[$i]];
1442 10         18 $weighthash{$nodes1[$i]}=$matrix[$i][$alignment[$i]];
1443 10         14 $totalw += $matrix[$i][$alignment[$i]];
1444             }
1445             }
1446 2         24 return $totalw,\%alignmenthash,\%weighthash;
1447             }
1448              
1449             =head2 optimal_alignment_generalized
1450              
1451             Title : optimal_alignment_generalized
1452             Usage : my ($weight,%alignment)=$net->optimal_alignment_generalized($net2)
1453             Function: returns the wieght of an optimal alignment, and the alignment itself,
1454             between the topological restriction of the networks $net1 and $net2
1455             on the set of common leaves.
1456             An optional argument allows one to use the Manhattan (default) or the
1457             Hamming distance between mu-vectors.
1458             Returns : scalar,hash
1459             Args : Bio::PhyloNetwork,
1460             -metric => string (optional)
1461              
1462             Supported strings for the -metric parameter are 'Manhattan' or 'Hamming'.
1463              
1464             =cut
1465              
1466             sub optimal_alignment_generalized {
1467 0     0 1 0 my ($net1,$net2,%params)=@_;
1468 0         0 my ($netr1,$netr2)=$net1->topological_restriction($net2);
1469 0         0 return $netr1->optimal_alignment($netr2,%params);
1470             }
1471              
1472             sub weight {
1473 90     90 0 90 my ($net1,$v1,$net2,$v2,$distance)=@_;
1474 90         60 my $w;
1475 90 50       113 if (! defined $distance) {
1476 0         0 $distance='Manhattan';
1477             }
1478 90 100       113 if ($distance eq 'Hamming') {
1479 45         126 $w=$net1->{mudata}->{$v1}->hamming($net2->{mudata}->{$v2});
1480             } else {
1481 45         112 $w=$net1->{mudata}->{$v1}->manhattan($net2->{mudata}->{$v2});
1482             }
1483 90 100 100     118 if (($net1->is_tree_node($v1) && $net2->is_hybrid_node($v2)) ||
      100        
      66        
1484             ($net2->is_tree_node($v2) && $net1->is_hybrid_node($v1))
1485             )
1486             {
1487 36         65 $w +=1/(2*$net1->{numleaves});
1488             }
1489 90         219 return $w;
1490             }
1491              
1492              
1493             =head2 topological_restriction
1494              
1495             Title : topological_restriction
1496             Usage : my ($netr1,$netr2)=$net1->topological_restriction($net2)
1497             Function: returns the topological restriction of $net1 and $net2 on its
1498             common set of leaves
1499             Returns : Bio::PhyloNetwork, Bio::PhyloNetwork
1500             Args : Bio::PhyloNetwork
1501              
1502             =cut
1503              
1504             sub topological_restriction {
1505 0     0 1 0 my ($net1,$net2)=@_;
1506              
1507 0         0 my @leaves1=$net1->leaves();
1508 0         0 my @leaves2=$net2->leaves();
1509 0         0 my $numleaves1=scalar @leaves1;
1510 0         0 my $numleaves2=scalar @leaves2;
1511 0         0 my %position1;
1512 0         0 for (my $i=0; $i<$numleaves1; $i++) {
1513 0         0 $position1{$leaves1[$i]}=$i;
1514             }
1515 0         0 my %position2;
1516 0         0 my @commonleaves=();
1517 0         0 for (my $j=0; $j<$numleaves2; $j++) {
1518 0 0       0 if (defined $position1{$leaves2[$j]}) {
1519 0         0 push @commonleaves,$leaves2[$j];
1520 0         0 $position2{$leaves2[$j]}=$j;
1521             }
1522             }
1523 0         0 my $graphred1=$net1->{graph}->copy();
1524 0         0 my $graphred2=$net2->{graph}->copy();
1525             OUTER1:
1526 0         0 foreach my $u ($graphred1->vertices()) {
1527 0         0 my $mu=$net1->mudata_node($u);
1528 0         0 foreach my $leaf (@commonleaves) {
1529 0 0       0 if ($mu->[$position1{$leaf}]>0) {
1530 0         0 next OUTER1;
1531             }
1532             }
1533 0         0 $graphred1->delete_vertex($u);
1534             }
1535             OUTER2:
1536 0         0 foreach my $u ($graphred2->vertices()) {
1537 0         0 my $mu=$net2->mudata_node($u);
1538 0         0 foreach my $leaf (@commonleaves) {
1539 0 0       0 if ($mu->[$position2{$leaf}]>0) {
1540 0         0 next OUTER2;
1541             }
1542             }
1543 0         0 $graphred2->delete_vertex($u);
1544             }
1545 0         0 my $netr1=Bio::PhyloNetwork->new(-graph => $graphred1);
1546 0         0 my $netr2=Bio::PhyloNetwork->new(-graph => $graphred2);
1547 0         0 return ($netr1,$netr2);
1548             }
1549              
1550             # Functions for eNewick representation
1551              
1552             =head2 eNewick
1553              
1554             Title : eNewick
1555             Usage : my $str=$net->eNewick()
1556             Function: returns the eNewick representation of $net without labeling
1557             internal tree nodes
1558             Returns : string
1559             Args : none
1560              
1561             =cut
1562              
1563             sub eNewick {
1564 144     144 1 1378 my ($self)=@_;
1565 144         155 my $str="";
1566 144         187 my $seen={};
1567 144         306 foreach my $root ($self->roots()) {
1568 144         320 $str=$str.$self->eNewick_aux($root,$seen,undef)."; ";
1569             }
1570 144         511 return $str;
1571             }
1572              
1573             sub eNewick_aux {
1574 1266     1266 0 1374 my ($self,$node,$seen,$parent)=@_;
1575 1266         1016 my $str='';
1576 1266 100 100     1464 if ($self->is_leaf($node) ||
1577             (defined $seen->{$node}) )
1578             {
1579 606         851 $str=make_label($self,$parent,$node);
1580             }
1581             else {
1582 660         721 $seen->{$node}=1;
1583 660         1340 my @sons=$self->{graph}->successors($node);
1584 660         20030 $str="(";
1585 660         722 foreach my $son (@sons) {
1586 1122         1722 $str=$str.$self->eNewick_aux($son,$seen,$node).",";
1587             }
1588 660         698 chop($str);
1589 660         812 $str.=")".make_label($self,$parent,$node);
1590             }
1591 1266         2852 return $str;
1592             }
1593              
1594             sub make_label {
1595 1266     1266 0 1220 my ($self,$parent,$node)=@_;
1596 1266         1090 my $str='';
1597 1266 100       1720 if ($self->is_hybrid_node($node)) {
1598 300         417 my $lbl=$self->{labels}->{$node};
1599 300 100       968 if ($lbl =~ /#/) {
1600 294         315 $lbl='';
1601             }
1602 300         376 $str.=$lbl; #$self->{labels}->{$node};
1603 300         274 $str.='#';
1604 300 100 66     765 if ((defined $parent) &&
1605             ($self->graph->has_edge_attribute($parent,$node,'type'))) {
1606 6         333 $str.=$self->graph->get_edge_attribute($parent,$node,'type');
1607             }
1608 300         15272 $str.=substr $node,1;
1609             } else {
1610 966         1365 $str.=$self->{labels}->{$node};
1611             }
1612 1266 50 66     2898 if ((defined $parent) &&
1613             ($self->graph->has_edge_weight($parent,$node))) {
1614 0         0 $str.=":".$self->graph->get_edge_weight($parent,$node);
1615             }
1616 1266         110942 return $str;
1617             }
1618              
1619             =head2 eNewick_full
1620              
1621             Title : eNewick_full
1622             Usage : my $str=$net->eNewick_full()
1623             Function: returns the eNewick representation of $net labeling
1624             internal tree nodes
1625             Returns : string
1626             Args : none
1627              
1628             =cut
1629              
1630             sub eNewick_full {
1631 136     136 1 159 my ($self)=@_;
1632 136         147 my $str="";
1633 136         175 my $seen={};
1634 136         215 foreach my $root ($self->roots()) {
1635 136         268 $str=$str.$self->eNewick_full_aux($root,$seen,undef)."; ";
1636             }
1637 136         442 return $str;
1638             }
1639              
1640             sub eNewick_full_aux {
1641 1162     1162 0 1167 my ($self,$node,$seen,$parent)=@_;
1642 1162         958 my $str='';
1643 1162 100 100     1444 if ($self->is_leaf($node) ||
1644             (defined $seen->{$node}) )
1645             {
1646 574         750 $str=make_label_full($self,$parent,$node);
1647             }
1648             else {
1649 588         617 $seen->{$node}=1;
1650 588         1103 my @sons=$self->{graph}->successors($node);
1651 588         22085 $str="(";
1652 588         644 foreach my $son (@sons) {
1653 1026         1515 $str=$str.$self->eNewick_full_aux($son,$seen,$node).",";
1654             }
1655 588         534 chop($str);
1656 588         674 $str.=")".make_label_full($self,$parent,$node);
1657             }
1658 1162         2423 return $str;
1659             }
1660              
1661             sub make_label_full {
1662 1162     1162 0 1099 my ($self,$parent,$node)=@_;
1663 1162         983 my $str='';
1664 1162 100       1534 if ($self->is_hybrid_node($node)) {
1665 300         391 my $lbl=$self->{labels}->{$node};
1666 300 100       807 if ($lbl =~ /#/) {
1667 294         335 $lbl='';
1668             }
1669 300         280 $str.=$lbl; #$self->{labels}->{$node};
1670 300         279 $str.='#';
1671 300 100 66     792 if ((defined $parent) &&
1672             ($self->graph->has_edge_attribute($parent,$node,'type'))) {
1673 6         348 $str.=$self->graph->get_edge_attribute($parent,$node,'type');
1674             }
1675 300         15159 $str.=substr $node,1;
1676             } else {
1677 862 100 66     3109 if ((defined $self->{labels}->{$node})&&($self->{labels}->{$node} ne '')) {
1678 858         979 $str.=$self->{labels}->{$node};
1679             }
1680             else {
1681 4         5 $str.=$node;
1682             }
1683             }
1684 1162 50 66     2562 if ((defined $parent) &&
1685             ($self->graph->has_edge_weight($parent,$node))) {
1686 0         0 $str.=":".$self->graph->get_edge_weight($parent,$node);
1687             }
1688 1162         53444 return $str;
1689             }
1690              
1691             # sub eNewick_full {
1692             # my ($self)=@_;
1693             # my $str="";
1694             # my $seen={};
1695             # foreach my $root ($self->roots()) {
1696             # $str=$str.$self->eNewick_full_aux($root,$seen,undef)."; ";
1697             # }
1698             # return $str;
1699             # }
1700              
1701             # sub eNewick_full_aux {
1702             # my ($self,$node,$seen,$parent)=@_;
1703             # my $str;
1704             # if ($self->is_leaf($node) ||
1705             # (defined $seen->{$node}) )
1706             # {
1707             # if ($self->is_hybrid_node($node)) {
1708             # my $tag=substr $node,1;
1709             # if ((defined $parent) &&
1710             # ($self->graph->has_edge_attribute($parent,$node,'type'))) {
1711             # $str='#'.$self->graph->get_edge_attribute($parent,$node,'type').$tag;
1712             # } else {
1713             # $str=$node;
1714             # }
1715             # } else {
1716             # $str=$node;
1717             # }
1718             # }
1719             # else {
1720             # $seen->{$node}=1;
1721             # my @sons=$self->{graph}->successors($node);
1722             # $str="(";
1723             # foreach my $son (@sons) {
1724             # $str=$str.$self->eNewick_full_aux($son,$seen,$node).",";
1725             # }
1726             # chop($str);
1727             # if ($self->is_hybrid_node($node)) {
1728             # my $tag=substr $node,1;
1729             # if ((defined $parent) &&
1730             # ($self->graph->has_edge_attribute($parent,$node,'type'))) {
1731             # $str.=')#'.$self->graph->get_edge_attribute($parent,$node,'type').$tag;
1732             # } else {
1733             # $str.=")$node";
1734             # }
1735             # } else {
1736             # $str.=")$node";
1737             # }
1738             # }
1739             # if ((defined $parent) &&
1740             # ($self->graph->has_edge_weight($parent,$node))) {
1741             # $str.=":".$self->graph->get_edge_weight($parent,$node);
1742             # }
1743             # return $str;
1744             # }
1745              
1746              
1747             # displaying data
1748              
1749 5     5   33 use overload '""' => \&display;
  5         10  
  5         40  
1750              
1751             =head2 display
1752              
1753             Title : display
1754             Usage : my $str=$net->display()
1755             Function: returns a string containing all the available information on $net
1756             Returns : string
1757             Args : none
1758              
1759             =cut
1760              
1761             sub display {
1762 135     135 1 360 my ($self)=@_;
1763 135         164 my $str="";
1764 135         195 my $graph=$self->{graph};
1765 135         254 my @leaves=$self->leaves();
1766 135         145 my @nodes=@{$self->{nodes}};
  135         273  
1767 135         344 $str.= "Leaves:\t@leaves\n";
1768 135         294 $str.= "Nodes:\t@nodes\n";
1769 135         401 $str.= "Graph:\t$graph\n";
1770 135         42460 $str.= "eNewick:\t".$self->eNewick()."\n";
1771 135         299 $str.= "Full eNewick:\t".$self->eNewick_full()."\n";
1772 135         219 $str.= "Mu-data and heights:\n";
1773 135         178 foreach my $node (@nodes) {
1774 999         943 $str.= "v=$node: ";
1775 999 50       1161 if (exists $self->{labels}->{$node}) {
1776 999         1131 $str.="\tlabel=".$self->{labels}->{$node}.",";
1777             } else {
1778 0         0 $str.="\tlabel=(none),";
1779             }
1780 999         2079 $str.= "\th=".$self->{h}->{$node}.", \tmu=".$self->{mudata}->{$node}."\n";
1781             }
1782 135 50       299 if (exists $self->{has_temporal_representation}) {
1783 0         0 $str.= "Temporal representation:\n";
1784 0 0       0 if ($self->{has_temporal_representation}) {
1785 0         0 foreach my $node (@nodes) {
1786 0         0 $str.= "v=$node; ";
1787 0         0 $str.= "\tt=".$self->{temporal_representation}->{$node}."\n";
1788             }
1789             } else {
1790 0         0 $str.= "Does not exist.\n";
1791             }
1792             }
1793 135 50       278 if (exists $self->{tripartitions}) {
1794 0         0 $str.= "Tripartitions:\n";
1795 0         0 foreach my $node (@nodes) {
1796 0         0 $str.= "v=$node; ";
1797 0         0 $str.= "\ttheta=".$self->{tripartitions}->{$node}."\n";
1798             }
1799             }
1800 135         454 return $str;
1801             }
1802              
1803             1;