| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Algorithm::MLCS; |
|
2
|
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
53490
|
use strict; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
83
|
|
|
4
|
2
|
|
|
2
|
|
11
|
use warnings FATAL => 'all'; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
93
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
2
|
|
|
2
|
|
11
|
use vars qw/ $VERSION @ISA @EXPORT /; |
|
|
2
|
|
|
|
|
15
|
|
|
|
2
|
|
|
|
|
1540
|
|
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
require Exporter; |
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
@ISA = qw/ Exporter /; |
|
11
|
|
|
|
|
|
|
@EXPORT = qw/ lcs /; |
|
12
|
|
|
|
|
|
|
$VERSION = '1.02'; |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
# lcs( \@seqs )
#
# Heuristic search for a longest common subsequence of several sequences.
#
# Takes an arrayref of sequences, each of which is an arrayref of tokens.
# Returns the tokens of the common subsequence found in list context, or
# the length of that subsequence in scalar context.
#
# When several candidate tokens score equally, the one that sorts first as
# a string is taken, so the result does not depend on hash iteration order.
sub lcs {

    # One list of per-position lookup tables per input sequence, built by
    # _build_seq from the sequences as filtered by _get_dict.
    my @states = map { _build_seq($_) } _get_dict( $_[0] );
    my @lcs;

    # Continue while every sequence still has unconsumed positions.
    while ( @states && !( grep { !@$_ } @states ) ) {

        # Candidate tokens are those present in the head table of every
        # sequence; each is scored with the smallest value found for it.
        my %score = %{ $states[0][0] };
        for my $state ( @states[ 1 .. $#states ] ) {
            my $head = $state->[0];
            for my $tok ( keys %score ) {
                if ( !$head->{$tok} ) {
                    delete $score{$tok};
                }
                elsif ( $head->{$tok} < $score{$tok} ) {
                    $score{$tok} = $head->{$tok};
                }
            }
        }

        last unless %score;

        # Highest score wins; equal scores fall back to string order.
        my $best;
        for my $tok ( keys %score ) {
            if (   !defined $best
                || $score{$tok} > $score{$best}
                || ( $score{$tok} == $score{$best} && $tok lt $best ) )
            {
                $best = $tok;
            }
        }
        push @lcs, $best;

        # Drop positions from each sequence up to and including the next
        # occurrence of the chosen token. The head table stores, for that
        # token, the number of positions that were left when it occurs, so
        # the counts match exactly on the position holding the token.
        for my $state (@states) {
            while ( my $left = @$state ) {
                my $head = shift @$state;
                last if $left == $head->{$best};
            }
        }
    }

    return wantarray ? @lcs : scalar @lcs;
}
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# _build_seq( \@tokens )
#
# Auxiliary function that turns a single sequence (arrayref of tokens) into
# the lookup structure used by lcs().
#
# Returns an arrayref with one hashref per position of the sequence. The
# hashref at position $i maps every token that occurs at index $i or later
# to (sequence length - index of its first occurrence at or after $i).
# An empty sequence yields an empty arrayref.
#
# Only the first argument is used; anything after it is ignored.
sub _build_seq {
    my ($seq) = @_;

    my $length = @{$seq};
    my ( %value_of, @seq_st );

    # Walk backwards so that %value_of always describes the nearest
    # occurrence at or after the current index; each position gets its own
    # snapshot of that table.
    for my $i ( reverse 0 .. $length - 1 ) {
        $value_of{ $seq->[$i] } = $length - $i;
        $seq_st[$i] = { %value_of };
    }

    return \@seq_st;
}
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
# _get_dict( \@seqs )
#
# Auxiliary function that takes an arrayref of sequences (arrayrefs of
# tokens), works out which tokens are present in every sequence, and
# returns a list of new arrayrefs - one per input sequence, in the same
# order - holding only those shared tokens. Token order and repetitions
# within each sequence are preserved.
#
# Returns an empty list when there are no sequences at all; the argument is
# not modified.
sub _get_dict {
    my $seq = shift;

    # No sequences: nothing to intersect, and no first sequence to read.
    return unless $seq && @{$seq};

    # Start from the tokens of the first sequence and narrow the set down
    # with each following sequence.
    my %common = map { $_ => 1 } @{ $seq->[0] };

    for my $other ( @{$seq}[ 1 .. $#{$seq} ] ) {
        %common = map { $_ => 1 } grep { $common{$_} } @{$other};

        # Once nothing is shared, later sequences cannot add anything back.
        last unless %common;
    }

    return map { [ grep { $common{$_} } @{$_} ] } @{$seq};
}
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
1; |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=head1 NAME |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
Algorithm::MLCS - Fast heuristic algorithm for finding Longest Common Subsequence |
|
84
|
|
|
|
|
|
|
of multiple sequences |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head1 VERSION |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
Version 1.02 |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
use Data::Dumper; |
|
93
|
|
|
|
|
|
|
use Algorithm::MLCS; |
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
my @seqs = ( |
|
96
|
|
|
|
|
|
|
[ qw/a b c d f g h j q z/ ], |
|
97
|
|
|
|
|
|
|
[ qw/a b c d f g h j q z/ ], |
|
98
|
|
|
|
|
|
|
[ qw/a b c x f h j q z/ ], |
|
99
|
|
|
|
|
|
|
[ qw/a b c f g j q z/ ], |
|
100
|
|
|
|
|
|
|
); |
|
101
|
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
my @lcs = lcs( \@seqs ); |
|
103
|
|
|
|
|
|
|
my $lcs_length = lcs( \@seqs ); |
|
104
|
|
|
|
|
|
|
print Dumper( \@lcs ); |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=head1 ABSTRACT |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
Finding the longest common subsequence (LCS) for the general case of an arbitrary |
|
109
|
|
|
|
|
|
|
number of input sequences is an NP-hard problem. Algorithm::MLCS implements a fast |
|
110
|
|
|
|
|
|
|
heuristic algorithm that addresses the general case of multiple sequences. |
|
111
|
|
|
|
|
|
|
It is able to extract a common subsequence that is close to an optimal one. |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=head1 METHODS |
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=head2 lcs ( \@seqs ) |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
Finds a Longest Common Subsequence of multiple sequences given by @seqs arrayref. |
|
118
|
|
|
|
|
|
|
Each element of @seqs is an arrayref that represents one of the sequences |
|
119
|
|
|
|
|
|
|
(e.g. [ ['a', 'b', 'c'], ['a', 'c', 'd', 'e'], ... ]). In list context it returns |
|
120
|
|
|
|
|
|
|
LCS array, in scalar - the length of LCS. |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
Algorithm::LCS |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=head1 AUTHOR |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
Slava Moiseev, C<< >> |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
Copyright 2012 Slava Moiseev. |
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
|
135
|
|
|
|
|
|
|
under the same terms as Perl itself. |
|
136
|
|
|
|
|
|
|
|