File Coverage

blib/lib/String/Markov.pm

Criterion	Covered	Total	%
statement	92	92	100.0
branch	22	22	100.0
condition	11	12	91.6
subroutine	13	13	100.0
pod	4	8	50.0
total	142	147	96.6

line	stmt	bran	cond	sub	pod	time	code
1							package String::Markov;
2
3							# ABSTRACT: A Moo-based, text-oriented Markov Chain module
4
5							our $VERSION = 0.007;
6
7	1			1		801	use 5.010;
	1					3
	1					37
8	1			1		570	use Moo;
	1					11369
	1					6
9	1			1		1691	use namespace::autoclean;
	1					11470
	1					5
10
11	1			1		11116	use Unicode::Normalize qw(normalize);
	1					2002
	1					94
12	1			1		6	use List::Util qw(sum);
	1					1
	1					1139
13
14							has normalize => (is => 'rw', default => sub { 'C' });
15							has do_chomp => (is => 'rw', default => sub { 1 });
16							has null => (is => 'ro', default => sub { "\0" });
17							has order => (is => 'ro', isa => sub {
18							die "Need an integer greater than zero" if !$_[0] \|\| $_[0] =~ /\D/;
19							}, default => sub { 2 });
20
21							has ['split_sep','join_sep'] => (
22							is => 'rw',
23							default => sub { undef }
24							);
25
26							has ['transition_count','row_sum'] => (
27							is => 'ro',
28							isa => sub { die "Need a hash ref" if ref $_[0] ne 'HASH'; },
29							default => sub { {} }
30							);
31
32							around BUILDARGS => sub {
33							my ($orig, $class, @arg) = @_;
34							my %ahash;
35
36							%ahash = @arg == 1 ? %{$arg[0]} : @arg;
37
38							my $sep = delete $ahash{sep} // '';
39							die "ERR: sep argument must be scalar; did you mean to set split_sep instead?" if ref $sep;
40							$ahash{split_sep} //= $sep;
41							$ahash{join_sep} //= $sep;
42
43							return $class->$orig(\%ahash);
44							};
45
46							sub join_prob {
47	149			149	0	133	my ($self, $orig_prob) = @_;
48	149					98	my %p;
49
50	149					105	@p{@{$orig_prob->[0]}} = @{$orig_prob->[1]};
	149					260
	149					151
51
52	149					256	return \%p;
53							}
54
55							sub split_prob {
56	227			227	0	175	my ($self, $orig_prob) = @_;
57
58							return [
59	227					736	[keys %$orig_prob],
60							[values %$orig_prob],
61							];
62							}
63
64							sub split_all_prob {
65	3			3	0	4	my $self = shift;
66	3					5	my $tc = $self->transition_count;
67	3					5	my $nt = {};
68
69	3					13	while (my ($state, $prob) = each %$tc) {
70	52					55	$nt->{$state} = $self->split_prob($prob);
71							}
72
73	3					47	%$tc = %$nt;
74							}
75
76							sub split_line {
77	22			22	1	27	my ($self, $sample) = @_;
78	22	100				60	if (my $norm = $self->normalize) {
79	20					61	$sample = normalize($norm, $sample);
80							}
81	22					503	return split($self->split_sep, $sample);
82							}
83
84							sub add_sample {
85	26			26	1	10510	my ($self, $sample) = @_;
86	26					53	my $n = $self->order;
87	26					35	my $null = $self->null;
88
89	26					32	my $sref = ref $sample;
90	26					52	my @nms = ($null,) x $n;
91
92	26	100				73	if ($sref eq 'ARRAY') {
		100
93	2					6	push @nms, @$sample;
94							} elsif (!$sref) {
95	23	100				76	die 'ERR: missing split separator,' if !defined $self->split_sep;
96	22					48	push @nms, $self->split_line($sample);
97							} else {
98	1					9	die "ERR: bad sample type $sref";
99							}
100
101	24					39	push @nms, $null;
102
103	24		100			82	my $sep = $self->join_sep // '';
104	24					40	my $count = $self->transition_count;
105	24					36	my $sum = $self->row_sum;
106	24					64	for my $i (0 .. ($#nms - $n)) {
107	287					426	my $cur = join($sep, @nms[$i .. ($i + $n - 1)]);
108	287					260	my $nxt = $nms[$i + $n];
109	287					268	my $prob = $count->{$cur};
110	287	100	100			792	if ($prob && ref $prob ne 'HASH') {
111	149					191	$count->{$cur} = $self->join_prob($prob);
112							}
113	287					416	++$count->{$cur}{$nxt};
114	287					385	++$sum->{$cur};
115							}
116
117	24					136	return $self;
118							}
119
120							sub add_files {
121	3			3	1	942	my ($self, @files) = @_;
122	3					8	my $do_chomp = $self->do_chomp;
123
124	3					5	local @ARGV = @files;
125	3					143	while(my $sample = <>) {
126	6	100				13	chomp $sample if $do_chomp;
127	6					12	$self->add_sample($sample);
128							}
129
130	3					8	$self->split_all_prob();
131
132	3					21	return $self;
133							}
134
135							sub sample_next_state {
136	297			297	0	1216	my ($self, @cur_state) = @_;
137	297	100				609	die "ERR: wrong amount of state" if @cur_state != $self->order;
138
139	294					337	my $count = $self->transition_count;
140	294					289	my $sum = $self->row_sum;
141
142	294		100			665	my $cur = join($self->join_sep // '', @cur_state);
143	294					369	my $thresh = $sum->{$cur};
144	294	100				469	return undef if !$thresh;
145
146	278					323	$thresh *= rand();
147
148	278					284	my $prob = $count->{$cur};
149	278	100				451	if (ref $prob ne 'ARRAY') {
150	175					212	$prob = $self->split_prob($prob);
151	175					246	$count->{$cur} = $prob;
152							}
153
154	278					293	my $s = 0;
155	278					208	my $i = 0;
156	278					166	my ($k, $v) = @{$prob};
	278					310
157	278		66			229	do {
158	285					649	$s += $v->[$i];
159							} while ($thresh > $s && ++$i);
160	278					1041	return $k->[$i];
161							}
162
163							sub generate_sample {
164	21			21	1	49278	my ($self) = @_;
165
166	21					63	my $null = $self->null;
167	21					49	my $n = $self->order;
168	21		100			76	my $sep = $self->join_sep // '';
169	21					82	my @nm = ($null,) x $n;
170
171	21					22	do {
172	250					433	push @nm, $self->sample_next_state(@nm[-$n .. -1]);
173							} while ($nm[-1] ne $null);
174
175	21					102	@nm = @nm[$n .. ($#nm-1)];
176
177							return wantarray ?
178	21	100				215	@nm :
		100
179							defined $self->join_sep ?
180							join($sep, @nm) :
181							\@nm;
182
183							}
184
185							__PACKAGE__->meta->make_immutable;
186
187							1;
188
189							__END__