File Coverage

blib/lib/Text/Scigen/scigen.pm

Criterion	Covered	Total	%
statement	112	162	69.1
branch	40	70	57.1
condition	4	6	66.6
subroutine	12	12	100.0
pod	0	10	0.0
total	168	260	64.6

line	stmt	bran	cond	sub	pod	time	code
1							package Text::Scigen::scigen;
2
3							# This file is part of SCIgen.
4							#
5							# SCIgen is free software; you can redistribute it and/or modify
6							# it under the terms of the GNU General Public License as published by
7							# the Free Software Foundation; either version 2 of the License, or
8							# (at your option) any later version.
9							#
10							# SCIgen is distributed in the hope that it will be useful,
11							# but WITHOUT ANY WARRANTY; without even the implied warranty of
12							# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13							# GNU General Public License for more details.
14							#
15							# You should have received a copy of the GNU General Public License
16							# along with SCIgen; if not, write to the Free Software
17							# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
19
20	1			1		10	use strict;
	1					4
	1					80
21	1			1		9	use vars qw($SCIGEND_PORT);
	1					3
	1					3819
22
23							#### daemon settings ####
24							$SCIGEND_PORT = 4724;
25
26							sub dup_name {
27	11			11	0	15	my $name = shift;
28	11					46	return $name . "!!!";
29							}
30
31							sub file_name {
32	2			2	0	2	my $name = shift;
33	2					12	return $name . ".file";
34							}
35
36							sub read_rules {
37	2			2	0	5	my ($fh, $rules, $RE, $debug, $self) = @_;
38	2					3	my $line;
39	2					75	while ($line = <$fh>) {
40	3979	100				7554	next if $line =~ /^#/ ;
41	3850	100				11432	next if $line !~ /\S/ ;
42
43	3176					10595	my @words = split /\s+/, $line;
44	3176					4735	my $name = shift @words;
45	3176					3303	my $rule = "";
46
47							# non-duplicate rule
48	3176	100				6205	if( $name =~ /([^\+]*)\!$/ ) {
49	3					7	$name = $1;
50	3					4	push @{$rules->{dup_name("$name")}}, "";
	3					15
51	3					15	next;
52							}
53
54							# include rule
55	3173	100				5499	if( $name =~ /\.include$/ ) {
56	1					2	my $file = $words[0];
57							# make sure we haven't already included this file
58							# NOTE: this allows the main file to be included at most twice
59	1	50				5	if( defined $rules->{&file_name($file)} ) {
60	0	0				0	if( $debug > 0 ) {
61	0					0	print "Skipping duplicate included file $file\n";
62							}
63	0					0	next;
64							} else {
65	1					3	$rules->{&file_name($file)} = 1;
66							}
67	1	50				5	if( $debug > 0 ) {
68	0					0	print "Opening included file $file\n";
69							}
70	1					7	my $path = $self->_find( $file );
71	1	50				52	open(my $inc_fh, "<:utf8", $path)
72							or die( "Couldn't open included file $path" );
73	1					15	&read_rules( $inc_fh, $rules, undef, $debug );
74	1					56	next; # we don't want to have .include itself be a rule
75							}
76
77	3172	100	100			12965	if ($#words == 0 && $words[0] eq '{') {
78	124					131	my $end = 0;
79	124					425	while ($line = <$fh>) {
80	1147	100				1845	if ($line =~ /^}[\r\n]+$/) {
81	124					140	$end = 1;
82	124					139	last;
83							} else {
84	1023					2294	$rule .= $line;
85							}
86							}
87	124	50				237	if (! $end) {
88	0					0	die "$name: EOF found before close rule\n";
89							}
90							} else {
91	3048					12630	$line =~ s/^\S+\s+//;
92	3048					4298	chomp ($line);
93	3048					3594	$rule = $line;
94							}
95
96							# look for the weight
97	3172					3453	my $weight = 1;
98	3172	100				7394	if( $name =~ /([^\+]*)\+(\d+)$/ ) {
99	196					397	$name = $1;
100	196					278	$weight = $2;
101	196	50				371	if( $debug > 10 ) {
102	0					0	warn "weighting rule by $weight: $name -> $rule\n";
103							}
104							}
105
106	3172					2878	do {
107	4885					4794	push @{$rules->{$name}}, $rule;
	4885					27049
108							} while( --$weight > 0 );
109							}
110
111	2	100				12	if( defined $RE ) {
112	1					8	compute_re( $rules, $RE );
113							}
114
115							}
116
117							sub compute_re {
118
119							# must sort; order matters, and we want to make sure that we get
120							# the longest matches first
121	1			1	0	3	my ($rules, $RE) = @_;
122	1					433	my $in = join "\|", sort { length ($b) <=> length ($a) } keys %$rules;
	5302					5486
123	1					3174	$$RE = qr/^(.*?)(${in})/s ;
124
125							}
126
127							sub generate {
128	1			1	0	4	my ($rules, $start, $RE, $debug, $pretty) = @_;
129
130
131	1					7	my $s = expand ($rules, $start, $RE, $debug);
132	1	50				4	if( $pretty ) {
133	1					4	$s = pretty_print($s);
134							}
135	1					15	return $s;
136							}
137
138							sub pick_rand {
139	8			8	0	10	my ($set) = @_;
140	8					17	my $n = $#$set + 1;
141	8					80	my $v = @$set[int (rand () * $n)];
142	8					17	return $v;
143							}
144
145							sub pop_first_rule {
146	15			15	0	27	my ($rules, $preamble, $input, $rule, $RE) = @_;
147
148	15					17	$$preamble = undef;
149	15					18	$$rule = undef;
150
151	15					16	my $ret = undef;
152
153	15	100				108	if ($$input =~ s/$RE//s ) {
154	7					18	$$preamble = $1;
155	7					13	$$rule = $2;
156	7					24	return 1;
157							}
158
159	8					24	return 0;
160							}
161
162							sub pretty_print {
163
164	1			1	0	2	my ($s) = shift;
165
166	1					2	my $news = "";
167	1					7	my @lines = split( /\n/, $s );
168	1					3	foreach my $line (@lines) {
169
170	1					3	my $newline = "";
171
172	1					9	$line =~ s/(\s+)([\.\,\?\;\:])/$2/g;
173	1					25	$line =~ s/(\b)(a)\s+([aeiou])/$1$2n $3/gi;
174
175	1	50	33			36	if( $line =~ /\\section(\?){(.)}/ ) {
		50
		50
		50
		50
176	0					0	$newline = "\\section${1}{" .
177							Text::Autoformat::autoformat( $2, { case => 'highlight',
178							squeeze => 0 } );
179	0					0	chomp $newline;
180	0					0	chomp $newline;
181	0					0	$newline .= "}";
182							} elsif( $line =~ /(\\subsection){(.*)}/ or
183							$line =~ /(\\slideheading){(.*)}/ ) {
184	0					0	$newline = $1 . "{" .
185							Text::Autoformat::autoformat( $2, { case => 'highlight',
186							squeeze => 0 } );
187	0					0	chomp $newline;
188	0					0	chomp $newline;
189	0					0	$newline .= "}";
190							} elsif( $line =~ /\\title{(.*)}/ ) {
191	0					0	$newline = "\\title{" .
192							Text::Autoformat::autoformat( $1, { case => 'highlight',
193							squeeze => 0 } );
194	0					0	chomp $newline;
195	0					0	chomp $newline;
196	0					0	$newline .= "}";
197							} elsif( $line =~ /(.) = {(.)}\,/ ) {
198	0					0	my $label = $1;
199	0					0	my $curr = $2;
200							# place brackets around any words containing capital letters
201	0					0	$curr =~ s/\b([^\s][A-Z]+[^\s\:])\b/\{$1\}/g;
202	0					0	$newline = "$label = {" .
203							Text::Autoformat::autoformat( $curr, { case => 'highlight',
204							squeeze => 0 } );
205	0					0	chomp $newline;
206	0					0	chomp $newline;
207	0					0	$newline .= "},";
208							} elsif( $line =~ /\S/ ) {
209	1					5	$newline =
210							Text::Autoformat::autoformat( $line, { case => 'sentence',
211							squeeze => 0,
212							break => break_latex(),
213							ignore => qr/^\\/ } );
214							}
215
216	1					5723	$newline =~ s/\\Em/\\em/g;
217
218	1	50				10	if( $newline !~ /\n$/ ) {
219	0					0	$newline .= "\n";
220							}
221	1					5	$news .= $newline;
222
223							}
224
225	1					4	return $news;
226							}
227
228							sub break_latex($$$) {
229	1			1	0	2	my ($text, $reqlen, $fldlen) = @_;
230	1	50				5	if( !defined $text ) {
231	1					2	$text = "";
232							}
233	1					17	return { $text, "" };
234							}
235
236							sub expand {
237	8			8	0	15	my ($rules, $start, $RE, $debug) = @_;
238
239							# check for special rules ending in + and #
240							# Rules ending in + generate a sequential integer
241							# The same rule ending in # chooses a random # from among preiously
242							# generated integers
243	8	50				39	if( $start =~ /(.*)\+$/ ) {
		50
244	0					0	my $rule = $1;
245	0					0	my $i = $rules->{$rule};
246	0	0				0	if( !defined $i ) {
247	0					0	$i = 0;
248	0					0	$rules->{$rule} = 1;
249							} else {
250	0					0	$rules->{$rule} = $i+1;
251							}
252	0					0	return $i;
253							} elsif( $start =~ /(.*)\#$/ ) {
254	0					0	my $rule = $1;
255	0					0	my $i = $rules->{$rule};
256	0	0				0	if( !defined $i ) {
257	0					0	$i = 0;
258							} else {
259	0					0	$i = int rand $i;
260							}
261	0					0	return $i;
262							}
263	8					9	my $full_token;
264
265	8					9	my $repeat = 0;
266	8					46	my $count = 0;
267	8					12	do {
268
269	8					29	my $input = pick_rand ($rules->{$start});
270	8					9	$count++;
271	8	50				22	if ($debug >= 5) {
272	0					0	warn "$start -> $input\n";
273							}
274
275	8					10	my ($pre, $rule);
276	0					0	my @components;
277	8					10	$repeat = 0;
278
279	8					19	while (pop_first_rule ($rules, \$pre, \$input, \$rule, $RE)) {
280	7					34	my $ex = expand ($rules, $rule, $RE, $debug);
281	7	100				29	push @components, $pre if length ($pre);
282	7	50				122	push @components, $ex if length ($ex);
283							}
284	8	100				27	push @components, $input if length ($input);
285	8					18	$full_token = join "", @components ;
286	8					25	my $ref = $rules->{dup_name("$start")};
287	8	50				39	if( defined $ref ) {
288	0					0	my @dups = @{$ref};
	0					0
289							# make sure we haven't generated this exact token yet
290	0					0	foreach my $d (@dups) {
291	0	0				0	if( $d eq $full_token ) {
292	0					0	$repeat = 1;
293							}
294							}
295
296	0	0				0	if( !$repeat ) {
		0
297	0					0	push @{$rules->{dup_name("$start")}}, $full_token;
	0					0
298							} elsif( $count > 50 ) {
299	0					0	$repeat = 0;
300							}
301
302							}
303
304							} while( $repeat );
305
306	8					21	return $full_token;
307
308							}
309
310
311							1;