File Coverage

blib/lib/String/Blender.pm
Criterion Covered Total %
statement 16 18 88.8
branch n/a
condition n/a
subroutine 6 6 100.0
pod n/a
total 22 24 91.6


line stmt bran cond sub pod time code
1             #$Id: Blender.pm,v 0.04 2009/07/22 12:42:18 askorikov Exp $
2              
3             package String::Blender;
4              
5 2     2   43450 use 5.008;
  2         7  
  2         72  
6 2     2   10 use warnings;
  2         3  
  2         53  
7 2     2   12 use strict;
  2         8  
  2         54  
8 2     2   1464 use version; our $VERSION = '0.04';
  2         3938  
  2         37  
9              
10 2     2   146 use Carp;
  2         3  
  2         171  
11 2     2   1591 use Moose 0.74;
  0            
  0            
12             use Moose::Util::TypeConstraints;
13              
14             subtype 'VocabStr'
15             => as 'Str'
16             => where { length && $_ !~ /[\n[:cntrl:]]+/msx };
17              
18             subtype 'Natural'
19             => as 'Int'
20             => where { $_ > 0 }
21             => message { "this number ($_) is not positive" };
22              
23             has 'vocabs' => (
24             is => 'rw',
25             isa => 'ArrayRef[ArrayRef[VocabStr]]',
26             predicate => 'has_vocabs',
27             );
28             has 'vocab_files' => (
29             is => 'rw',
30             isa => 'ArrayRef',
31             default => undef,
32             trigger => \&load_vocabs,
33             predicate => 'has_vocab_files',
34             );
35             has 'quantity' => (is => 'rw', isa => 'Natural', default => 10);
36             has 'max_tries_factor' => (is => 'rw', isa => 'Natural', default => 4);
37             has 'min_length' => (is => 'rw', isa => 'Natural', default => 5);
38             has 'max_length' => (is => 'rw', isa => 'Natural', default => 20);
39             has 'min_elements' => (is => 'rw', isa => 'Natural', default => 2);
40             has 'max_elements' => (is => 'rw', isa => 'Natural', default => 5);
41             has 'strict_order' => (is => 'rw', isa => 'Bool', default => 0);
42             has 'delimiter' => (is => 'rw', isa => 'Str', default => q{});
43             has 'prefix' => (is => 'rw', isa => 'Str', default => q{});
44             has 'postfix' => (is => 'rw', isa => 'Str', default => q{});
45              
46             sub BUILD
47             {
48             my $self = shift;
49             $self->has_vocabs || $self->_load_vocabs;
50             return 1;
51             }
52              
53             sub _read_lists
54             {
55             my @filenames = @_;
56             my @list = ();
57             my $line;
58             for my $file_name (@filenames) {
59             open my $fh_lst, '<', $file_name
60             or confess qq(Could not open file "$file_name");
61             while ($line = <$fh_lst>) {
62             $line =~ s/\n+$//msx;
63             push @list, $line
64             }
65             close $fh_lst or confess qq(Could not close file "$file_name");
66             }
67             return \@list;
68             }
69              
70             sub load_vocabs
71             {
72             my $self = shift;
73             my @vocabs = ();
74              
75             ( $self->has_vocab_files && @{ $self->vocab_files } )
76             or confess 'There are no vocabulary files specified';
77              
78             for my $elem ( @{ $self->vocab_files } ) {
79             my $list = ('ARRAY' eq ref $elem) ?
80             _read_lists(@{ $elem }) : _read_lists($elem);
81             push @vocabs, $list;
82             }
83              
84             $self->vocabs(\@vocabs);
85             return scalar @vocabs;
86             }
87              
88             sub blend
89             {
90             my ($self, $quantity) = @_;
91             $quantity ||= $self->quantity;
92             my @result = ();
93             my $vocabs_top = $#{ $self->vocabs };
94             my $numelems_range = $self->max_elements - $self->min_elements;
95             my $permalen = length $self->prefix; $permalen += length $self->postfix;
96             my $delimiterlen = length $self->delimiter;
97             my $max_tries = $quantity * $self->max_tries_factor;
98             my $tries = 0;
99              
100             MULTIPLE:
101             for (1..$quantity) {
102             $tries++;
103             if ($max_tries < $tries) {
104             carp "Maximum tries limit exceeded ($max_tries)";
105             last MULTIPLE;
106             }
107              
108             my @match = ();
109             my $match_top = $self->min_elements - 1 + int rand $numelems_range;
110             my $length = $permalen + $delimiterlen * $match_top;
111              
112             MATCH:
113             for my $i (0..$match_top) {
114             srand;
115              
116             my $vocab = $self->vocabs->[
117             ($i <= $vocabs_top) ? $i : int rand $vocabs_top
118             ];
119             my $element = @{ $vocab }[ int rand $#{ $vocab } ];
120              
121             my $new_length = $length + length $element;
122             redo MULTIPLE if $new_length > $self->max_length;
123             $length = $new_length;
124              
125             int $self->strict_order || int rand() ?
126             push @match, $element : unshift @match, $element;
127             }
128              
129             redo MULTIPLE if ($length < $self->min_length);
130             my $complete_string = join $self->delimiter, @match;
131             $complete_string = $self->prefix . $complete_string . $self->postfix;
132              
133             redo MULTIPLE if scalar grep {$_ eq $complete_string} @result;
134             push @result, $complete_string;
135             }
136              
137             return @result;
138             }
139              
140             no Moose;
141             __PACKAGE__->meta->make_immutable;
142              
143             1;
144             __END__
145              
146             =head1 NAME
147              
148             String::Blender - flexible vocabulary-based generator of compound words (e.g. domain names).
149              
150             =head1 VERSION
151              
152             This document describes String::Blender version 0.04
153              
154             =head1 SYNOPSIS
155              
156             use String::Blender;
157            
158             my $blender = String::Blender->new(
159             vocab_files => [
160             './vocab/hacker-jargon.txt', # load into vocab #0
161             [
162             './vocab/places.txt', # load both files
163             './vocab/boosters.txt', # into vocab #1
164             ]
165             ],
166             quantity => 10,
167             max_length => 20,
168             max_elements => 3,
169             postfix => '.com',
170             );
171            
172             my @result = $blender->blend;
173            
174             # The @result will look like this:
175             # (
176             # 'tastybitshandler.com',
177             # 'bubblesortcore.com',
178             # 'regexpkingdom.com',
179             # 'bigslashbase.com',
180             # 'powerslurp.com',
181             # 'pipestacklabel.com',
182             # 'metaspoofzone.com',
183             # 'randomsubshell.com',
184             # 'forehandleroot.com',
185             # 'pragmaware.com'
186             # );
187            
188             # Vocabularies can be also specified directly, e.g.:
189             my $blender = String::Blender->new(
190             vocabs => [
191             [qw/web net host site list archive core base switch/],
192             [qw/candy honey muffin sugar sweet yammy/],
193             [qw/area city club dominion empire field land valley world/],
194             ],
195             strict_order => 1,
196             min_elements => 3,
197             max_elements => 3,
198             max_length => 20,
199             delimiter => "-",
200             );
201            
202             my @result = $blender->blend(5);
203            
204             # Then the @result will look like this:
205             # (
206             # 'base-honey-field',
207             # 'list-candy-dominion',
208             # 'web-sugar-land',
209             # 'archive-muffin-field',
210             # 'web-yammy-area'
211             # );
212              
213              
214             =head1 DESCRIPTION
215              
216             C<String::Blender> is an OO implementation of random generator of compound
217             words based on one or more priority driven word vocabularies. Originally the
218             module was created for the purpose of constructing new attractive thematic domain
219             names. Later it was used to improve dictionary attack tool.
220              
221             Each vocabulary itself represents an array of single words not necessarily sorted.
222             All vocabularies are stored in an array within predefined order. C<String::Blender>
223             provides ability to load vocabularies from plain text files or set them manually.
224              
225             Resulting compound words are represented as an array of uniq strings which consist
226             of one or more vocabulary words placed in serial or random order; probably
227             prefixed, followed and/or separated by defined strings.
228              
229             Construction of one compound word can be briefly described like this:
230              
231             =over
232              
233             =item * Define random number of elements within a given set of constraints.
234              
235             =item * Address each vocabulary list in a row up to the defined number of
236             elements and take one random word per vocabulary. Once the number of future
237             component words exceeds the number of vocabularies, then take each next word
238             from random vocabulary.
239              
240             =item * Concatenate selected words and/or join them with delimiter, add
241             prefix and postfix if defined.
242              
243             =item * Check the length of the resulting word. Retry attempt if it's too long
244             or too short.
245              
246             =back
247              
248              
249             =head1 SUBROUTINES/METHODS
250              
251             =head2 Class methods
252              
253             =over 4
254              
255             =item * B<new (%config)>
256              
257             The C<new> constructor method instantiates a new C<String::Blender> object.
258             A hash array of configuration attributes may be passed as a parameter.
259             See the </ATTRIBUTES> section.
260              
261             =back
262              
263              
264             =head2 Object methods
265              
266             =over 4
267              
268             =item * B<blend ($quantity)>
269              
270             Generates and returns list of C<$quantity> or less compound words in the manner
271             explained in L</DESCRIPTION> accordingly to constraints and options being set as
272             the object attributes described below. If C<$quantity> is omitted, then value of
273             the object attribute with the same name will be used.
274              
275             =item * B<load_vocabs>
276              
277             Loads vocabulary lists from plain text files collecting one element per line and
278             stores the L</vocabs> attribute. Takes lists of files from the L</vocab_files>
279             attribute. Returns number of vocabularies loaded. Note that this method invokes
280             automatically after object creation if L</vocabs> is empty and after each setting
281             of the L</vocab_files> attribute, so you will not have to call it manually.
282              
283             =item * B<BUILD>
284              
285             Normally, you will not have to invoke this method directly, but you might want
286             to override it. The C<BUILD> method is called after the object is constructed and
287             in the C<String::Blender> object it attempts to load vocabularies from files
288             specified in the L</vocab_files> attribute when no vocabularies provided directly
289             through the L</vocabs> attribute.
290              
291             =back
292              
293              
294             =head1 CONFIGURATION AND ENVIRONMENT
295              
296             The following list gives a short summary of each C<String::Blender> object
297             attribute. All of them can be defined on object creation (see L</new>)
298             or set separately like follows.
299              
300             $blender->max_elements(30);
301             $blender->vocabs(\@my_vocabs);
302              
303              
304             =head2 Vocabularies
305              
306             =over 4
307              
308             =item * B<vocabs>
309              
310             Contains reference to an array of vocabularies. Each vocabulary is represented
311             by a reference to an array of strings, one per element. Any of those strings
312             should not be empty and should not contain newlines and control characters.
313             Being left undefined on object creation, this attribute will be set by the
314             L</load_vocabs> method automatically. In this case you are supposed to have the
315             L</vocab_files> attribute set properly.
316              
317             =item * B<vocab_files>
318              
319             Defines filenames and lists of filenames to read vocabularies from. Contains
320             reference to an array of filenames and/or references to arrays of filenames.
321             The L</load_vocabs> method will merge vocabularies loaded from united filenames
322             into a single vocabulary. After object creation this method will be invoked every
323             time the L</vocab_files> attribute is set. Each vocabulary file should consist
324             of word per line in plain text format.
325              
326             =back
327              
328              
329             =head2 Constraints
330              
331             =over 4
332              
333             =item * B<min_length, max_length>
334              
335             Define the minimum and the maximum length in characters of the resulting string.
336             Positive integers, dafault: 5 and 20 respectively.
337              
338             =item * B<min_elements, max_elements>
339              
340             Define the minimum and the maximum number of elements the resulting string should
341             consist of. Positive integers, dafault: 2 and 5 respectively.
342              
343             =item * B<max_tries_factor>
344              
345             Defines the maximum number of generation loops per </blend> as the product of
346             </quantity> and C<max_tries_factor> values. Positive integer, dafault: 4. For
347             example, if the </quantity> equals to 10, the number of generation loops will be
348             limited to 40.
349              
350             =back
351              
352              
353             =head2 Options
354              
355             =over 4
356              
357             =item * B<quantity>
358              
359             Defines the quantity of strings to be generated per one invocation of the L</blend>
360             method. Positive integer, default: 10.
361              
362             =item * B<strict_order>
363              
364             Concatenate string elements according to the strict order of vocabularies they
365             were taken from. Boolean, default: false.
366              
367             =item * B<delimiter>
368              
369             String to separate string elements with in each resulting string. Empty by default.
370              
371             =item * B<prefix>
372              
373             String to prefix each resulting string with. Empty by default.
374              
375             =item * B<postfix>
376              
377             String to follow each resulting string by. Empty by default.
378              
379             =back
380              
381              
382             =head1 DIAGNOSTICS
383              
384             There are some exceptional situations worth consideration.
385              
386             =over
387              
388             =item C<< Maximum tries limit exceeded (%s) >>
389              
390             Normally the size of resulting list returned by the L</blend> method should be
391             equal to C<$quantity>. But having in mind that the method is intended to provide
392             a list of unique strings within certain restrictions, it becomes clear that in
393             some conditions there is a chance to fall into infinite loop. That's what the
394             L</max_tries_factor> limitation attribute stands for. When the generator runs
395             into narrow constraints and/or poor vocabularies, the resulting list may turn out
396             to be shoter then expected or even empty. In this case relevant warning will follow.
397             In order to avoid this you might want to increase value of the L</max_tries_factor>
398             attribute or weaken generation constraints such as L</min_elements>,
399             L</max_elements>, L</min_length>, L</max_length>.
400              
401             =item C<< There are no vocabulary files specified >>
402              
403             The C<load_vocabs> method will die once the L</vocab_files> attribute is not
404             defined or refers to an empty list.
405              
406             =item C<< Could not open (close) file %s >>
407              
408             L</load_vocabs> will also die being unable to open any file specified in the
409             L</vocab_files> attribute.
410              
411             =item C<< Attribute (%s) does not pass the type constraint because: %s >>
412              
413             Assigning any object attribute to a value which does not match the attribute's
414             type constraints will cause relevant fatal error.
415              
416             =back
417              
418              
419             =head1 DEPENDENCIES
420              
421             C<String::Blender> depends on the L<Moose> object system (version 0.74 or newer)
422             which must be installed separately.
423              
424             =head1 INCOMPATIBILITIES
425              
426             None reported.
427              
428              
429             =head1 BUGS AND LIMITATIONS
430              
431             No bugs have been reported.
432             The API is not stable yet and can be changed in future.
433              
434             Please report any bugs or feature requests to
435             C<bug-string-blender@rt.cpan.org>, or through the web interface at
436             L<http://rt.cpan.org>.
437              
438              
439             =head1 AUTHOR
440              
441             Alexey Skorikov C<< <alexey@skorikov.name> >>
442              
443              
444             =head1 LICENSE AND COPYRIGHT
445              
446             Copyright (c) 2009, Alexey Skorikov C<< <alexey@skorikov.name> >>. All rights reserved.
447              
448             This module is free software; you can redistribute it and/or
449             modify it under the same terms as Perl itself. See L<perlartistic>.
450              
451              
452             =head1 DISCLAIMER OF WARRANTY
453              
454             BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
455             FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
456             OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
457             PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
458             EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
459             WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
460             ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH
461             YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
462             NECESSARY SERVICING, REPAIR, OR CORRECTION.
463              
464             IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
465             WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
466             REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENCE, BE
467             LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL,
468             OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE
469             THE SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
470             RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
471             FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
472             SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
473             SUCH DAMAGES.