File Coverage

blib/lib/Treex/Core/Scenario.pm
Criterion Covered Total %
statement 132 333 39.6
branch 23 80 28.7
condition 2 26 7.6
subroutine 32 45 71.1
pod 9 10 90.0
total 198 494 40.0


line stmt bran cond sub pod time code
1             package Treex::Core::Scenario;
2             $Treex::Core::Scenario::VERSION = '2.20160630';
3 3     3   103649 use Moose;
  3         1223889  
  3         22  
4 3     3   23121 use Treex::Core::Common;
  3         10  
  3         14  
5 3     3   17594 use File::Basename;
  3         8  
  3         179  
6 3     3   17 use File::Slurp;
  3         8  
  3         138  
7 3     3   1562 use File::chdir;
  3         4215  
  3         288  
8 3     3   22 use Digest::MD5 qw(md5_hex);
  3         7  
  3         8672  
9              
10             #use Parse::RecDescent 1.967003; now using standalone version
11              
12             has from_file => (
13             is => 'ro',
14             isa => 'Str',
15             predicate => '_has_from_file',
16             documentation => q(Path to file with scenario),
17             );
18              
19             has from_string => (
20             is => 'ro',
21             isa => 'Str',
22             predicate => '_has_from_string',
23             documentation => q(String with scenario),
24             );
25              
26             has scenario_string => (
27             is => 'ro',
28             isa => 'Str',
29             builder => '_build_scenario_string',
30             lazy => 1,
31             );
32              
33             has block_items => (
34             is => 'ro',
35             isa => 'ArrayRef[HashRef]',
36             builder => 'parse_scenario_string',
37             init_arg => undef,
38             lazy => 1,
39             );
40              
41             has loaded_blocks => (
42             is => 'ro',
43             isa => 'ArrayRef[Treex::Core::Block]',
44             builder => '_build_loaded_blocks',
45             predicate => 'is_initialized',
46             lazy => 1,
47             init_arg => undef,
48             );
49              
50             has document_reader => (
51             is => 'rw',
52             does => 'Treex::Core::DocumentReader',
53             predicate => '_has_document_reader',
54             writer => '_set_document_reader',
55             init_arg => undef,
56             documentation => 'DocumentReader starts every scenario and reads a stream of documents.'
57             );
58              
59             has writers => (
60             is => 'rw',
61             does => 'ArrayRef[Treex::Block::Write::BaseWriter]',
62             default => sub { [] }
63             );
64              
65             has _global_params => (
66             is => 'ro',
67             isa => 'HashRef[Str]',
68             traits => ['Hash'],
69             default => sub { {} },
70             handles => {
71             get_global_param => 'get',
72             set_global_param => 'set',
73              
74             #get_global_param_names => 'keys',
75             #set_verbose => [ set => 'verbose' ],
76             #get_verbose => [ get => 'verbose' ],
77             #set_language => [ set => 'language' ],
78             #get_language => [ get => 'language' ],
79             #... ?
80             },
81             );
82              
83             has parser => (
84             is => 'ro',
85             isa => 'Parse::RecDescent::_Runtime',
86             init_arg => undef,
87             builder => '_build_parser',
88             documentation => q{Parses treex scenarios}
89             );
90              
91             has runner => (
92             is => 'ro',
93             isa => 'Treex::Core::Run',
94             writer => '_set_runner',
95             weak_ref => 1,
96             documentation => 'Treex::Core::Run instance in which the scenario is running',
97             );
98              
99             has cache => (
100             is => 'rw',
101             isa => 'Maybe[Cache::Memcached]',
102             builder => '_build_cache',
103             );
104              
105             sub _build_scenario_string {
106 13     13   39 my $self = shift;
107 13 100       372 if ( $self->_has_from_file ) {
    50          
108 10         257 return $self->_load_scenario_file( $self->from_file );
109             }
110             elsif ( $self->_has_from_string ) {
111 3         155 return $self->from_string;
112             }
113 0         0 log_fatal("You have to provide from_file or from_string attribute");
114             }
115              
116             my %sequence = ();
117              
118             sub _build_loaded_blocks {
119 10     10   24 my $self = shift;
120 10         23 my @block_items = @{ $self->block_items };
  10         254  
121 8         23 my $block_count = scalar @block_items;
122 8         19 my $i = 0;
123 8         19 my @loaded_blocks;
124              
125 8         17 my $sequence_from = 0;
126 8         17 my $sequence_hash = "";
127 8         21 foreach my $block_item (@block_items) {
128 15         35 $i++;
129 15         35 my $params = '';
130 15 50       53 if ( $block_item->{block_parameters} ) {
131 15         32 $params = join ' ', @{ $block_item->{block_parameters} };
  15         59  
132             }
133 15         129 log_info("Loading block $block_item->{block_name} $params ($i/$block_count)");
134 15         80 my $new_block = $self->_load_block($block_item);
135              
136 7 50       36 if ( $new_block->does('Treex::Core::DocumentReader') ) {
    50          
137 0 0       0 log_fatal("Only one DocumentReader per scenario is permitted ($block_item->{block_name})")
138             if $self->_has_document_reader;
139 0         0 $self->_set_document_reader($new_block);
140             }
141             elsif ( $new_block->isa('Treex::Block::Write::BaseWriter') ) {
142 0         0 push( @{ $self->writers }, $new_block );
  0         0  
143 0         0 push @loaded_blocks, $new_block; # duplicity
144             }
145             else {
146 7 50       2643 if ( ref($new_block) eq "Treex::Core::CacheBlock" ) {
    50          
147 0         0 $sequence{$sequence_from}{from} = $sequence_from;
148 0         0 $sequence{$sequence_from}{to} = $i;
149 0         0 $sequence{$sequence_from}{hash} = $sequence_hash;
150              
151 0         0 $sequence{$i}{_from} = $sequence_from;
152 0         0 $sequence_from = $i;
153 0         0 push( @{ $sequence{$sequence_from}{block} }, $new_block->get_hash() );
  0         0  
154 0         0 $sequence_hash = $new_block->get_hash();
155             }
156             elsif ($self->cache) {
157 0         0 $sequence_hash = md5_hex( $sequence_hash . $new_block->get_hash() );
158 0 0       0 if ( defined( $sequence{$sequence_from} ) ) {
159 0         0 push( @{ $sequence{$sequence_from}{block} }, $new_block->get_hash() );
  0         0  
160             }
161             }
162              
163 7         28 push @loaded_blocks, $new_block;
164             }
165             }
166              
167 0         0 log_info('ALL BLOCKS SUCCESSFULLY LOADED.');
168 0         0 return \@loaded_blocks;
169             }
170              
171             sub _load_parser {
172 13     13   31 my $self = shift;
173 13         3548 require Treex::Core::ScenarioParser;
174 13         99 return Treex::Core::ScenarioParser->new();
175             }
176              
177             sub _my_dir {
178 0     0   0 return dirname( (caller)[1] );
179             }
180              
181             sub _build_parser {
182 13     13   8857 my $self = shift;
183 13         35 my $parser;
184 13 50       31 eval {
185 13         59 $parser = $self->_load_parser();
186 13         117 1;
187             } and return $parser;
188 0         0 log_info("Cannot find precompiled scenario parser, trying to build it from grammar");
189 0         0 my $dir = $self->_my_dir(); #get module's directory
190 0         0 my $file = "$dir/ScenarioParser.rdg"; #find grammar file
191 0 0       0 log_fatal("Cannot find grammar file") if !-e $file;
192              
193             #in fact we should never reach this
194 0         0 log_warn('We should NOT reach this place. Treex distribution may be corrupted.');
195              
196 0         0 my $grammar = read_file($file); #load it
197             eval {
198 0         0 log_info("Trying to precompile it for you");
199 0         0 require Parse::RecDescent;
200 0         0 local $CWD = $dir;
201 0         0 Parse::RecDescent->Precompile( { -standalone => 1 }, $grammar, 'Treex::Core::ScenarioParser' );
202 0         0 $parser = $self->_load_parser();
203 0         0 1;
204 0 0 0     0 } or eval {
205 0         0 log_info("Cannot precompile, loading directly from grammar. Consider precompiling it manually");
206 0         0 require Parse::RecDescent;
207 0         0 $parser = Parse::RecDescent->new($grammar); #create parser
208 0         0 1;
209             } or log_fatal("Cannot create Scenario parser");
210 0         0 return $parser;
211             }
212              
213             sub _build_cache {
214 13     13   17563 my $self = shift;
215            
216 13 50 33     351 if ( $self->runner && $self->runner->cache ) {
217            
218 0         0 require Treex::Core::CacheBlock;
219 0         0 require Treex::Tool::Memcached::Memcached;
220            
221 0         0 return Treex::Tool::Memcached::Memcached::get_connection(
222             "documents-cache"
223             );
224             }
225              
226 13         44 return;
227             }
228              
229             sub _load_scenario_file {
230 10     10   29 my ( $self, $scenario_filename ) = @_;
231 10         69 log_info "Loading scenario description $scenario_filename";
232 10 50       57 my $scenario_string = read_file( $scenario_filename, binmode => ':utf8', err_mode => 'quiet' )
233             or log_fatal "Can't open scenario file $scenario_filename";
234 10         2125 return $scenario_string;
235             }
236              
237             sub parse_scenario_string {
238 13     13 1 35 my $self = shift;
239 13         396 my $scenario_string = $self->scenario_string;
240 13         328 my $from_file = $self->from_file;
241              
242 13         357 my $parsed = $self->parser->startrule( $scenario_string, 1, $from_file );
243 13 100       66 log_fatal("Cannot parse the scenario: $scenario_string") if !defined $parsed;
244 11         553 return $parsed;
245             }
246              
247             # reverse of parse_scenario_string, used in Treex::Core::Run for treex --dump
248             sub construct_scenario_string {
249 3     3 1 3269 my $self = shift;
250 3         12 my %args = @_;
251 3         8 my $multiline = $args{multiline};
252 3         7 my @block_items = @{ $self->block_items };
  3         94  
253 3 100       14 my $delim = $multiline ? qq{\n} : q{ };
254 3         6 my @block_strings;
255 3         10 foreach my $block_item (@block_items) {
256 7         15 my $name = $block_item->{block_name};
257 7         10 my @parameters = map { _add_quotes($_) } @{ $block_item->{block_parameters} };
  2         7  
  7         18  
258 7 100       36 $name =~ s{^Treex::Block::}{} or $name = "::$name"; #strip leading Treex::Block:: or add leading ::
259 7         15 my $params;
260 7 100       17 if ( scalar @parameters ) {
261 2         8 $params = q{ } . join q{ }, @parameters;
262             }
263             else {
264 5         10 $params = q{};
265             }
266 7         21 push @block_strings, $name . $params;
267             }
268 3         40 return join $delim, @block_strings;
269             }
270              
271             sub get_required_files {
272 0     0 0 0 my $self = shift;
273 0         0 my @block_items = @{ $self->block_items };
  0         0  
274 0         0 my @required_files;
275 0         0 foreach my $block_item (@block_items) {
276 0         0 my $block = $self->_load_block($block_item);
277             push @required_files,
278             map {
279 0         0 $block_item->{block_name} . "\t" . $_;
  0         0  
280             } $block->get_required_share_files();
281             }
282 0         0 return @required_files;
283             }
284              
285             sub _add_quotes { # adding quotes only if param. value contains a space
286 2     2   6 my ($block_parameter) = @_;
287 2         13 my ( $name, $value ) = split /=/, $block_parameter, 2;
288 2 50       13 if ( $value =~ /\s/ ) {
289 0         0 my $res_string = "$name=";
290              
291 0 0 0     0 if ( $value =~ /'/ && $value !~ /"/ ) {
292 0         0 $res_string .= '"' . $value . '"';
293             } else {
294 0         0 $value =~ s/'/\\'/g;
295 0         0 $res_string .= "'" . $value . "'";
296             }
297 0         0 return $res_string;
298             }
299 2         11 return $block_parameter;
300             }
301              
302             sub load_blocks {
303 0     0 1 0 my $self = shift;
304 0         0 $self->loaded_blocks; #just access lazy attribute
305 0         0 return;
306             }
307              
308             sub init {
309 0     0 1 0 my $self = shift;
310 0         0 $self->load_blocks();
311 0         0 return;
312             }
313              
314             sub _load_block {
315 15     15   41 my ( $self, $block_item ) = @_;
316 15         41 my $block_name = $block_item->{block_name};
317 15         36 my $new_block;
318              
319             # Initialize with global (scenario) parameters
320 15         27 my %params = ( %{ $self->_global_params }, scenario => $self );
  15         507  
321              
322             # which can be overridden by (local) block parameters.
323 15         31 foreach my $param ( @{ $block_item->{block_parameters} } ) {
  15         46  
324 8         47 my ( $name, $value ) = split /=/, $param, 2;
325 8         30 $params{$name} = $value;
326             }
327              
328 15 100   1   1497 eval "use $block_name; 1;" or log_fatal "Can't use block $block_name !\n$@\n";
  1     1   604  
  1     1   4  
  1     1   22  
  1     1   594  
  0     1   0  
  0     1   0  
  1     1   10  
  1     1   2  
  1     1   23  
  1     1   47  
  0     1   0  
  0     1   0  
  1     1   10  
  1     1   3  
  1         22  
  1         49  
  0         0  
  0         0  
  1         249  
  0         0  
  0         0  
  1         9  
  1         3  
  1         22  
  1         33  
  0         0  
  0         0  
  1         10  
  1         2  
  1         22  
  1         32  
  0         0  
  0         0  
  1         11  
  1         2  
  1         22  
  1         36  
  0         0  
  0         0  
  1         9  
  1         2  
  1         24  
  1         33  
  0            
  0            
329 7 50       34 eval {
330 7         54 $new_block = $block_name->new( \%params );
331 7         87 1;
332             } or log_fatal "Treex::Core::Scenario->new: error when initializing block $block_name\n\nEVAL ERROR:\t$@";
333              
334 7 0 33     196 if ( $self->cache && $params{'use_cache'} ) {
335 0         0 $new_block = Treex::Core::CacheBlock->new( { block => $new_block, cache => $self->cache } );
336             }
337              
338 7         34 return $new_block;
339             }
340              
341             sub run {
342 0     0 1 0 my ($self) = @_;
343 0         0 my $number_of_blocks = @{ $self->loaded_blocks };
  0         0  
344 0 0       0 log_fatal('No DocumentReader supplied') if !$self->_has_document_reader;
345 0         0 my $reader = $self->document_reader;
346 0   0     0 my $number_of_documents = $reader->number_of_documents_per_this_job() || '?';
347 0         0 my $document_number = 0;
348              
349             #if ( $self->cache ) {
350             # $document_number = $self->_run_with_cache( $reader, $number_of_blocks, $number_of_documents );
351             #}
352             #else {
353 0         0 $document_number = $self->_run_without_cache( $reader, $number_of_blocks, $number_of_documents );
354             #}
355              
356 0 0       0 log_info "Processed $document_number document"
357             . ( $document_number == 1 ? '' : 's' );
358 0         0 return 1;
359             }
360              
361             sub _run_with_cache {
362              
363 0     0   0 my ( $self, $reader, $number_of_blocks, $number_of_documents ) = @_;
364 0         0 my $document_number = 0;
365              
366 0         0 while ( my $document = $reader->next_document_for_this_job() ) {
367 0         0 $document_number++;
368 0         0 my $doc_name = $document->full_filename;
369 0         0 my $doc_from = $document->loaded_from;
370 0         0 log_info "Document $document_number/$number_of_documents $doc_name loaded from $doc_from";
371 0         0 my $block_number = 0;
372 0         0 my $skip_to = 0;
373 0         0 my $process = 0;
374 0         0 my $skip_from = 0;
375 0         0 my $from_hash = "";
376 0         0 my $document_last_hash = "";
377 0         0 foreach my $block ( @{ $self->loaded_blocks } ) {
  0         0  
378 0         0 $block_number++;
379 0         0 $process = 1;
380 0 0       0 if ( $block_number < $skip_to ) {
    0          
381              
382             # we know that they are identical, so we can skip them
383 0         0 log_info "Skipping block $block_number/$number_of_blocks " . ref($block);
384 0         0 $process = 0;
385             }
386             elsif ( $block_number == $skip_to ) {
387              
388             # this is border Cache block -> we have to check whether next sequence is also same
389 0         0 $skip_from = $block_number + 1;
390              
391             # following sequence is same => we can continue with skipping
392 0 0 0     0 if ($sequence{$skip_from}{'to'}
393             &&
394             $self->_is_known_sequence( $sequence{$skip_from}{'hash'}, $document->get_hash() )
395             )
396             {
397              
398             #log_warn("\tskip from " . $sequence{$skip_from}{from} . ' to ' . $sequence{$skip_from}{to});
399 0         0 $skip_to = $sequence{$skip_from}{to} - 1;
400 0         0 $from_hash = $document->get_hash();
401 0         0 $process = 0;
402             }
403             else {
404 0         0 $document_last_hash = $document->get_hash();
405              
406             #$document->set_hash(md5_hex($document->get_hash() . $block->get_hash()));
407 0         0 my $full_hash = $document->get_hash();
408 0         0 $document = $self->cache->get($full_hash);
409              
410 0 0       0 if ( !$document ) {
411 0         0 log_fatal("Document - $full_hash is missing!!!");
412             }
413 0         0 $process = 2;
414             }
415             }
416              
417 0 0       0 if ( $process == 1 ) {
418 0         0 log_info "Applying block $block_number/$number_of_blocks " . ref($block);
419              
420 0 0       0 $block->process_start if ( !$block->is_started );
421              
422             #log_info("Document-hash: " . $document->get_hash());
423 0         0 $skip_from = $block_number + 1;
424 0         0 my $status = $block->process_document($document);
425 0 0 0     0 if (defined($status)
      0        
      0        
426             &&
427             $status == $Treex::Core::Block::DOCUMENT_FROM_CACHE &&
428             $sequence{$skip_from}{'to'} &&
429             $self->_is_known_sequence( $sequence{$skip_from}{'hash'}, $document->get_hash() )
430             )
431             {
432              
433             #log_warn("\tskip from " . $sequence{$skip_from}{from} . ' to ' . $sequence{$skip_from}{to});
434 0         0 $skip_to = $sequence{$skip_from}{to} - 1;
435 0         0 $skip_from = $block_number + 1;
436 0         0 $from_hash = $document->get_hash();
437             }
438             }
439              
440 0         0 $document_last_hash = $document->get_hash();
441 0         0 $document->set_hash( md5_hex( $document->get_hash() . $block->get_hash() ) );
442              
443 0 0       0 if ( ref($block) eq "Treex::Core::CacheBlock" ) {
444              
445             # cache block => mark this path as known
446 0         0 my $id = $block_number + 1;
447 0         0 my $from = $sequence{$id}{'_from'};
448              
449             # the first sequence has no document
450 0 0       0 if ( defined( $sequence{$from}{'document'} ) ) {
451 0         0 $self->_set_known_sequence( $sequence{$from}{'hash'}, $sequence{$from}{'document'} );
452             }
453              
454 0         0 $sequence{$id}{'document'} = $document_last_hash;
455             }
456             }
457              
458             # this actually marks the document as successfully done in parallel processing (if this line
459             # does not appear in the output, the parallel process will fail -- it must appear at any errorlevel,
460             # therefore not using log_info or similar)
461 0 0       0 if ( $self->document_reader->jobindex ) {
462 0         0 print STDERR "Document $document_number/$number_of_documents $doc_name: [success].\n";
463             }
464             }
465              
466 0         0 log_info "Applying process_end";
467              
468 0         0 foreach my $block ( @{ $self->loaded_blocks } ) {
  0         0  
469 0 0       0 $block->process_end() if ( $block->is_started );
470             }
471              
472 0         0 return $document_number;
473             }
474              
475             sub _is_known_sequence {
476 0     0   0 my ( $self, $sequence_hash, $document_hash ) = @_;
477 0         0 my $hash = md5_hex( $sequence_hash, $document_hash );
478 0         0 return $self->cache->get($hash);
479             }
480              
481             sub _set_known_sequence {
482 0     0   0 my ( $self, $sequence_hash, $document_hash ) = @_;
483 0         0 my $hash = md5_hex( $sequence_hash, $document_hash );
484 0         0 $self->cache->set( $hash, 1 );
485              
486 0         0 return;
487             }
488              
489             sub _run_without_cache {
490              
491 0     0   0 my ( $self, $reader, $number_of_blocks, $number_of_documents ) = @_;
492 0         0 my $document_number = 0;
493              
494 0         0 $self->start();
495              
496 0         0 while ( my $document = $reader->next_document_for_this_job() ) {
497 0         0 $document_number++;
498 0         0 my $doc_name = $document->full_filename;
499 0         0 my $doc_from = $document->loaded_from;
500 0         0 log_info "Document $document_number/$number_of_documents $doc_name loaded from $doc_from";
501 0         0 my $block_number = 0;
502 0         0 foreach my $block ( @{ $self->loaded_blocks } ) {
  0         0  
503 0         0 $block_number++;
504 0         0 log_info "Applying block $block_number/$number_of_blocks " . ref($block);
505 0         0 $block->process_document($document);
506             }
507              
508             # this actually marks the document as successfully done in parallel processing (if this line
509             # does not appear in the output, the parallel process will fail -- it must appear at any errorlevel,
510             # therefore not using log_info or similar)
511 0 0       0 if ( $self->document_reader->jobindex ) {
512 0         0 print STDERR "Document $document_number/$number_of_documents $doc_name: [success].\n";
513             }
514             }
515              
516 0         0 $self->end();
517              
518 0 0       0 log_info "Processed $document_number document"
519             . ( $document_number == 1 ? '' : 's' );
520              
521 0         0 return $document_number;
522             }
523              
524             # Apply process_start to all blocks for which this has not yet been applied
525             sub start {
526 0     0 1 0 my ($self) = @_;
527              
528 0         0 log_info "Applying process_start";
529 0         0 foreach my $block ( @{ $self->loaded_blocks } ) {
  0         0  
530 0 0       0 $block->process_start() if ( !$block->is_started );
531             }
532              
533 0         0 return;
534             }
535              
536             # Apply the scenario to documents given in parameter
537             sub apply_to_documents {
538              
539 0     0 1 0 my ( $self, @documents ) = @_;
540              
541 0         0 my $number_of_blocks = @{ $self->loaded_blocks };
  0         0  
542 0         0 my $block_number = 0;
543              
544 0         0 foreach my $document (@documents){
545 0         0 log_info "Processing document" . $document->full_filename;
546              
547 0         0 foreach my $block ( @{ $self->loaded_blocks } ) {
  0         0  
548 0         0 $block_number++;
549 0         0 log_info "Applying block $block_number/$number_of_blocks " . ref($block);
550 0         0 $block->process_document($document);
551             }
552             }
553              
554 0         0 return;
555             }
556              
557             # Apply process_end to all blocks for which this has not yet been applied
558             sub end {
559 0     0 1 0 my ($self) = @_;
560              
561 0         0 log_info "Applying process_end";
562 0         0 foreach my $block ( @{ $self->loaded_blocks } ) {
  0         0  
563 0 0       0 $block->process_end() if ( $block->is_started );
564             }
565              
566 0         0 return;
567             }
568              
569 3     3   1300 use Module::Reload;
  3         1111  
  3         283  
570              
571             sub restart {
572 0     0 1 0 my ($self) = @_;
573 0         0 my $changed_modules = Module::Reload->check;
574 0         0 log_info "Number of reloaded modules = $changed_modules";
575 0         0 log_info "reseting the document reader\n";
576 0         0 $self->document_reader->restart();
577              
578             # TODO rebuild the reloaded blocks
579 0         0 return;
580             }
581              
582             1;
583              
584             __END__
585              
586             =for Pod::Coverage BUILD
587              
588             =encoding utf-8
589              
590             =head1 NAME
591              
592             Treex::Core::Scenario - a larger Treex processing unit, composed of blocks
593              
594             =head1 VERSION
595              
596             version 2.20160630
597              
598             =head1 SYNOPSIS
599              
600             use Treex::Core;
601              
602             my ($doc1, $doc2);
603             my $scenario = Treex::Core::Scenario->new(from_file => 'myscenario.scen' );
604             $scenario->run;
605              
606              
607             $scenario = Treex::Core::Scenario->new(from_string => 'W2A::EN::Segment language=en');
608             $scenario->start();
609             $scenario->apply_to_documents($doc1, $doc2);
610             $scenario->end();
611              
612              
613             =head1 DESCRIPTION
614              
615              
616             A Treex scenario consists of a sequence of (possibly parametrized) Treex blocks.
617              
618             Scenarios can be described by a simple textual format, which is either passed
619             directly to the scenario construction, or is contained in a text file whose
620             name is passed.
621              
622             The string description of scenarios looks as follows.
623              
624             1) It contains a list of block names from which their 'C<Treex::Block::>'
625             prefixes were removed.
626              
627             2) The block names are separated by one or more white spaces.
628              
629             3) The block names are listed in the same order in which they should be
630             applied on data.
631              
632             4) For each block, there can be one or more parameters specified, using the
633             C<attribute=value> form.
634              
635             5) Comments start with 'C<#>' and end with the nearest newline character.
636              
637              
638             Scenario example:
639              
640             # morphological analysis of an English text
641             Util::SetGlobal language=en selector=src
642             Read::Text
643             W2A::ResegmentSentences
644             W2A::EN::Tokenize
645             W2A::EN::NormalizeForms
646             W2A::EN::FixTokenization
647             W2A::EN::TagMorce
648              
649              
650             =head1 METHODS
651              
652             =head2 Constructor
653              
654             =over 4
655              
656             =item my $scenario = Treex::Core::Scenario->new(from_string => 'W2A::Tokenize language=en W2A::Lemmatize' );
657              
658             Constructor parameter C<from_string> specifies the names of blocks which are
659             to be executed (in the specified order) when the scenario is applied on a
660             L<Treex::Core::Document> object.
661              
662             =item my $scenario = Treex::Core::Scenario->new(from_file => 'myscenario.scen' );
663              
664             The scenario description is loaded from the file.
665              
666             =back
667              
668              
669             =head2 Running the scenario
670              
671             =over 4
672              
673             =item $scenario->run();
674              
675             Run the scenario.
676             One of the blocks (usually the first one) must be the document reader (see
677             L<Treex::Core::DocumentReader>) that produces the
678             documents on which this scenario is applied.
679              
680             =item $scenario->apply_to_documents($treex_doc);
681              
682             Apply this scenario to a L<Treex::Core::Document> instance obtained from elsewhere.
683             Please note that C<start()> must be called before the first call to this method and C<end()>
684             after the last call to this method.
685              
686             The scenario does not need to contain a document reader if documents are given
687             explicitly.
688              
689             =item $scenario->start();
690              
691             Apply C<process_start()> to all blocks in the scenario.
692             This is called automatically by C<run()>, but must be called before C<apply_to_documents()>.
693              
694             =item $scenario->end();
695              
696             Apply C<process_end()> to all blocks in the scenario.
697             This is called automatically by C<run()>, but must be called after calls to C<apply_to_documents()>.
698              
699              
700             =back
701              
702             =head2 Internal methods for loading scenarios
703              
704             =over 4
705              
706             =item _load_scenario_file($filename)
707              
708             loads a scenario description from a file
709              
710             =item parse_scenario_string
711              
712             parses a textual description of a scenario
713              
714             =item construct_scenario_string
715              
716             constructs a scenario textual description from an existing scenario instance
717             accepts named parameter multiline - when set, blocks are separated by newline instead of space
718              
719             =item load_blocks
720              
721             use blocks and call their constructors
722             can be used for preloading blocks for e.g. server applications
723             when running scenario blocks are loaded automatically
724              
725             =item init
726              
727             do all initialization so after this method scenario is ready to run
728             currently just load blocks
729              
730             =item restart
731              
732             resets the document reader, in future it will rebuild reloaded blocks
733              
734             =back
735              
736              
737             =head1 SEE ALSO
738              
739             L<Treex::Core::Block>
740             L<Treex::Core>
741              
742             =head1 AUTHORS
743              
744             Zdeněk Žabokrtský <zabokrtsky@ufal.mff.cuni.cz>
745              
746             Martin Popel <popel@ufal.mff.cuni.cz>
747              
748             David Mareček <marecek@ufal.mff.cuni.cz>
749              
750             Tomáš Kraut <kraut@ufal.mff.cuni.cz>
751              
752             Martin Majliš <majlis@ufal.mff.cuni.cz>
753              
754             Ondřej Dušek <odusek@ufal.mff.cuni.cz>
755              
756             =head1 COPYRIGHT AND LICENSE
757              
758             Copyright © 2011-2012 by Institute of Formal and Applied Linguistics, Charles University in Prague
759              
760             This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.