File Coverage

blib/lib/Treex/Core/Scenario.pm
Criterion Covered Total %
statement 161 299 53.8
branch 32 80 40.0
condition 3 26 11.5
subroutine 25 33 75.7
pod 9 10 90.0
total 230 448 51.3


line stmt bran cond sub pod time code
1             package Treex::Core::Scenario;
2             $Treex::Core::Scenario::VERSION = '2.20210102';
3 12     12   347398 use Moose;
  12         1399128  
  12         117  
4 12     12   84048 use Treex::Core::Common;
  12         48  
  12         135  
5 12     12   67836 use File::Basename;
  12         35  
  12         930  
6 12     12   92 use File::Slurp;
  12         30  
  12         751  
7 12     12   6205 use File::chdir;
  12         19640  
  12         1437  
8 12     12   107 use Digest::MD5 qw(md5_hex);
  12         33  
  12         45185  
9              
10             #use Parse::RecDescent 1.967003; now using standalone version
11              
12             has from_file => (
13             is => 'ro',
14             isa => 'Str',
15             predicate => '_has_from_file',
16             documentation => q(Path to file with scenario),
17             );
18              
19             has from_string => (
20             is => 'ro',
21             isa => 'Str',
22             predicate => '_has_from_string',
23             documentation => q(String with scenario),
24             );
25              
26             has scenario_string => (
27             is => 'ro',
28             isa => 'Str',
29             builder => '_build_scenario_string',
30             lazy => 1,
31             );
32              
33             has block_items => (
34             is => 'ro',
35             isa => 'ArrayRef[HashRef]',
36             builder => 'parse_scenario_string',
37             init_arg => undef,
38             lazy => 1,
39             );
40              
41             has loaded_blocks => (
42             is => 'ro',
43             isa => 'ArrayRef[Treex::Core::Block]',
44             builder => '_build_loaded_blocks',
45             predicate => 'is_initialized',
46             lazy => 1,
47             init_arg => undef,
48             );
49              
50             has document_reader => (
51             is => 'rw',
52             does => 'Treex::Core::DocumentReader',
53             predicate => '_has_document_reader',
54             writer => '_set_document_reader',
55             init_arg => undef,
56             documentation => 'DocumentReader starts every scenario and reads a stream of documents.'
57             );
58              
59             has writers => (
60             is => 'rw',
61             does => 'ArrayRef[Treex::Block::Write::BaseWriter]',
62             default => sub { [] }
63             );
64              
65             has _global_params => (
66             is => 'ro',
67             isa => 'HashRef[Str]',
68             traits => ['Hash'],
69             default => sub { {} },
70             handles => {
71             get_global_param => 'get',
72             set_global_param => 'set',
73              
74             #get_global_param_names => 'keys',
75             #set_verbose => [ set => 'verbose' ],
76             #get_verbose => [ get => 'verbose' ],
77             #set_language => [ set => 'language' ],
78             #get_language => [ get => 'language' ],
79             #... ?
80             },
81             );
82              
83             has parser => (
84             is => 'ro',
85             isa => 'Parse::RecDescent::_Runtime',
86             init_arg => undef,
87             builder => '_build_parser',
88             documentation => q{Parses treex scenarios}
89             );
90              
91             has runner => (
92             is => 'ro',
93             isa => 'Treex::Core::Run',
94             writer => '_set_runner',
95             weak_ref => 1,
96             documentation => 'Treex::Core::Run instance in which the scenario is running',
97             );
98              
99             has cache => (
100             is => 'rw',
101             isa => 'Maybe[Cache::Memcached]',
102             builder => '_build_cache',
103             );
104              
105             sub _build_scenario_string {
106 15     15   319 my $self = shift;
107 14 100       480 if ( $self->_has_from_file ) {
    50          
108 10         289 return $self->_load_scenario_file( $self->from_file );
109             }
110             elsif ( $self->_has_from_string ) {
111 5         135 return $self->from_string;
112             }
113 1         3 log_fatal("You have to provide from_file or from_string attribute");
114             }
115              
116             my %sequence = ();
117              
118             sub _build_loaded_blocks {
119 12     12   58 my $self = shift;
120 12         34 my @block_items = @{ $self->block_items };
  12         365  
121 10         45 my $block_count = scalar @block_items;
122 10         31 my $i = 0;
123 10         25 my @loaded_blocks;
124              
125 10         39 my $sequence_from = 0;
126 10         32 my $sequence_hash = "";
127 10         27 foreach my $block_item (@block_items) {
128 21         63 $i++;
129 21         55 my $params = '';
130 21 50       76 if ( $block_item->{block_parameters} ) {
131 21         73 $params = join ' ', @{ $block_item->{block_parameters} };
  21         83  
132             }
133 21         185 log_info("Loading block $block_item->{block_name} $params ($i/$block_count)");
134 21         129 my $new_block = $self->_load_block($block_item);
135              
136 19 100       123 if ( $new_block->does('Treex::Core::DocumentReader') ) {
    100          
137 8 50       3852 log_fatal("Only one DocumentReader per scenario is permitted ($block_item->{block_name})")
138             if $self->_has_document_reader;
139 8         287 $self->_set_document_reader($new_block);
140             }
141             elsif ( $new_block->isa('Treex::Block::Write::BaseWriter') ) {
142 1         552 push( @{ $self->writers }, $new_block );
  1         31  
143 1         4 push @loaded_blocks, $new_block; # writers are intentionally kept in both $self->writers and @loaded_blocks
144             }
145             else {
146 10 50       4328 if ( ref($new_block) eq "Treex::Core::CacheBlock" ) {
    50          
147 0         0 $sequence{$sequence_from}{from} = $sequence_from;
148 0         0 $sequence{$sequence_from}{to} = $i;
149 0         0 $sequence{$sequence_from}{hash} = $sequence_hash;
150              
151 0         0 $sequence{$i}{_from} = $sequence_from;
152 0         0 $sequence_from = $i;
153 0         0 push( @{ $sequence{$sequence_from}{block} }, $new_block->get_hash() );
  0         0  
154 0         0 $sequence_hash = $new_block->get_hash();
155             }
156             elsif ($self->cache) {
157 0         0 $sequence_hash = md5_hex( $sequence_hash . $new_block->get_hash() );
158 0 0       0 if ( defined( $sequence{$sequence_from} ) ) {
159 0         0 push( @{ $sequence{$sequence_from}{block} }, $new_block->get_hash() );
  0         0  
160             }
161             }
162              
163 10         44 push @loaded_blocks, $new_block;
164             }
165             }
166              
167 8         49 log_info('ALL BLOCKS SUCCESSFULLY LOADED.');
168 8         297 return \@loaded_blocks;
169             }
170              
171             sub _load_parser {
172 14     15   35 my $self = shift;
173 14         6500 require Treex::Core::ScenarioParser;
174 14         131 return Treex::Core::ScenarioParser->new();
175             }
176              
177             sub _my_dir {
178 0     1   0 return dirname( (caller)[1] );
179             }
180              
181             sub _build_parser {
182 14     15   14712 my $self = shift;
183 14         37 my $parser;
184 14 50       39 eval {
185 14         65 $parser = $self->_load_parser();
186 14         143 1;
187             } and return $parser;
188 0         0 log_info("Cannot find precompiled scenario parser, trying to build it from grammar");
189 0         0 my $dir = $self->_my_dir(); #get module's directory
190 0         0 my $file = "$dir/ScenarioParser.rdg"; #find grammar file
191 0 0       0 log_fatal("Cannot find grammar file") if !-e $file;
192              
193             #in fact we should never reach this
194 0         0 log_warn('We should NOT reach this place. Treex distribution may be corrupted.');
195              
196 0         0 my $grammar = read_file($file); #load it
197             eval {
198 0         0 log_info("Trying to precompile it for you");
199 0         0 require Parse::RecDescent;
200 0         0 local $CWD = $dir;
201 0         0 Parse::RecDescent->Precompile( { -standalone => 1 }, $grammar, 'Treex::Core::ScenarioParser' );
202 0         0 $parser = $self->_load_parser();
203 0         0 1;
204 0 0 0     0 } or eval {
205 0         0 log_info("Cannot precompile, loading directly from grammar. Consider precompiling it manually");
206 0         0 require Parse::RecDescent;
207 0         0 $parser = Parse::RecDescent->new($grammar); #create parser
208 0         0 1;
209             } or log_fatal("Cannot create Scenario parser");
210 0         0 return $parser;
211             }
212              
213             sub _build_cache {
214 14     15   30785 my $self = shift;
215              
216 14 50 33     433 if ( $self->runner && $self->runner->cache ) {
217              
218 0         0 require Treex::Core::CacheBlock;
219 0         0 require Treex::Tool::Memcached::Memcached;
220              
221 0         0 return Treex::Tool::Memcached::Memcached::get_connection(
222             "documents-cache"
223             );
224             }
225              
226 14         52 return;
227             }
228              
229             sub _load_scenario_file {
230 10     11   31 my ( $self, $scenario_filename ) = @_;
231 10         70 log_info "Loading scenario description $scenario_filename";
232 10 50       60 my $scenario_string = read_file( $scenario_filename, binmode => ':utf8', err_mode => 'quiet' )
233             or log_fatal "Can't open scenario file $scenario_filename";
234 10         1947 return $scenario_string;
235             }
236              
237             sub parse_scenario_string {
238 14     14 1 36 my $self = shift;
239 14         420 my $scenario_string = $self->scenario_string;
240 14         417 my $from_file = $self->from_file;
241              
242 14         409 my $parsed = $self->parser->startrule( $scenario_string, 1, $from_file );
243 14 100       70 log_fatal("Cannot parse the scenario: $scenario_string") if !defined $parsed;
244 12         610 return $parsed;
245             }
246              
247             # reverse of parse_scenario_string, used in Treex::Core::Run for treex --dump
248             sub construct_scenario_string {
249 3     3 1 3676 my $self = shift;
250 3         8 my %args = @_;
251 3         8 my $multiline = $args{multiline};
252 3         6 my @block_items = @{ $self->block_items };
  3         92  
253 3 100       10 my $delim = $multiline ? qq{\n} : q{ };
254 3         6 my @block_strings;
255 3         10 foreach my $block_item (@block_items) {
256 7         15 my $name = $block_item->{block_name};
257 7         11 my @parameters = map { _add_quotes($_) } @{ $block_item->{block_parameters} };
  2         9  
  7         19  
258 7 100       37 $name =~ s{^Treex::Block::}{} or $name = "::$name"; #strip leading Treex::Block:: or add leading ::
259 7         12 my $params;
260 7 100       18 if ( scalar @parameters ) {
261 2         9 $params = q{ } . join q{ }, @parameters;
262             }
263             else {
264 5         9 $params = q{};
265             }
266 7         21 push @block_strings, $name . $params;
267             }
268 3         49 return join $delim, @block_strings;
269             }
270              
271             sub get_required_files {
272 0     0 0 0 my $self = shift;
273 0         0 my @block_items = @{ $self->block_items };
  0         0  
274 0         0 my @required_files;
275 0         0 foreach my $block_item (@block_items) {
276 0         0 my $block = $self->_load_block($block_item);
277             push @required_files,
278             map {
279 0         0 $block_item->{block_name} . "\t" . $_;
  0         0  
280             } $block->get_required_share_files();
281             }
282 0         0 return @required_files;
283             }
284              
285             sub _add_quotes { # adding quotes only if param. value contains a space
286 2     2   7 my ($block_parameter) = @_;
287 2         14 my ( $name, $value ) = split /=/, $block_parameter, 2;
288 2 50       12 if ( $value =~ /\s/ ) {
289 0         0 my $res_string = "$name=";
290              
291 0 0 0     0 if ( $value =~ /'/ && $value !~ /"/ ) {
292 0         0 $res_string .= '"' . $value . '"';
293             } else {
294 0         0 $value =~ s/'/\\'/g;
295 0         0 $res_string .= "'" . $value . "'";
296             }
297 0         0 return $res_string;
298             }
299 2         9 return $block_parameter;
300             }
301              
302             sub load_blocks {
303 0     0 1 0 my $self = shift;
304 0         0 $self->loaded_blocks; #just access lazy attribute
305 0         0 return;
306             }
307              
308             sub init {
309 0     0 1 0 my $self = shift;
310 0         0 $self->load_blocks();
311 0         0 return;
312             }
313              
314             sub _load_block {
315 20     20   61 my ( $self, $block_item ) = @_;
316 20         54 my $block_name = $block_item->{block_name};
317 20         32 my $new_block;
318              
319             # Initialize with global (scenario) parameters
320 20         36 my %params = ( %{ $self->_global_params }, scenario => $self );
  20         745  
321              
322             # which can be overridden by (local) block parameters.
323 20         46 foreach my $param ( @{ $block_item->{block_parameters} } ) {
  20         65  
324 12         65 my ( $name, $value ) = split /=/, $param, 2;
325 12         49 $params{$name} = $value;
326             }
327              
328 20 100   2   2126 eval "use $block_name; 1;" or log_fatal "Can't use block $block_name !\n$@\n";
  2     2   1387  
  2     2   12  
  2         50  
  2         649  
  2         9  
  2         61  
  2         20  
  2         4  
  2         53  
329 19 50       101 eval {
330 19         153 $new_block = $block_name->new( \%params );
331 19         237 1;
332             } or log_fatal "Treex::Core::Scenario->new: error when initializing block $block_name\n\nEVAL ERROR:\t$@";
333              
334 19 0 33     598 if ( $self->cache && $params{'use_cache'} ) {
335 0         0 $new_block = Treex::Core::CacheBlock->new( { block => $new_block, cache => $self->cache } );
336             }
337              
338 19         81 return $new_block;
339             }
340              
341             sub run {
342 1     1 1 1864 my ($self) = @_;
343 1         3 my $number_of_blocks = @{ $self->loaded_blocks };
  1         40  
344 1 50       42 log_fatal('No DocumentReader supplied') if !$self->_has_document_reader;
345 1         32 my $reader = $self->document_reader;
346 1   50     16 my $number_of_documents = $reader->number_of_documents_per_this_job() || '?';
347 1         3 my $document_number = 0;
348              
349             #if ( $self->cache ) {
350             # $document_number = $self->_run_with_cache( $reader, $number_of_blocks, $number_of_documents );
351             #}
352             #else {
353 1         8 $document_number = $self->_run_without_cache( $reader, $number_of_blocks, $number_of_documents );
354             #}
355              
356 1 50       15 log_info "Processed $document_number document"
357             . ( $document_number == 1 ? '' : 's' );
358 1         10 return 1;
359             }
360              
361             sub _run_with_cache {
362              
363 0     0   0 my ( $self, $reader, $number_of_blocks, $number_of_documents ) = @_;
364 0         0 my $document_number = 0;
365              
366 0         0 while ( my $document = $reader->next_document_for_this_job() ) {
367 0         0 $document_number++;
368 0         0 my $doc_name = $document->full_filename;
369 0         0 my $doc_from = $document->loaded_from;
370 0         0 log_info "Document $document_number/$number_of_documents $doc_name loaded from $doc_from";
371 0         0 my $block_number = 0;
372 0         0 my $skip_to = 0;
373 0         0 my $process = 0;
374 0         0 my $skip_from = 0;
375 0         0 my $from_hash = "";
376 0         0 my $document_last_hash = "";
377 0         0 foreach my $block ( @{ $self->loaded_blocks } ) {
  0         0  
378 0         0 $block_number++;
379 0         0 $process = 1;
380 0 0       0 if ( $block_number < $skip_to ) {
    0          
381              
382             # we know that they are identical, so we can skip them
383 0         0 log_info "Skipping block $block_number/$number_of_blocks " . ref($block);
384 0         0 $process = 0;
385             }
386             elsif ( $block_number == $skip_to ) {
387              
388             # this is the bordering cache block -> we have to check whether the next sequence is also the same
389 0         0 $skip_from = $block_number + 1;
390              
391             # following sequence is same => we can continue with skipping
392 0 0 0     0 if ($sequence{$skip_from}{'to'}
393             &&
394             $self->_is_known_sequence( $sequence{$skip_from}{'hash'}, $document->get_hash() )
395             )
396             {
397              
398             #log_warn("\tskip from " . $sequence{$skip_from}{from} . ' to ' . $sequence{$skip_from}{to});
399 0         0 $skip_to = $sequence{$skip_from}{to} - 1;
400 0         0 $from_hash = $document->get_hash();
401 0         0 $process = 0;
402             }
403             else {
404 0         0 $document_last_hash = $document->get_hash();
405              
406             #$document->set_hash(md5_hex($document->get_hash() . $block->get_hash()));
407 0         0 my $full_hash = $document->get_hash();
408 0         0 $document = $self->cache->get($full_hash);
409              
410 0 0       0 if ( !$document ) {
411 0         0 log_fatal("Document - $full_hash is missing!!!");
412             }
413 0         0 $process = 2;
414             }
415             }
416              
417 0 0       0 if ( $process == 1 ) {
418 0         0 log_info "Applying block $block_number/$number_of_blocks " . ref($block);
419              
420 0 0       0 $block->process_start if !$block->is_started;
421 0         0 $block->_set_is_started(1);
422              
423             #log_info("Document-hash: " . $document->get_hash());
424 0         0 $skip_from = $block_number + 1;
425 0         0 my $status = $block->process_document($document);
426 0 0 0     0 if (defined($status)
      0        
      0        
427             &&
428             $status == $Treex::Core::Block::DOCUMENT_FROM_CACHE &&
429             $sequence{$skip_from}{'to'} &&
430             $self->_is_known_sequence( $sequence{$skip_from}{'hash'}, $document->get_hash() )
431             )
432             {
433              
434             #log_warn("\tskip from " . $sequence{$skip_from}{from} . ' to ' . $sequence{$skip_from}{to});
435 0         0 $skip_to = $sequence{$skip_from}{to} - 1;
436 0         0 $skip_from = $block_number + 1;
437 0         0 $from_hash = $document->get_hash();
438             }
439             }
440              
441 0         0 $document_last_hash = $document->get_hash();
442 0         0 $document->set_hash( md5_hex( $document->get_hash() . $block->get_hash() ) );
443              
444 0 0       0 if ( ref($block) eq "Treex::Core::CacheBlock" ) {
445              
446             # cache block => mark this path as known
447 0         0 my $id = $block_number + 1;
448 0         0 my $from = $sequence{$id}{'_from'};
449              
450             # the first sequence has no document
451 0 0       0 if ( defined( $sequence{$from}{'document'} ) ) {
452 0         0 $self->_set_known_sequence( $sequence{$from}{'hash'}, $sequence{$from}{'document'} );
453             }
454              
455 0         0 $sequence{$id}{'document'} = $document_last_hash;
456             }
457             }
458              
459             # this actually marks the document as successfully done in parallel processing (if this line
460             # does not appear in the output, the parallel process will fail -- it must appear at any errorlevel,
461             # therefore not using log_info or similar)
462 0 0       0 if ( $self->document_reader->jobindex ) {
463 0         0 print STDERR "Document $document_number/$number_of_documents $doc_name: [success].\n";
464             }
465             }
466              
467 0         0 log_info "Applying process_end";
468              
469 0         0 foreach my $block ( @{ $self->loaded_blocks } ) {
  0         0  
470 0 0       0 $block->process_end() if ( $block->is_started );
471             }
472              
473 0         0 return $document_number;
474             }
475              
476             sub _is_known_sequence {
477 0     0   0 my ( $self, $sequence_hash, $document_hash ) = @_;
478 0         0 my $hash = md5_hex( $sequence_hash, $document_hash );
479 0         0 return $self->cache->get($hash);
480             }
481              
482             sub _set_known_sequence {
483 0     0   0 my ( $self, $sequence_hash, $document_hash ) = @_;
484 0         0 my $hash = md5_hex( $sequence_hash, $document_hash );
485 0         0 $self->cache->set( $hash, 1 );
486              
487 0         0 return;
488             }
489              
490             sub _run_without_cache {
491              
492 1     1   4 my ( $self, $reader, $number_of_blocks, $number_of_documents ) = @_;
493 1         2 my $document_number = 0;
494              
495 1         6 $self->start();
496              
497 1         16 while ( my $document = $reader->next_document_for_this_job() ) {
498 1         3 $document_number++;
499 1         14 my $doc_name = $document->full_filename;
500 1         30 my $doc_from = $document->loaded_from;
501 1         10 log_info "Document $document_number/$number_of_documents $doc_name loaded from $doc_from";
502 1         3 my $block_number = 0;
503 1         5 foreach my $block ( @{ $self->loaded_blocks } ) {
  1         32  
504 2         6 $block_number++;
505 2         11 log_info "Applying block $block_number/$number_of_blocks " . ref($block);
506 2         32 $block->process_document($document);
507             }
508              
509             # this actually marks the document as successfully done in parallel processing (if this line
510             # does not appear in the output, the parallel process will fail -- it must appear at any errorlevel,
511             # therefore not using log_info or similar)
512 1 50       33 if ( $self->document_reader->jobindex ) {
513 0         0 print STDERR "Document $document_number/$number_of_documents $doc_name: [success].\n";
514             }
515             }
516              
517 1         9 $self->end();
518              
519 1 50       10 log_info "Processed $document_number document"
520             . ( $document_number == 1 ? '' : 's' );
521              
522 1         3 return $document_number;
523             }
524              
525             # Apply process_start to all blocks for which this has not yet been applied
526             sub start {
527 1     1 1 4 my ($self) = @_;
528              
529 1         16 log_info "Applying process_start";
530 1         4 foreach my $block ( @{ $self->loaded_blocks } ) {
  1         35  
531 2 50       91 $block->process_start() if !$block->is_started;
532 2         73 $block->_set_is_started(1);
533             }
534              
535 1         10 return;
536             }
537              
538             # Apply the scenario to documents given in parameter
539             sub apply_to_documents {
540              
541 0     0 1 0 my ( $self, @documents ) = @_;
542              
543 0         0 my $number_of_blocks = @{ $self->loaded_blocks };
  0         0  
544 0         0 my $block_number = 0;
545              
546 0         0 foreach my $document (@documents){
547 0         0 log_info "Processing document" . $document->full_filename;
548              
549 0         0 foreach my $block ( @{ $self->loaded_blocks } ) {
  0         0  
550 0         0 $block_number++;
551 0         0 log_info "Applying block $block_number/$number_of_blocks " . ref($block);
552 0         0 $block->process_document($document);
553             }
554             }
555              
556 0         0 return;
557             }
558              
559             # Apply process_end to all blocks that have been started
560             sub end {
561 1     1 1 4 my ($self) = @_;
562              
563 1         5 log_info "Applying process_end";
564 1         2 foreach my $block ( @{ $self->loaded_blocks } ) {
  1         39  
565 2 50       64 $block->process_end() if ( $block->is_started );
566             }
567              
568 1         4 return;
569             }
570              
571 12     12   5717 use Module::Reload;
  12         5755  
  12         1452  
572              
573             sub restart {
574 0     0 1 0 my ($self) = @_;
575 0         0 my $changed_modules = Module::Reload->check;
576 0         0 log_info "Number of reloaded modules = $changed_modules";
577 0         0 log_info "reseting the document reader\n";
578 0         0 $self->document_reader->restart();
579              
580             # TODO rebuild the reloaded blocks
581 0         0 return;
582             }
583              
584             1;
585              
586             __END__
587              
588             =for Pod::Coverage BUILD
589              
590             =encoding utf-8
591              
592             =head1 NAME
593              
594             Treex::Core::Scenario - a larger Treex processing unit, composed of blocks
595              
596             =head1 VERSION
597              
598             version 2.20210102
599              
600             =head1 SYNOPSIS
601              
602             use Treex::Core;
603              
604             my ($doc1, $doc2);
605             my $scenario = Treex::Core::Scenario->new(from_file => 'myscenario.scen' );
606             $scenario->run;
607              
608              
609             $scenario = Treex::Core::Scenario->new(from_string => 'W2A::EN::Segment language=en');
610             $scenario->start();
611             $scenario->apply_to_documents($doc1, $doc2);
612             $scenario->end();
613              
614              
615             =head1 DESCRIPTION
616              
617              
618             A Treex scenario consists of a sequence of (possibly parametrized) Treex blocks.
619              
620             Scenarios can be described by a simple textual format, which is either passed
621             directly to the scenario construction, or is contained in a text file whose
622             name is passed.
623              
624             The string description of scenarios looks as follows.
625              
626             1) It contains a list of block names from which their 'C<Treex::Block::>'
627             prefixes were removed.
628              
629             2) The block names are separated by one or more white spaces.
630              
631             3) The block names are listed in the same order in which they should be
632             applied on data.
633              
634             4) For each block, there can be one or more parameters specified, using the
635             C<attribute=value> form.
636              
637             5) Comments start with 'C<#>' and end with the nearest newline character.
638              
639              
640             Scenario example:
641              
642             # morphological analysis of an English text
643             Util::SetGlobal language=en selector=src
644             Read::Text
645             W2A::ResegmentSentences
646             W2A::EN::Tokenize
647             W2A::EN::NormalizeForms
648             W2A::EN::FixTokenization
649             W2A::EN::TagMorce
650              
651              
652             =head1 METHODS
653              
654             =head2 Constructor
655              
656             =over 4
657              
658             =item my $scenario = Treex::Core::Scenario->new(from_string => 'W2A::Tokenize language=en W2A::Lemmatize' );
659              
660             Constructor parameter C<from_string> specifies the names of blocks which are
661             to be executed (in the specified order) when the scenario is applied on a
662             L<Treex::Core::Document> object.
663              
664             =item my $scenario = Treex::Core::Scenario->new(from_file => 'myscenario.scen' );
665              
666             The scenario description is loaded from the file.
667              
668             =back
669              
670              
671             =head2 Running the scenario
672              
673             =over 4
674              
675             =item $scenario->run();
676              
677             Run the scenario.
678             One of the blocks (usually the first one) must be the document reader (see
679             L<Treex::Core::DocumentReader>) that produces the
680             documents on which this scenario is applied.
681              
682             =item $scenario->apply_to_documents($treex_doc);
683              
684             Apply this scenario to a L<Treex::Core::Document> instance obtained from elsewhere.
685             Please note that C<start()> must be called before the first call to this method and C<end()>
686             after the last call to this method.
687              
688             The scenario does not need to contain a document reader if documents are given
689             explicitly.
690              
691             =item $scenario->start();
692              
693             Apply C<process_start()> to all blocks in the scenario.
694             This is called automatically by C<run()>, but must be called before C<apply_to_documents()>.
695              
696             =item $scenario->end();
697              
698             Apply C<process_end()> to all blocks in the scenario.
699             This is called automatically by C<run()>, but must be called after calls to C<apply_to_documents()>.
700              
701              
702             =back
703              
704             =head2 Internal methods for loading scenarios
705              
706             =over 4
707              
708             =item _load_scenario_file($filename)
709              
710             loads a scenario description from a file
711              
712             =item parse_scenario_string
713              
714             parses a textual description of a scenario
715              
716             =item construct_scenario_string
717              
718             Constructs a textual scenario description from an existing scenario instance.
719             Accepts the named parameter C<multiline>; when set, blocks are separated by newlines instead of spaces.
720              
721             =item load_blocks
722              
723             Loads (C<use>s) the blocks and calls their constructors.
724             Can be used for preloading blocks, e.g. in server applications.
725             When a scenario is run, the blocks are loaded automatically.
726              
727             =item init
728              
729             Does all initialization, so that after this method the scenario is ready to run.
730             Currently it just loads the blocks.
731              
732             =item restart
733              
734             Checks for changed modules (via L<Module::Reload>) and resets the document reader; in the future it will also rebuild the reloaded blocks.
735              
736             =back
737              
738              
739             =head1 SEE ALSO
740              
741             L<Treex::Core::Block>
742             L<Treex::Core>
743              
744             =head1 AUTHORS
745              
746             Zdeněk Žabokrtský <zabokrtsky@ufal.mff.cuni.cz>
747              
748             Martin Popel <popel@ufal.mff.cuni.cz>
749              
750             David Mareček <marecek@ufal.mff.cuni.cz>
751              
752             Tomáš Kraut <kraut@ufal.mff.cuni.cz>
753              
754             Martin Majliš <majlis@ufal.mff.cuni.cz>
755              
756             Ondřej Dušek <odusek@ufal.mff.cuni.cz>
757              
758             =head1 COPYRIGHT AND LICENSE
759              
760             Copyright © 2011-2012 by Institute of Formal and Applied Linguistics, Charles University in Prague
761              
762             This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.