File Coverage

blib/lib/Treex/Core/Run.pm
Criterion Covered Total %
statement 9 11 81.8
branch n/a
condition n/a
subroutine 4 4 100.0
pod n/a
total 13 15 86.6


line stmt bran cond sub pod time code
1             package Treex::Core::Run;
2             $Treex::Core::Run::VERSION = '2.20160630';
3 1     1   55269 use 5.008;
  1         6  
4              
5 1     1   577 use Moose;
  1         400870  
  1         7  
6 1     1   7585 use Treex::Core::Common;
  1         8  
  1         7  
7 1     1   6601 use Treex::Core;
  0            
  0            
8             use MooseX::SemiAffordanceAccessor 0.09;
9             with 'MooseX::Getopt';
10              
11              
12             # TODO some of these modules might not be needed, check this
13             use Cwd;
14             use File::Path;
15             use File::Temp qw(tempdir);
16             use File::Which;
17             use List::MoreUtils qw(first_index);
18             use IO::Interactive;
19             use Time::HiRes;
20             use Readonly;
21             use POSIX;
22             use Exporter;
23             use Sys::Hostname;
24             use base 'Exporter';
25              
26             use File::Glob 'bsd_glob';
27              
28             our @EXPORT_OK = q(treex);
29              
# --save / -s: boolean switch, off by default.
# Help text shown in the usage message: "save all documents".
has 'save' => (
    is            => 'rw',
    isa           => 'Bool',
    default       => 0,
    traits        => ['Getopt'],
    cmd_aliases   => 's',
    documentation => 'save all documents',
);
38              
# --quiet / -q: boolean switch, off by default.
# When the attribute is set to a true value, the global log level is raised
# to FATAL so that warning, info and debug messages are suppressed.
#
# The trigger is invoked with ($self, $new_value). The level is changed only
# when the new value is true, so explicitly setting the attribute to false
# (e.g. set_quiet(0)) does not silence logging.
has 'quiet' => (
    traits      => ['Getopt'],
    cmd_aliases => 'q',
    is          => 'rw',
    isa         => 'Bool',
    default     => 0,
    trigger     => sub {
        my ( $self, $value ) = @_;
        Treex::Core::Log::log_set_error_level('FATAL') if $value;
        return;
    },
    documentation => q{Warning, info and debug messages are suppressed. Only fatal errors are reported.},
);
48              
# --cleanup: boolean switch, off by default.
has 'cleanup' => (
    is            => 'rw',
    isa           => 'Bool',
    default       => 0,
    traits        => ['Getopt'],
    documentation => q{Delete all temporary files.},
);

# --error_level / -e: minimal level of reported log messages (default INFO).
# Every time the attribute is set, the new value is passed on to the logger.
has 'error_level' => (
    is          => 'rw',
    isa         => 'ErrorLevel',
    default     => 'INFO',
    traits      => ['Getopt'],
    cmd_aliases => 'e',
    trigger     => sub {
        my ( undef, $level ) = @_;
        Treex::Core::Log::log_set_error_level($level);
    },
    documentation => q{Possible values: ALL, DEBUG, INFO, WARN, FATAL},
);

# --lang / --language / -L: language code used for the Util::SetGlobal shortcut.
has 'lang' => (
    is            => 'rw',
    isa           => 'Str',
    traits        => ['Getopt'],
    cmd_aliases   => [ 'language', 'L' ],
    documentation => q{shortcut for adding "Util::SetGlobal language=xy" at the beginning of the scenario},
);

# --selector / -S: selector used for the Util::SetGlobal shortcut.
has 'selector' => (
    is            => 'rw',
    isa           => 'Str',
    traits        => ['Getopt'],
    cmd_aliases   => 'S',
    documentation => q{shortcut for adding "Util::SetGlobal selector=xy" at the beginning of the scenario},
);

# --tokenize / -t: boolean switch requesting the reader + tokenizer shortcut.
has 'tokenize' => (
    is            => 'rw',
    isa           => 'Bool',
    traits        => ['Getopt'],
    cmd_aliases   => 't',
    documentation => q{shortcut for adding "Read::Sentences W2A::Tokenize" at the beginning of the scenario (or W2A::XY::Tokenize if used with --lang=xy)},
);

# treex -h should not print "Unknown option: h" before the usage.
# (The explicit 'help' attribute below is intentionally kept disabled.)
#has 'help' => (
#    traits        => ['Getopt'],
#    cmd_aliases   => 'h',
#    is            => 'ro', isa => 'Bool', default => 0,
#    documentation => q{Print usage info},
#);

# Names of the input files; not exposed as a command-line option.
has 'filenames' => (
    is            => 'rw',
    isa           => 'ArrayRef[Str]',
    traits        => ['NoGetopt'],
    documentation => 'treex file names',
);

# The scenario object; not exposed as a command-line option.
# _has_scenario() tells whether it has been set already.
has 'scenario' => (
    is            => 'rw',
    isa           => 'Treex::Core::Scenario',
    predicate     => '_has_scenario',
    traits        => ['NoGetopt'],
    documentation => 'scenario object',
);

# --watch: path of a file whose changes cause the scenario to be re-run.
has 'watch' => (
    is            => 'ro',
    isa           => 'Str',
    traits        => ['Getopt'],
    documentation => 're-run when the given file is changed TODO better doc',
);

# --dump_scenario / -d: boolean switch, off by default.
has 'dump_scenario' => (
    is            => 'rw',
    isa           => 'Bool',
    default       => 0,
    traits        => ['Getopt'],
    cmd_aliases   => 'd',
    documentation => 'Just dump (print to STDOUT) the given scenario and exit.',
);

# --dump_required_files: boolean switch, off by default.
has 'dump_required_files' => (
    is            => 'rw',
    isa           => 'Bool',
    default       => 0,
    traits        => ['Getopt'],
    documentation => 'Just dump (print to STDOUT) files required by the given scenario and exit.',
);

# --cache: cache specification string, empty by default.
has 'cache' => (
    is            => 'rw',
    isa           => 'Str',
    default       => q{},
    traits        => ['Getopt'],
    documentation => 'Use cache. Required memory is specified in format memcached,loading. Numbers are in GB.',
);

# --version / -v: as soon as the attribute is set, the version report is
# printed and the process exits.
has 'version' => (
    is            => 'ro',
    isa           => 'Bool',
    default       => 0,
    traits        => ['Getopt'],
    cmd_aliases   => 'v',
    documentation => q(Print treex and perl version),
    trigger       => sub {
        print get_version();
        exit();
    },
);
153              
#
# Options of the parallel-execution head
# TODO move them to Treex::Core::Parallel::Head
#

# --forward_error_level / -E: default WARN.
has 'forward_error_level' => (
    is            => 'rw',
    isa           => 'ErrorLevel',
    default       => 'WARN',
    traits        => ['Getopt'],
    cmd_aliases   => 'E',
    documentation => q{messages with this level or higher will be forwarded from the distributed jobs to the main STDERR},
);

# --parallel / -p: boolean switch, off by default.
has 'parallel' => (
    is            => 'rw',
    isa           => 'Bool',
    default       => 0,
    traits        => ['Getopt'],
    cmd_aliases   => 'p',
    documentation => 'Parallelize the task on SGE cluster (using qsub).',
);

# --jobs / -j: number of parallel jobs, default 10.
has 'jobs' => (
    is            => 'ro',
    isa           => 'Int',
    default       => 10,
    traits        => ['Getopt'],
    cmd_aliases   => 'j',
    documentation => 'Number of jobs for parallelization, default 10. Requires -p.',
);

# --local: boolean switch without a default value.
has 'local' => (
    is            => 'ro',
    isa           => 'Bool',
    traits        => ['Getopt'],
    documentation => 'Run jobs locally (might help with multi-core machines). Requires -p.',
);

# --priority: qsub priority, default -100.
has 'priority' => (
    is            => 'ro',
    isa           => 'Int',
    default       => -100,
    traits        => ['Getopt'],
    documentation => 'Priority for qsub, an integer in the range -1023 to 0 (or 1024 for admins), default=-100. Requires -p.',
);

# --mem / --memory / -m: memory request for cluster jobs, default '2G'.
has 'mem' => (
    is            => 'ro',
    isa           => 'Str',
    default       => '2G',
    traits        => ['Getopt'],
    cmd_aliases   => [ 'm', 'memory' ],
    documentation => 'How much memory should be allocated for cluster jobs, default=2G. Requires -p. '
        . 'Translates to "qsub -hard -l mem_free=$mem -l h_vmem=2*$mem -l act_mem_free=$mem". '
        . 'Use --mem=0 and --qsub to set your own SGE settings (e.g. if act_mem_free is not available).',
);

# --name: prefix of the submitted job names, empty by default.
has 'name' => (
    is            => 'ro',
    isa           => 'Str',
    default       => q{},
    traits        => ['Getopt'],
    documentation => 'Prefix of submitted jobs. Requires -p. '
        . 'Translates to "qsub -N $name-jobname".',
);

# --queue: SGE queue name, empty by default.
has 'queue' => (
    is            => 'ro',
    isa           => 'Str',
    default       => q{},
    traits        => ['Getopt'],
    documentation => 'SGE queue. Translates to "qsub -q $queue".',
);

# --qsub: extra parameters handed over to qsub, empty by default.
has 'qsub' => (
    is            => 'ro',
    isa           => 'Str',
    default       => q{},
    traits        => ['Getopt'],
    documentation => 'Additional parameters passed to qsub. Requires -p. '
        . 'See --priority and --mem. You can use e.g. --qsub="-q *@p*,*@s*" to use just machines p* and s*. '
        . 'Or e.g. --qsub="-q *@!(twi*|pan*)" to skip twi* and pan* machines.',
);

# --workdir: directory (pattern) for temporary files of a parallel run.
has 'workdir' => (
    is            => 'rw',
    isa           => 'Str',
    default       => './{NNN}-cluster-run-{XXXXX}',
    traits        => ['Getopt'],
    documentation => 'working directory for temporary files in parallelized processing; '
        . 'one can create automatic directories by using patterns: '
        . '{NNN} is replaced by an ordinal number with so many leading zeros to have length of the number of Ns, '
        . '{XXXX} is replaced by a random string, whose length is the same as the number of Xs (min. 4). '
        . 'If not specified, directories such as 001-cluster-run, 002-cluster-run etc. are created',
);

# Job numbers of the SGE jobs; starts as an empty array reference and is not
# exposed as a command-line option.
has 'sge_job_numbers' => (
    is            => 'rw',
    default       => sub { return [] },
    traits        => ['NoGetopt'],
    documentation => 'list of numbers of jobs executed on sge',
);

# --survive: boolean switch, off by default.
has 'survive' => (
    is            => 'rw',
    isa           => 'Bool',
    default       => 0,
    traits        => ['Getopt'],
    documentation => q{Continue collecting jobs' outputs even if some of them crashed (risky, use with care!).},
);
264              
265              
#
# Options of the parallel-execution node/worker
# TODO move them to Treex::Core::Parallel::Node
#

# --jobindex: index of this worker job (set by the head, not by users).
has 'jobindex' => (
    is            => 'ro',
    isa           => 'Int',
    traits        => ['Getopt'],
    documentation => 'Not to be used manually. If number of jobs is set to J and modulo set to M, only I-th files fulfilling I mod J == M are processed.',
);

# --outdir: directory for the collected outputs (set by the head, not by users).
has 'outdir' => (
    is            => 'ro',
    isa           => 'Str',
    traits        => ['Getopt'],
    documentation => 'Not to be used manually. Dictory for collecting standard and error outputs in parallelized processing.',
);

# --server: location of the head (set by the head, not by users); empty by default.
has 'server' => (
    is            => 'ro',
    isa           => 'Str',
    default       => q{},
    traits        => ['Getopt'],
    documentation => 'Not to be used manually. Used to point parallel jobs to the head.',
);
291              
292             #
293             #
294             #
295              
# Returns the template of the usage message header (three lines, no trailing
# newline). Presumably consumed by the MooseX::Getopt role applied to this
# class, which would expand the %c and %o placeholders -- TODO confirm.
sub _usage_format {
    my @template_lines = (
        'usage: %c %o scenario [-- treex_files]',
        'scenario is a sequence of blocks or *.scen files',
        'options:',
    );
    return join "\n", @template_lines;
}
299              
# Returns a two-line report (each line newline-terminated) with the Treex
# version and the location of the treex executable, followed by the Perl
# version and the path of the running perl binary.
sub get_version {
    my $perl_version  = $^V;
    my $perl_binary   = $^X;
    my $treex_version = $Treex::Core::Run::VERSION || 'DEV';

    # File::Which::which sometimes fails to find treex,
    # so the shell's own "which" is tried as a fallback.
    my $treex_binary = which('treex');
    if ( !defined $treex_binary ) {
        $treex_binary = `which treex 2> /dev/null`;
        chomp $treex_binary;
        $treex_binary ||= '<treex not found in $PATH>';
    }

    return "Treex version: $treex_version from $treex_binary\n"
        . "Perl version: $perl_version from $perl_binary\n";
}
318              
# Object-construction hook; currently a no-op that returns nothing.
# More complicated consistency checks of the options are meant to be placed here.
sub BUILD {
    my $self = shift;
    return;
}
326              
327              
# Runs this runner.
#
# * With --dump_scenario and/or --dump_required_files the scenario is built
#   from the command-line arguments, the requested information is printed to
#   STDOUT and the process exits.
# * Otherwise _execute_scenario() is called once. If --watch=FILE was given,
#   it is called again each time the modification time of FILE increases;
#   the loop ends when FILE no longer exists.
#
# Dies (via log_fatal) if the watch file does not exist at start-up.
# Returns nothing.
sub _execute {
    my ($self) = @_;

    if ( $self->dump_scenario || $self->dump_required_files ) {

        # If someone wants to run treex -d My::Block my_scen.scen
        my $scen_str = $self->_construct_scenario_string_with_quoted_whitespace();

        # TODO: Do it properly - perhaps, add a Scenario option to not load all the blocks.
        # We cannot create the real scenario instance without loading all the blocks.
        # NOTE(review): the constructor key here is 'scenario_string', whereas
        # _init_scenario() uses 'from_string' -- confirm both are accepted.
        $self->set_scenario( Treex::Core::Scenario->new( scenario_string => $scen_str, runner => $self ) );

        if ( $self->dump_scenario ) {
            print "# Full Scenario generated by 'treex --dump_scenario' on " . localtime() . "\n";
            print $self->scenario->construct_scenario_string( multiline => 1 ), "\n";
        }

        if ( $self->dump_required_files ) {
            print "# Required files generated by 'treex --dump_required_files' on " . localtime() . "\n";

            # join() is parenthesized so that the final newline terminates
            # the list instead of being joined in as an extra (empty) item.
            print join( "\n", $self->scenario->get_required_files() ), "\n";
        }
        exit;
    }

    my $watch = $self->watch;
    my $time;

    if ( defined $watch ) {
        log_fatal("Watch file '$watch' does not exist") if !-f $watch;
        $time = ( stat $watch )[9];    # element 9 of stat() is the mtime
    }

    my $done = 0;
    while ( !$done ) {

        $self->_execute_scenario();

        # Without --watch (or once the watched file is gone) one run is enough.
        $done = 1;
        my $info_written = 0;
        WATCH_CHANGE:
        while ( defined $watch && -f $watch ) {
            my $new_time = ( stat $watch )[9];

            # The file may vanish between the -f test and stat();
            # treat that the same way as a deleted file.
            last WATCH_CHANGE if !defined $new_time;

            if ( $new_time > $time ) {
                $time = $new_time;
                $done = 0;
                last WATCH_CHANGE;
            }
            if ( !$info_written ) {
                log_info("Watching '$watch' file. Touch it to re-run, delete to quit.");
                $info_written = 1;
            }
            sleep 1;
        }
    }
    return;
}
397              
# Maps a supported file-name extension (without the leading dot) to the
# short name of the reader block that is used for it.
my %READER_FOR = (
    'treex'    => 'Treex',
    'treex.gz' => 'Treex',
    'txt'      => 'Text',
    'txt.gz'   => 'Text',
    'streex'   => 'Treex',
    'mrg'      => 'PennMrg',
    'mrg.gz'   => 'PennMrg',
    'tag'      => 'CdtTag',

    # TODO:
    # conll => 'Conll',
    # plsgz => 'Plsgz',
    # tmt
);

# Chooses the reader block for the given file names.
#
# Parameters: the invocant (not used) followed by the list of file names.
# Returns:    the reader block name, e.g. "Read::Treex".
# Dies (via log_fatal) when no name is given, when a name has no supported
# extension, or when the names do not all share one extension (a trailing
# ".gz" is ignored in that comparison).
sub _get_reader_name_for {
    my ( $self, @names ) = @_;

    log_fatal('No files given, cannot choose a document reader') if !@names;

    # The extensions are literal strings, so they are escaped before being
    # put into the pattern (otherwise the dot in "txt.gz" matches any character).
    my $base_re = join '|', map { quotemeta($_) } keys %READER_FOR;
    my $re      = qr{\.($base_re)$};
    my $first;

    foreach my $name (@names) {
        if ( $name =~ $re ) {
            my $current = $1;
            $current =~ s/\.gz$//;
            if ( !defined $first ) {
                $first = $current;
            }
            if ( $current ne $first ) {
                log_fatal(
                    'All files (' . join( ',', @names ) . ') must have the same extension' . "\n"
                        . " current = $current\n"
                        . " first = $first\n"
                        . " curname = $name"
                );
            }
        }
        else {
            log_fatal( 'Files (' . join( ',', @names ) . ') must have extensions' );
        }
    }

    my $r = $READER_FOR{$first};
    log_fatal("There is no DocumentReader implemented for extension '$first'") if !$r;
    return "Read::$r";
}
445              
# This is where the main work is done: the scenario is initialized and run
# in the current process, and the running time is logged.
# It is overridden in parallel execution.
sub _execute_scenario {
    my $self = shift;

    log_info('Local (single-process) execution.');

    $self->_init_scenario();

    my $scenario   = $self->scenario;
    my $started_at = time();
    $scenario->run();
    my $elapsed = time() - $started_at;

    log_info("Running the scenario took $elapsed seconds");

    return;
}
463              
# Joins the remaining command-line arguments (extra_argv) into one scenario
# string. Parameter values can contain whitespace that has to be preserved:
# an argument of the form name=value whose value contains whitespace is
# rewritten as name='value', with single quotes inside the value escaped by
# a backslash. All other arguments are passed through unchanged.
sub _construct_scenario_string_with_quoted_whitespace {
    my $self = shift;
    my @parts;

    for my $arg ( @{ $self->extra_argv } ) {
        my ( $key, $val ) = $arg =~ /([^=\s]+)=(.*\s.*)$/;

        # Not a name=value pair with whitespace in the value: keep as it is.
        if ( !defined $key ) {
            push @parts, $arg;
            next;
        }

        $val =~ s/'/\\'/g;
        push @parts, $key . q{='} . $val . q{'};
    }

    return join q{ }, @parts;
}
480              
# Builds the final scenario string from the command line and makes sure a
# loaded scenario object is available in $self->scenario.
#
# Some command-line options are just shortcuts for blocks; they are added in
# this order, so the resulting string reads
#   [SetGlobal selector] [SetGlobal language] [Read::Sentences + tokenizer]
#   [reader for the given files] <scenario from the arguments> [Write::Treex]
#
# If a scenario object already exists it is restarted, otherwise a new one
# is created from the string and its blocks are loaded. The loading time is
# logged. Returns nothing.
sub _init_scenario {
    my ($self) = @_;

    my $scen_str = $self->_construct_scenario_string_with_quoted_whitespace();

    if ( $self->filenames ) {
        my $reader = $self->_get_reader_name_for( @{ $self->filenames } );
        log_info("Block $reader added to the beginning of the scenario.");
        $scen_str = "$reader from=" . join( ',', @{ $self->filenames } ) . " $scen_str";
    }

    if ( $self->save ) {
        log_info('Block Write::Treex added to the end of the scenario.');
        $scen_str .= ' Write::Treex';
    }

    if ( $self->tokenize ) {
        my $tokenizer = 'W2A::Tokenize';
        my $lang      = $self->lang;

        # Prefer a language-specific tokenizer when such a module can be loaded.
        # The language code comes from the command line, so it is validated and
        # the module is loaded with require inside a block eval; the value is
        # never interpolated into evaluated code.
        if ( $lang && $lang ne 'all' && $lang =~ /\A[A-Za-z0-9_]+\z/ ) {
            my $lang_part   = uc $lang;
            my $module_file = "Treex/Block/W2A/$lang_part/Tokenize.pm";
            if ( eval { require $module_file; 1 } ) {
                $tokenizer = "W2A::${lang_part}::Tokenize";
            }
        }
        $scen_str = "Read::Sentences $tokenizer $scen_str";
    }

    if ( $self->lang ) {
        $scen_str = 'Util::SetGlobal language=' . $self->lang . " $scen_str";
    }

    if ( $self->selector ) {
        $scen_str = 'Util::SetGlobal selector=' . $self->selector . " $scen_str";
    }

    my $loading_started = time;
    if ( $self->_has_scenario ) {
        $self->scenario->restart();
    }
    else {
        $self->set_scenario( Treex::Core::Scenario->new( from_string => $scen_str, runner => $self ) );
        $self->scenario->load_blocks;
    }

    my $loading_ended = time;
    log_info( 'Loading the scenario took ' . ( $loading_ended - $loading_started ) . ' seconds' );

    return;
}
532              
533              
# A factory subroutine, creating the right runner object for the job and
# executing it:
#   * worker node of a parallel run (an argument starts with --jobindex):
#     Treex::Core::Parallel::Node
#   * head of a parallel run (--parallel, -p or -pjN): Treex::Core::Parallel::Head
#   * local single-process run: Treex::Core::Run
#
# The only parameter is either a reference to an array of arguments, or a
# string containing all arguments as on the command line (it is split on
# whitespace). Everything after a "--" argument is taken as file names.
# Without usable arguments the function calls itself with '--help'.
# Returns nothing.
sub treex {
    my ($arguments) = @_;

    my $is_list = ref($arguments) eq 'ARRAY';

    # Nothing to work with: undef or an empty array reference.
    if ( !defined $arguments || ( $is_list && !@{$arguments} ) ) {
        treex('--help');

        #log_fatal 'Unspecified arguments for running treex.';
        return;
    }

    # A plain string: turn it into a list of its non-empty tokens.
    if ( !$is_list ) {
        treex( [ grep { length $_ } split( /\s+/, $arguments ) ] );
        return;
    }

    my %args         = ( argv => $arguments );
    my $separator_at = first_index { $_ eq '--' } @{$arguments};
    if ( $separator_at >= 0 ) {

        # Cut off "--" together with everything behind it, then drop the "--".
        my @files = splice @{$arguments}, $separator_at;
        shift @files;
        $args{filenames} = \@files;
    }

    my $runner;
    if ( any { $_ =~ /^--jobindex/ } @{$arguments} ) {
        require Treex::Core::Parallel::Node;
        $runner = Treex::Core::Parallel::Node->new_with_options( \%args );
    }
    elsif ( any { $_ =~ /^(--parallel|-p|-pj\d+)$/ } @{$arguments} ) {
        require Treex::Core::Parallel::Head;
        $runner = Treex::Core::Parallel::Head->new_with_options( \%args );
    }
    else {
        $runner = Treex::Core::Run->new_with_options( \%args );
    }
    $runner->_execute();

    return;
}
577              
578             1;
579              
580             __END__
581              
582             =head2 --watch option
583              
584             SYNOPSIS:
585             touch timestamp.file
586             treex --watch=timestamp.file my.scen & # or without & and open another terminal
587             # after all documents are processed, treex is still running, watching timestamp.file
588             # you can modify any modules/blocks and then
589             touch timestamp.file
590             # All modified modules will be reloaded (the number of reloaded modules is printed).
591             # The document reader is restarted, so it starts reading the first file again.
592             # To exit this "watching loop" either rm timestamp.file or press Ctrl+C.
593              
594             BENEFITS:
595             * much faster development cycles (e.g. most time of en-cs translation is spent on loading)
596             * Now I have some non-deterministic problems with loading NER::Stanford
597             - using --watch I get it loaded on all jobs once and then I don't have to reload it.
598              
599             TODO:
600             * modules are just reloaded, no constructors are called yet
601              
602              
603             =for Pod::Coverage BUILD get_version
604              
605             =encoding utf-8
606              
607             =head1 NAME
608              
609             Treex::Core::Run + treex - applying Treex blocks and/or scenarios on data
610              
611             =head1 VERSION
612              
613             version 2.20160630
614              
615             =head1 SYNOPSIS
616              
617             In bash:
618              
619             > treex myscenario.scen -- data/*.treex
620             > treex My::Block1 My::Block2 -- data/*.treex
621              
622             In Perl:
623              
624             use Treex::Core::Run q(treex);
625             treex([qw(myscenario.scen -- data/*.treex)]);
626             treex([qw(My::Block1 My::Block2 -- data/*.treex)]);
627              
628             =head1 DESCRIPTION
629              
630             C<Treex::Core::Run> applies a block, a scenario, or a mixture of both to a
631             set of data files. It is designed to be used primarily from the bash command line,
632             using a thin front-end script called C<treex>. However, the same list of
633             arguments can be passed as an array reference to the function C<treex()>
634             imported from C<Treex::Core::Run>.
635              
636             Note that this module supports distributed processing (Linux-only!), simply by
637             adding the switch C<-p>. The C<treex> function then creates a
638             C<Treex::Core::Parallel::Head> object, which extends C<Treex::Core::Run>
639             by providing parallel processing functionality.
640              
641             Then there are two ways to process the data in a parallel fashion. By
642             default, SGE cluster's C<qsub> is expected to be available. If you have no
643             cluster but want to make the computation parallelized at least on a multicore
644             machine, add the C<--local> switch.
645              
646             =head1 SUBROUTINES
647              
648             =over 4
649              
650             =item treex
651              
652             Creates a new runner and runs the scenario given in the parameters.
653              
654             =back
655              
656             =head1 USAGE
657              
658             usage: treex [-?dEehjLmpqSstv] [long options...] scenario [-- treex_files]
659             scenario is a sequence of blocks or *.scen files
660             options:
661             -h -? --usage --help Prints this usage information.
662             -s --save save all documents
663             -q --quiet Warning, info and debug messages
664             are suppressed. Only fatal errors
665             are reported.
666             --cleanup Delete all temporary files.
667             -e STR --error_level STR Possible values: ALL, DEBUG,
668             INFO, WARN, FATAL
669             -L STR --language STR --lang STR shortcut for adding
670             "Util::SetGlobal language=xy" at
671             the beginning of the scenario
672             -S STR --selector STR shortcut for adding
673             "Util::SetGlobal selector=xy" at
674             the beginning of the scenario
675             -t --tokenize shortcut for adding
676             "Read::Sentences W2A::Tokenize"
677             at the beginning of the scenario
678             (or W2A::XY::Tokenize if used
679             with --lang=xy)
680             --watch STR re-run when the given file is
681             changed TODO better doc
682             -d --dump_scenario Just dump (print to STDOUT) the
683             given scenario and exit.
684             --dump_required_files Just dump (print to STDOUT) files
685             required by the given scenario
686             and exit.
687             --cache STR Use cache. Required memory is
688             specified in format
689             memcached,loading. Numbers are in
690             GB.
691             -v --version Print treex and perl version
692             -E STR --forward_error_level STR messages with this level or
693             higher will be forwarded from the
694             distributed jobs to the main
695             STDERR
696             -p --parallel Parallelize the task on SGE
697             cluster (using qsub).
698             -j INT --jobs INT Number of jobs for
699             parallelization, default 10.
700             Requires -p.
701             --local Run jobs locally (might help with
702             multi-core machines). Requires -p.
703             --priority INT Priority for qsub, an integer in
704             the range -1023 to 0 (or 1024 for
705             admins), default=-100. Requires
706             -p.
707             --memory STR -m STR --mem STR How much memory should be
708             allocated for cluster jobs,
709             default=2G. Requires -p.
710             Translates to "qsub -hard -l
711             mem_free=$mem -l h_vmem=2*$mem -l
712             act_mem_free=$mem". Use --mem=0
713             and --qsub to set your own SGE
714             settings (e.g. if act_mem_free is
715             not available).
716             --name STR Prefix of submitted jobs.
717             Requires -p. Translates to "qsub
718             -N $name-jobname".
719             --queue STR SGE queue. Translates to "qsub -q
720             $queue".
721             --qsub STR Additional parameters passed to
722             qsub. Requires -p. See --priority
723             and --mem. You can use e.g.
724             --qsub="-q *@p*,*@s*" to use just
725             machines p* and s*. Or e.g.
726             --qsub="-q *@!(twi*|pan*)" to
727             skip twi* and pan* machines.
728             --workdir STR working directory for temporary
729             files in parallelized processing;
730             one can create automatic
731             directories by using patterns:
732             {NNN} is replaced by an ordinal
733             number with so many leading zeros
734             to have length of the number of
735             Ns, {XXXX} is replaced by a
736             random string, whose length is
737             the same as the number of Xs
738             (min. 4). If not specified,
739             directories such as
740             001-cluster-run, 002-cluster-run
741             etc. are created
742             --survive Continue collecting jobs' outputs
743             even if some of them crashed
744             (risky, use with care!).
745             --jobindex INT Not to be used manually. If
746             number of jobs is set to J and
747             modulo set to M, only I-th files
748             fulfilling I mod J == M are
749             processed.
750             --outdir STR Not to be used manually. Dictory
751             for collecting standard and error
752             outputs in parallelized
753             processing.
754             --server STR Not to be used manually. Used to
755             point parallel jobs to the head.
756              
757             =head1 AUTHORS
758              
759             Zdeněk Žabokrtský <zabokrtsky@ufal.mff.cuni.cz>
760              
761             Martin Popel <popel@ufal.mff.cuni.cz>
762              
763             Martin Majliš
764              
765             Ondřej Dušek <odusek@ufal.mff.cuni.cz>
766              
767             =head1 COPYRIGHT AND LICENSE
768              
769             Copyright © 2011-2014 by Institute of Formal and Applied Linguistics, Charles University in Prague
770              
771             This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.