File Coverage

lib/MediaCloud/JobManager.pm
Criterion Covered Total %
statement 27 57 47.3
branch 0 10 0.0
condition 0 4 0.0
subroutine 9 13 69.2
pod 1 2 50.0
total 37 86 43.0


line stmt bran cond sub pod time code
1              
2             =head1 NAME
3            
4             C<MediaCloud::JobManager> - Perl worker / client library for running jobs
5             asynchronously.
6            
7             =head1 SYNOPSIS
8            
9             use MediaCloud::JobManager;
10            
11             =head1 DESCRIPTION
12            
13             Run jobs locally, remotely or remotely + asynchronously.
14            
15             =head2 EXPORT
16            
17             None by default.
18            
19             =head1 AUTHOR
20            
21             Linas Valiukas, E<lt>lvaliukas@cyber.law.harvard.eduE<gt>
22            
23             =head1 COPYRIGHT AND LICENSE
24            
25             Copyright (C) 2013- Linas Valiukas, 2013- Berkman Center for Internet &
26             Society.
27            
28             This library is free software; you can redistribute it and/or modify it under
29             the same terms as Perl itself, either Perl version 5.18.2 or, at your option,
30             any later version of Perl 5 you may have available.
31            
32             =cut
33              
34             package MediaCloud::JobManager;
35              
36             our $VERSION = '0.27';
37              
38 1     1   6 use strict;
  1         2  
  1         38  
39 1     1   4 use warnings;
  1         2  
  1         30  
40 1     1   4 use Modern::Perl "2012";
  1         5  
  1         7  
41              
42 1     1   128 use MediaCloud::JobManager::Configuration;
  1         2  
  1         32  
43              
44 1     1   400 use UUID::Tiny ':std';
  1         21043  
  1         237  
45 1     1   9 use Digest::SHA qw(sha256_hex);
  1         1  
  1         57  
46 1     1   5 use Carp;
  1         2  
  1         47  
47 1     1   446 use Readonly;
  1         3234  
  1         66  
48              
49 1     1   693 use Log::Log4perl qw(:easy);
  1         46902  
  1         11  
50             Log::Log4perl->easy_init(
51                 {
52                     level => $DEBUG,
53                     utf8 => 1,
54                     layout => "%d{ISO8601} [%P]: %m%n"
55                 }
56             );
57              
58             # flush sockets after every write
59             $| = 1;
60              
61             # Max. job ID length for MediaCloud::JobManager jobs (when
62             # MediaCloud::JobManager::Job comes up with a job ID of its own)
63             Readonly my $MJM_JOB_ID_MAX_LENGTH => 256;
64              
65             =head2 (static) C<job_status($function_name, $job_id)>
66            
67             Get job status.
68            
69             Parameters:
70            
71             =over 4
72            
73             =item * Function name (e.g. "NinetyNineBottlesOfBeer")
74            
75             =item * Job ID (e.g. "H:localhost.localdomain:8")
76            
77             =back
78            
79             Returns hashref with the job status, e.g.:
80            
81             =begin text
82            
83             { # Job ID that was passed as a parameter
84             'job_id' => 'H:tundra.home:8',
85            
86             # Whether or not the job is currently running
87             'running' => 1,
88            
89             # Numerator and denominator of the job's progress
90             # (in this example, job is 1333/2000 complete)
91             'numerator' => 1333,
92             'denominator' => 2000
93             };
94            
95             =end text
96            
97             Returns undef if the job ID was not found; dies on error.
98            
99             =cut
100              
101             sub job_status($$)
102             {
103 0     0 1       my ( $function_name, $job_id ) = @_;
104              
105 0               my $config = $function_name->configuration();
106              
107 0               return $config->{ broker }->job_status( $function_name, $job_id );
108             }
109              
110             # (static) Return a unique job ID that will identify a particular job with its
111             # arguments
112             #
113             # * function name, e.g. 'NinetyNineBottlesOfBeer'
114             # * hashref of job arguments, e.g. "{ 'how_many_bottles' => 13 }"
115             #
116             # Returns: SHA-256 hex digest (64 hexadecimal characters) of the unique job
117             # ID string, e.g. of "NinetyNineBottlesOfBeer(how_many_bottles = 2000)"
118             #
119             # FIXME maybe use Data::Dumper?
120             sub unique_job_id($$)
121             {
122 0     0 0       my ( $function_name, $job_args ) = @_;
123              
124 0 0             unless ( $function_name )
125                 {
126 0                   return undef;
127                 }
128              
129             # Convert to string
130                 $job_args =
131                   ( $job_args and scalar keys %{ $job_args } )
132 0 0 0             ? join( ', ', map { $_ . ' = ' . ( $job_args->{ $_ } // 'undef' ) } sort( keys %{ $job_args } ) )
  0   0        
  0            
133                   : '';
134 0               my $unique_id = "$function_name($job_args)";
135              
136             # Job broker might limit the length of "unique" parameter
137 0               $unique_id = sha256_hex( $unique_id );
138              
139 0               return $unique_id;
140             }
141              
142             # (static) Return a unique, path-safe job name which is suitable for writing
143             # to the filesystem (e.g. for logging)
144             #
145             # Parameters:
146             # * function name, e.g. 'NinetyNineBottlesOfBeer'
147             # * hashref of job arguments, e.g. "{ 'how_many_bottles' => 13 }"
148             # * (optional) Job ID, e.g. "H:tundra.home:18" or "127.0.0.1:4730//H:tundra.home:18"
149             #
150             # Returns: unique job ID of the form "<job ID or UUID>.<SHA-256 hex digest>", e.g.:
151             # * "084567C4146F11E38F00CB951DB7256D.<digest returned by unique_job_id()>", or
152             # * "H_tundra.home_18.<digest returned by unique_job_id()>"
153             sub _unique_path_job_id($$;$)
154             {
155 0     0         my ( $function_name, $job_args, $job_id ) = @_;
156              
157 0 0             unless ( $function_name )
158                 {
159 0                   return undef;
160                 }
161              
162 0               my $unique_id;
163 0 0             if ( $job_id )
164                 {
165              
166             # If job ID was passed as a parameter, this means that the job
167             # was run remotely (by running run_remotely() or add_to_queue()).
168             # Thus, the job has to be logged to a location that can later be found
169             # by knowing the job ID.
170              
171 0                   my $config = $function_name->configuration();
172              
173             # Strip the host part (if present)
174 0                   $unique_id = $config->{ broker }->job_id_from_handle( $job_id );
175              
176                 }
177                 else
178                 {
179              
180             # If no job ID was provided, this means that the job is being
181             # run locally.
182             # The job's output still has to be logged somewhere, so we generate a
183             # UUID to serve in place of job ID.
184              
185 0                   my $uuid = uc( create_uuid_as_string( UUID_RANDOM ) );
186 0                   $uuid =~ s/\-//gs;
187              
188 0                   $unique_id = $uuid;
189                 }
190              
191             # ID goes first in case the job name shortener decides to cut out a part of the job ID
192 0               my $mjm_job_id = $unique_id . '.' . unique_job_id( $function_name, $job_args );
193 0 0             if ( length( $mjm_job_id ) > $MJM_JOB_ID_MAX_LENGTH )
194                 {
195 0                   $mjm_job_id = substr( $mjm_job_id, 0, $MJM_JOB_ID_MAX_LENGTH );
196                 }
197              
198             # Sanitize for paths
199 0               $mjm_job_id = _sanitize_for_path( $mjm_job_id );
200              
201 0               return $mjm_job_id;
202             }
203              
204             sub _sanitize_for_path($)
205             {
206 0     0         my $string = shift;
207              
208 0               $string =~ s/[^a-zA-Z0-9\.\-_\(\)=,]/_/gi;
209              
210 0               return $string;
211             }
212              
213             1;
214