File Coverage

blib/lib/Net/Amazon/MechanicalTurk/Command/LoadHITs.pm
Criterion Covered Total %
statement 40 153 26.1
branch 2 68 2.9
condition 0 6 0.0
subroutine 13 19 68.4
pod 0 5 0.0
total 55 251 21.9


line stmt bran cond sub pod time code
1             package Net::Amazon::MechanicalTurk::Command::LoadHITs;
2 2     2   23334 use strict;
  2         4  
  2         72  
3 2     2   10 use warnings;
  2         5  
  2         54  
4 2     2   12 use Carp;
  2         3  
  2         176  
5 2     2   942 use IO::File;
  2         11394  
  2         366  
6 2     2   1273 use Net::Amazon::MechanicalTurk::BulkSupport;
  2         8  
  2         76  
7 2     2   19 use Net::Amazon::MechanicalTurk::DataStructure;
  2         4  
  2         40  
8 2     2   1362 use Net::Amazon::MechanicalTurk::RowData;
  2         6  
  2         47  
9 2     2   566 use Net::Amazon::MechanicalTurk::Properties;
  2         3  
  2         53  
10 2     2   11 use Net::Amazon::MechanicalTurk::Template;
  2         10  
  2         50  
11 2     2   10 use Net::Amazon::MechanicalTurk::Template::ReplacementTemplate;
  2         4  
  2         44  
12 2     2   9 use Net::Amazon::MechanicalTurk::DelimitedWriter;
  2         4  
  2         3408  
13              
14             our $VERSION = '1.00';
15              
16             =head1 NAME
17              
18             Net::Amazon::MechanicalTurk::Command::LoadHITs - Bulk Loading support for Amazon Mechancial Turk.
19              
20             This module adds the loadHITs method to the Net::Amazon::MechanicalTurk class.
21              
22             =head1 SYNOPSIS
23              
24             # See the sample loadHITs from the source distribution.
25              
26             sub questionTemplate {
27             my %params = %{$_[0]};
28             return <
29            
30            
31            
32             1
33            
34             $params{question}
35            
36            
37            
38            
39            
40            
41             END_XML
42             }
43            
44             my $properties = {
45             Title => 'LoadHITs Perl sample',
46             Description => 'This is a test of the bulk loading API.',
47             Keywords => 'LoadHITs, bulkload, perl',
48             Reward => {
49             CurrencyCode => 'USD',
50             Amount => 0.01
51             },
52             RequesterAnnotation => 'test',
53             AssignmentDurationInSeconds => 60 * 60,
54             AutoApprovalDelayInSeconds => 60 * 60 * 10,
55             MaxAssignments => 3,
56             LifetimeInSeconds => 60 * 60
57             };
58            
59             my $mturk = Net::Amazon::MechanicalTurk->new;
60            
61             $mturk->loadHITs(
62             properties => $properties,
63             input => "loadhits-input.csv",
64             question => \&questionTemplate,
65             progress => \*STDOUT,
66             success => "loadhits-success.csv",
67             fail => "loadhits-failure.csv"
68             );
69              
70             =head1 C
71              
72             loadHITs
73              
74             Bulk loads many hits of the same hit type into mechanical turk.
75             The method takes a set of properties used to create a HITType and its
76             associated HITs. To generate questions for HITs, rows of data are
77             pulled from an input source which is merged against a question template
78             to generate the question xml. For each row in an input source, 1 HIT
79             is generated. Note: The source distribution of the Mechanical Turk Perl SDK
80             contains samples using this method.
81              
82             loadHITs takes a hash reference or a hash with the following parameters:
83              
84            
85             properties - (required) Either a hash reference or the name of a file,
86             containing the properties to use for generating a HITType
87             and the associated HITs. When the properties are read from
88             a file, the method
89             Net::Amazon::MechanicalTurk::Properties->readNestedData is
90             used.
91            
92             input - (required) The input source for row data.
93             This parameter may be of the following types:
94             - Net::Amazon::MechanicalTurk::RowData
95             - An array of hashes.
96             (This is internally converted into an object of type:
97             Net::Amazon::MechanicalTurk::RowData::ArrayHashRowData)
98             - A reference to a subroutine. When the loadHITs method
99             asks for row data, the subroutine will be called and
100             passed a subroutine reference, which should be called
101             for every row generated by the input. The generated row
102             should be a hash reference.
103             (This is internally converted into an object of type
104             Net::Amazon::MechanicalTurk::RowData::SubroutineRowData)
105             - The name of a file. The file should be either a CSV or
106             tab delimited file. If the file name ends with '.csv',
107             it will read as a CSV, otherwise it is assumed to be
108             tab delimited. The first row in the file should contain
109             the column names. Each subsequent row becomes a hash
110             reference based on the column names.
111             (This is internally converted into an object of type
112             Net::Amazon::MechanicalTurk::RowData::DelimitedRowData)
113            
114             question - (required) The question template used to generate questions.
115             This parameter may be of the following types:
116             - An object of type Net::Amazon::MechanicalTurk::Template.
117             - A subroutine. The subroutine will be given a hash
118             reference representing the current input row.
119             (This is internally converted into an object of type
120             Net::Amazon::MechanicalTurk::Template::SubroutineTemplate)
121             - A filename ending in .rt or .question. This is a text
122             file which contains variables, which will be substituted
123             from the input row. Variables in the text file have
124             the syntax ${var_name}.
125             - A filename ending in .pl. This is a perl script, which
126             has 2 variables set named %params and $out. %params are
127             the parameters representing the input row and $out is
128             the IO::Handle the question should be written to. Before
129             this script is invoked, the $out handle is selected as
130             the default handle, so calls to print and printf without
131             a handle, will go to $out.
132             Note: Use of this type of question, requires the
133             IO::String module.
134            
135             preview - (optional) If preview is specified, a HITType and no HITs
136             will be created, instead, the preview parameter will be
137             given the parameters that would be used create the HIT.
138             This parameter may be of the following types:
139             - A subroutine. The subroutine is called with the
140             CreateHIT parameters.
141             - An IO::Handle. Each question from the CreateHIT
142             parameters will be printed to the handle.
143             - The name of a file. Each question from the CreateHIT
144             parameters will be printed to the file.
145            
146             progress - (optional) Used to display progress messages. This
147             parameter may be of the following types:
148             - A subroutine. The subroutine is called with 1 parameter,
149             a message to be displayed.
150             - An IO::Handle. The progress message is written to the
151             handle.
152            
153             success - (optional) Used to handle a successfully created hit. This
154             parameter may be of the following types:
155             - A filename. HITId's and HITTypeId's will be written to
156             this file. The file will be in a delimited format,
157             with the first row containing column headers. If the
158             filename ends in ".csv" the file format will be CSV,
159             otherwise it will be tab delimited.
160             - A subroutine. The subroutine is called when a hit is
161             created and passed a hash with the following parameters:
162             - mturk - A handle to the mturk client.
163             - fields - An array reference of the field names
164             for the input row.
165             - row - The input row the hit was created
166             from.
167             - parameters - The parameters given to CreateHIT.
168             - HITId - The HITId created.
169             - HITTypeId - The HITTypeId of the hit created.
170            
171             fail - (optional) Used to handle a hit which failed creation. If
172             this value is not specified and a hit fails creation, an
173             error will be raised. This value may be of the following
174             types:
175             - A filename. The input row will be written back to the
176             file in a delimited format. If the file name ends with
177             ".csv", then the file will be in CSV format, otherwise
178             it will be in a tab delimited format.
179             - A subroutine. The subroutine will be called back with
180             a hash containing the following values:
181             - mturk - A handle to the mturk client.
182             - fields - An array reference of the field names
183             for the input row.
184             - row - The input row the hit was created
185             from.
186             - parameters - The parameters given to CreateHIT.
187             - HITTypeId - The HITTypeId that was used in the
188             CreateHIT call.
189             - error - The error message associated with
190             the failure.
191            
192             maxHits - (optional) If this value is greater than 0, than at most
193             maxHits will be created.
194            
195             entityEscapeInput - (optional) If this value is a true value then the
196             input row will have certain values encoded as xml
197             entities, before being passed to the template.
198             The unescaped values will be accessible as _raw.
199             The characters escaped are >, <, &, ' and ".
200             This parameter is on by default.
201              
202             =cut
203              
204              
205             sub loadHITs {
206 0     0 0 0 my $mturk = shift;
207 0         0 my %params = @_;
208            
209 0         0 foreach my $required (qw{ properties input question }) {
210 0 0       0 if (!exists $params{$required}) {
211 0         0 Carp::croak("Missing required parameter $required.");
212             }
213             }
214            
215 0         0 my $preview = previewBlock($params{preview});
216 0         0 my $progress = Net::Amazon::MechanicalTurk::BulkSupport::progressBlock($params{progress});
217 0         0 my $success = Net::Amazon::MechanicalTurk::BulkSupport::successBlock($params{success});
218 0         0 my $fail = Net::Amazon::MechanicalTurk::BulkSupport::failBlock($params{fail});
219 0 0       0 my $maxHits = (exists $params{maxHits}) ? $params{maxHits} : -1;
220            
221 0 0       0 $params{entityEscapeInput} = (exists $params{entityEscapeInput}) ? $params{entityEscapeInput} : 1;
222            
223 0 0       0 if ($progress) {
224 0         0 $progress->("--[Initializing] " . scalar localtime() . " ---");
225 0         0 $progress->(" URL: " . $mturk->serviceUrl);
226 0         0 $progress->(" Properties: $params{properties}");
227 0         0 $progress->(" Input: $params{input}");
228 0         0 $progress->(" Question: $params{question}");
229             }
230            
231 0         0 my $properties = Net::Amazon::MechanicalTurk::Properties->toProperties($params{properties});
232 0         0 my $input = Net::Amazon::MechanicalTurk::RowData->toRowData($params{input});
233 0         0 my $question = Net::Amazon::MechanicalTurk::Template->toTemplate($params{question});
234            
235 0         0 my $createHITTypeProperties = Net::Amazon::MechanicalTurk::BulkSupport::getCreateHITTypeProperties($properties);
236 0         0 my $createHITProperties = Net::Amazon::MechanicalTurk::BulkSupport::getCreateHITProperties($properties);
237              
238 0         0 my $hitTypeId = -1;
239 0         0 my $exitedEach = 0;
240 0         0 my $rowNumber = 0;
241 0         0 my $hitsLoaded = 0;
242 0         0 my $failures = 0;
243 0         0 my $start = time();
244 0         0 my $lastHITId;
245            
246 0         0 eval {
247            
248             # Create HITType
249 0 0       0 if (!$preview) {
250 0 0       0 $progress->("--[Creating HITType] " . scalar localtime() . " ---") if ($progress);
251 0         0 $hitTypeId = Net::Amazon::MechanicalTurk::BulkSupport::createHITType($mturk, $createHITTypeProperties, $properties, $progress);
252 0 0       0 $progress->(" Created HITType (HITTypeId: $hitTypeId)") if ($progress);
253             }
254            
255 0 0       0 $progress->("--[Loading HITs] " . scalar localtime() . " ---") if ($progress);
256             $input->each(sub {
257 0     0   0 my ($_self, $row) = @_;
258 0         0 $rowNumber++;
259            
260 0 0 0     0 if ($maxHits >= 0 and $rowNumber > $maxHits) {
261 0         0 $exitedEach = 1;
262 0         0 die "Exiting each loop";
263             }
264            
265             # Create hit params
266 0         0 my $hitProps = { HITTypeId => $hitTypeId };
267 0         0 while (my ($htProp, $htTempl) = each %$createHITProperties) {
268 0         0 $hitProps->{$htProp} = $htTempl->execute($row);
269             }
270            
271             # Get a merged copy of row and properties
272 0         0 my $templateParams;
273 0 0       0 if ($params{entityEscapeInput}) {
274             # Entity encodes values in the hash
275             # and makes copies of the original values
276             # with the key name _raw.
277 0         0 $templateParams = xmlEntityEscapeHashValues($row);
278             }
279             else {
280 0         0 $templateParams = {%{$row}}; # makes a copy
  0         0  
281             }
282            
283 0         0 while (my ($pKey, $pVal) = each %$properties) {
284 0 0       0 if (!exists $templateParams->{$pKey}) {
285 0         0 $templateParams->{$pKey} = $pVal;
286             }
287             }
288            
289 0         0 $hitProps->{Question} = $question->execute($templateParams);
290            
291 0 0       0 if ($preview) {
292 0         0 $preview->($hitProps);
293             }
294             else { # CreateHIT
295 0         0 eval {
296 0         0 $lastHITId = $mturk->CreateHIT($hitProps)->{HITId}[0];
297 0 0       0 $progress->(" Created HIT $rowNumber (HITId: $lastHITId).") if ($progress);
298 0         0 $hitsLoaded++;
299 0         0 $success->(
300             mturk => $mturk,
301             fields => $input->fieldNames,
302             row => $row,
303             parameters => $hitProps,
304             HITId => $lastHITId,
305             HITTypeId => $hitTypeId
306             );
307             };
308 0 0       0 if ($@) {
309 0         0 $failures++;
310 0 0       0 $progress->(" $@") if $progress;
311 0         0 $fail->(
312             mturk => $mturk,
313             fields => $input->fieldNames,
314             row => $row,
315             parameters => $hitProps,
316             HITTypeId => $hitTypeId,
317             error => $@
318             );
319             }
320             } # End CreateHIT
321            
322 0         0 }); # End each
323             };
324 0 0 0     0 if ($@ and !$exitedEach) {
325 0         0 my $message = "\nAn error occurred while loading a HIT.\n";
326 0         0 $message .= "\n$@\n";
327 0 0       0 if ($mturk->response) {
328 0 0       0 if ($mturk->response->errorCode) {
329 0         0 $message .= "\nError Code: " . $mturk->response->errorCode . "\n";
330 0         0 $message .= "Error Message: " . $mturk->response->errorMessage . "\n";
331             }
332             }
333 0 0       0 if ($rowNumber > 0) {
334 0         0 $message .= "\nFailed on row $rowNumber in input $params{input}.\n";
335             }
336 0 0       0 if ($mturk->request) {
337 0         0 $message .= "\nLast operation called " . $mturk->request->{Operation} . ".\n";
338 0         0 $message .= "\nDump of call parameters:\n" .
339             formatDataStructure($mturk->request, 4) . "\n";
340             }
341 0 0       0 if ($mturk->response) {
342 0         0 $message .= "\nDump of response:\n" .
343             formatDataStructure($mturk->response->fullResult, 4) . "\n";
344             }
345 0         0 Carp::croak($message);
346             }
347            
348 0 0       0 if ($progress) {
349 0         0 $progress->(" Failed to load $failures hits.");
350 0         0 $progress->(" Loaded $hitsLoaded hits.");
351 0         0 $progress->("--[Done Loading HITs] " . scalar localtime() . " ---");
352 0         0 $progress->(" Total load time: " . (time() - $start) . " seconds.");
353 0         0 $progress->(" You may see your HITs here: " . $mturk->getHITTypeURL($hitTypeId));
354             }
355            
356 0         0 return { loaded => $hitsLoaded, failed => $failures, HITTypeId => $hitTypeId };
357             }
358              
359             sub previewBlock {
360 0     0 0 0 my ($preview) = @_;
361 0 0       0 if (!defined($preview)) {
    0          
    0          
362 0         0 return $preview;
363             }
364             elsif (UNIVERSAL::isa($preview, "CODE")) {
365 0         0 return $preview;
366             }
367             elsif (UNIVERSAL::isa($preview, "GLOB")) {
368             return sub {
369 0     0   0 my $hitProps = shift;
370 0         0 print $preview $hitProps->{Question}, "\n";
371 0         0 };
372             }
373             else {
374 0         0 my $out;
375             return sub {
376 0     0   0 my $hitProps = shift;
377 0 0       0 if (!$out) {
378 0         0 $out = IO::File->new($preview, "w");
379 0 0       0 if (!$out) {
380 0         0 die "Couldn't open file $preview - $!.";
381             }
382             }
383 0         0 print $out $hitProps->{Question}, "\n";
384 0         0 };
385             }
386             }
387              
388             sub xmlEntityEscapeHashValues {
389 0     0 0 0 my $hash = shift;
390 0         0 my $newHash = {};
391 0         0 while (my ($key,$value) = each %$hash) {
392 0 0       0 if (!exists $newHash->{"${key}_raw"}) {
393 0         0 $newHash->{"${key}_raw"} = $hash->{$key};
394             }
395 0         0 $newHash->{$key} = xmlEntityEscape($hash->{$key});
396             }
397 0         0 return $newHash;
398             }
399              
400             sub xmlEntityEscape {
401 1     1 0 12 my $text = shift;
402 1 50       4 return $text unless defined($text);
403 1         9 $text =~ s/[&'"<>]/xmlCharacterEscape($&)/egs;
  1         4  
404 1         5 return $text;
405             }
406              
407             sub xmlCharacterEscape {
408 1 50   1 0 6 if ($_[0] eq "<") { return "<"; }
  1 0       7  
    0          
    0          
    0          
409 0           elsif ($_[0] eq ">") { return ">"; }
410 0           elsif ($_[0] eq "&") { return "&"; }
411 0           elsif ($_[0] eq "\"") { return """; }
412 0           elsif ($_[0] eq "'") { return "'"; }
413 0           return $_[0];
414             }
415              
416             return 1;