File Coverage

blib/lib/Paws/MachineLearning/S3DataSpec.pm
Criterion Covered Total %
statement 3 3 100.0
branch n/a
condition n/a
subroutine 1 1 100.0
pod n/a
total 4 4 100.0


line stmt bran cond sub pod time code
1             package Paws::MachineLearning::S3DataSpec;
2 1     1   457 use Moose;
  1         6  
  1         11  
3             has DataLocationS3 => (is => 'ro', isa => 'Str', required => 1);
4             has DataRearrangement => (is => 'ro', isa => 'Str');
5             has DataSchema => (is => 'ro', isa => 'Str');
6             has DataSchemaLocationS3 => (is => 'ro', isa => 'Str');
7             1;
8              
9             ### main pod documentation begin ###
10              
11             =head1 NAME
12              
13             Paws::MachineLearning::S3DataSpec
14              
15             =head1 USAGE
16              
17             This class represents one of two things:
18              
19             =head3 Arguments in a call to a service
20              
21             Use the attributes of this class as arguments to methods. You shouldn't make instances of this class.
22             Each attribute should be used as a named argument in the calls that expect this type of object.
23              
24             As an example, if Att1 is expected to be a Paws::MachineLearning::S3DataSpec object:
25              
26             $service_obj->Method(Att1 => { DataLocationS3 => $value, ..., DataSchemaLocationS3 => $value });
27              
28             =head3 Results returned from an API call
29              
30             Use accessors for each attribute. If Att1 is expected to be a Paws::MachineLearning::S3DataSpec object:
31              
32             $result = $service_obj->Method(...);
33             $result->Att1->DataLocationS3
34              
35             =head1 DESCRIPTION
36              
37             Describes the data specification of a C<DataSource>.
38              
39             =head1 ATTRIBUTES
40              
41              
42             =head2 B<REQUIRED> DataLocationS3 => Str
43              
44             The location of the data file(s) used by a C<DataSource>. The URI
45             specifies a data file or an Amazon Simple Storage Service (Amazon S3)
46             directory or bucket containing data files.
47              
48              
49             =head2 DataRearrangement => Str
50              
51             A JSON string that represents the splitting and rearrangement
52             processing to be applied to a C<DataSource>. If the
53             C<DataRearrangement> parameter is not provided, all of the input data
54             is used to create the C<DataSource>.
55              
56             There are multiple parameters that control what data is used to create
57             a datasource:
58              
59             =over
60              
61             =item *
62              
63             B<C<percentBegin>>
64              
65             Use C<percentBegin> to indicate the beginning of the range of the data
66             used to create the Datasource. If you do not include C<percentBegin>
67             and C<percentEnd>, Amazon ML includes all of the data when creating the
68             datasource.
69              
70             =item *
71              
72             B<C<percentEnd>>
73              
74             Use C<percentEnd> to indicate the end of the range of the data used to
75             create the Datasource. If you do not include C<percentBegin> and
76             C<percentEnd>, Amazon ML includes all of the data when creating the
77             datasource.
78              
79             =item *
80              
81             B<C<complement>>
82              
83             The C<complement> parameter instructs Amazon ML to use the data that is
84             not included in the range of C<percentBegin> to C<percentEnd> to create
85             a datasource. The C<complement> parameter is useful if you need to
86             create complementary datasources for training and evaluation. To create
87             a complementary datasource, use the same values for C<percentBegin> and
88             C<percentEnd>, along with the C<complement> parameter.
89              
90             For example, the following two datasources do not share any data, and
91             can be used to train and evaluate a model. The first datasource has 25
92             percent of the data, and the second one has 75 percent of the data.
93              
94             Datasource for evaluation: C<{"splitting":{"percentBegin":0,
95             "percentEnd":25}}>
96              
97             Datasource for training: C<{"splitting":{"percentBegin":0,
98             "percentEnd":25, "complement":"true"}}>
99              
100             =item *
101              
102             B<C<strategy>>
103              
104             To change how Amazon ML splits the data for a datasource, use the
105             C<strategy> parameter.
106              
107             The default value for the C<strategy> parameter is C<sequential>,
108             meaning that Amazon ML takes all of the data records between the
109             C<percentBegin> and C<percentEnd> parameters for the datasource, in the
110             order that the records appear in the input data.
111              
112             The following two C<DataRearrangement> lines are examples of
113             sequentially ordered training and evaluation datasources:
114              
115             Datasource for evaluation: C<{"splitting":{"percentBegin":70,
116             "percentEnd":100, "strategy":"sequential"}}>
117              
118             Datasource for training: C<{"splitting":{"percentBegin":70,
119             "percentEnd":100, "strategy":"sequential", "complement":"true"}}>
120              
121             To randomly split the input data into the proportions indicated by the
122             C<percentBegin> and C<percentEnd> parameters, set the C<strategy> parameter
123             to C<random> and provide a string that is used as the seed value for
124             the random data splitting (for example, you can use the S3 path to your
125             data as the random seed string). If you choose the random split
126             strategy, Amazon ML assigns each row of data a pseudo-random number
127             between 0 and 100, and then selects the rows that have an assigned
128             number between C<percentBegin> and C<percentEnd>. Pseudo-random numbers
129             are assigned using both the input seed string value and the byte offset
130             as a seed, so changing the data results in a different split. Any
131             existing ordering is preserved. The random splitting strategy ensures
132             that variables in the training and evaluation data are distributed
133             similarly. It is useful in the cases where the input data may have an
134             implicit sort order, which would otherwise result in training and
135             evaluation datasources containing non-similar data records.
136              
137             The following two C<DataRearrangement> lines are examples of
138             non-sequentially ordered training and evaluation datasources:
139              
140             Datasource for evaluation: C<{"splitting":{"percentBegin":70,
141             "percentEnd":100, "strategy":"random",
142             "randomSeed":"s3://my_s3_path/bucket/file.csv"}}>
143              
144             Datasource for training: C<{"splitting":{"percentBegin":70,
145             "percentEnd":100, "strategy":"random",
146             "randomSeed":"s3://my_s3_path/bucket/file.csv", "complement":"true"}}>
147              
148             =back
149              
150              
151              
152             =head2 DataSchema => Str
153              
154             A JSON string that represents the schema for an Amazon S3
155             C<DataSource>. The C<DataSchema> defines the structure of the
156             observation data in the data file(s) referenced in the C<DataSource>.
157              
158             You must provide either the C<DataSchema> or the
159             C<DataSchemaLocationS3>.
160              
161             Define your C<DataSchema> as a series of key-value pairs. C<attributes>
162             and C<excludedVariableNames> have an array of key-value pairs for their
163             value. Use the following format to define your C<DataSchema>.
164              
165             { "version": "1.0",
166              
167             "recordAnnotationFieldName": "F1",
168              
169             "recordWeightFieldName": "F2",
170              
171             "targetFieldName": "F3",
172              
173             "dataFormat": "CSV",
174              
175             "dataFileContainsHeader": true,
176              
177             "attributes": [
178              
179             { "fieldName": "F1", "fieldType": "TEXT" }, { "fieldName": "F2",
180             "fieldType": "NUMERIC" }, { "fieldName": "F3", "fieldType":
181             "CATEGORICAL" }, { "fieldName": "F4", "fieldType": "NUMERIC" }, {
182             "fieldName": "F5", "fieldType": "CATEGORICAL" }, { "fieldName": "F6",
183             "fieldType": "TEXT" }, { "fieldName": "F7", "fieldType":
184             "WEIGHTED_INT_SEQUENCE" }, { "fieldName": "F8", "fieldType":
185             "WEIGHTED_STRING_SEQUENCE" } ],
186              
187             "excludedVariableNames": [ "F6" ] }
188              
189              
190             =head2 DataSchemaLocationS3 => Str
191              
192             Describes the schema location in Amazon S3. You must provide either the
193             C<DataSchema> or the C<DataSchemaLocationS3>.
194              
195              
196              
197             =head1 SEE ALSO
198              
199             This class forms part of L<Paws>, describing an object used in L<Paws::MachineLearning>.
200              
201             =head1 BUGS and CONTRIBUTIONS
202              
203             The source code is located here: https://github.com/pplu/aws-sdk-perl
204              
205             Please report bugs to: https://github.com/pplu/aws-sdk-perl/issues
206              
207             =cut
208