line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Paws::MachineLearning::RDSDataSpec;

# Data specification of an Amazon Relational Database Service (Amazon RDS)
# DataSource for Amazon Machine Learning. Pure value object: read-only
# attributes mapping 1:1 onto the RDSDataSpec API shape (see POD below).

use Moose;

# IAM credentials used to connect to the Amazon RDS database.
has DatabaseCredentials => (
  is       => 'ro',
  isa      => 'Paws::MachineLearning::RDSDatabaseCredentials',
  required => 1,
);

# DatabaseName / InstanceIdentifier of the source Amazon RDS database.
has DatabaseInformation => (
  is       => 'ro',
  isa      => 'Paws::MachineLearning::RDSDatabase',
  required => 1,
);

# Optional JSON string describing splitting/rearrangement applied to the
# input data; when absent, all input data is used to create the DataSource.
has DataRearrangement => (
  is  => 'ro',
  isa => 'Str',
);

# Optional JSON schema for the observation data; not required when
# DataSchemaUri is given instead.
has DataSchema => (
  is  => 'ro',
  isa => 'Str',
);

# Optional Amazon S3 location of the DataSchema document.
has DataSchemaUri => (
  is  => 'ro',
  isa => 'Str',
);

# Role (DataPipelineDefaultResourceRole) assumed by the EC2 instance that
# performs the RDS -> S3 copy operation.
has ResourceRole => (
  is       => 'ro',
  isa      => 'Str',
  required => 1,
);

# S3 location where the data retrieved via SelectSqlQuery is staged.
has S3StagingLocation => (
  is       => 'ro',
  isa      => 'Str',
  required => 1,
);

# Security group IDs granting access to the VPC-based RDS DB instance.
has SecurityGroupIds => (
  is       => 'ro',
  isa      => 'ArrayRef[Str|Undef]',
  required => 1,
);

# SQL query used to retrieve the observation data for the DataSource.
has SelectSqlQuery => (
  is       => 'ro',
  isa      => 'Str',
  required => 1,
);

# Role (DataPipelineDefaultRole) assumed by AWS Data Pipeline to monitor
# the progress of the RDS -> S3 copy task.
has ServiceRole => (
  is       => 'ro',
  isa      => 'Str',
  required => 1,
);

# Subnet ID used to reach the VPC-based RDS DB instance.
has SubnetId => (
  is       => 'ro',
  isa      => 'Str',
  required => 1,
);

1;
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
### main pod documentation begin ### |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 NAME |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
Paws::MachineLearning::RDSDataSpec |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 USAGE |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
This class represents one of two things: |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=head3 Arguments in a call to a service |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
Use the attributes of this class as arguments to methods. You shouldn't make instances of this class. |
29
|
|
|
|
|
|
|
Each attribute should be used as a named argument in the calls that expect this type of object. |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
As an example, if Att1 is expected to be a Paws::MachineLearning::RDSDataSpec object: |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
$service_obj->Method(Att1 => { DatabaseCredentials => $value, ..., SubnetId => $value }); |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
=head3 Results returned from an API call |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
Use accessors for each attribute. If Att1 is expected to be a Paws::MachineLearning::RDSDataSpec object: |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
$result = $service_obj->Method(...); |
40
|
|
|
|
|
|
|
$result->Att1->DatabaseCredentials |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=head1 DESCRIPTION |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
The data specification of an Amazon Relational Database Service (Amazon |
45
|
|
|
|
|
|
|
RDS) C<DataSource>. |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=head1 ATTRIBUTES |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=head2 B<REQUIRED> DatabaseCredentials => L<Paws::MachineLearning::RDSDatabaseCredentials> |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
The AWS Identity and Access Management (IAM) credentials that are used |
53
|
|
|
|
|
|
|
to connect to the Amazon RDS database. |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=head2 B<REQUIRED> DatabaseInformation => L<Paws::MachineLearning::RDSDatabase> |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
Describes the C<DatabaseName> and C<InstanceIdentifier> of an Amazon |
59
|
|
|
|
|
|
|
RDS database. |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
=head2 DataRearrangement => Str |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
A JSON string that represents the splitting and rearrangement |
65
|
|
|
|
|
|
|
processing to be applied to a C<DataSource>. If the |
66
|
|
|
|
|
|
|
C<DataRearrangement> parameter is not provided, all of the input data |
67
|
|
|
|
|
|
|
is used to create the C<Datasource>. |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
There are multiple parameters that control what data is used to create |
70
|
|
|
|
|
|
|
a datasource: |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
=over |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=item * |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
B<C<percentBegin>> |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
Use C<percentBegin> to indicate the beginning of the range of the data |
79
|
|
|
|
|
|
|
used to create the Datasource. If you do not include C<percentBegin> |
80
|
|
|
|
|
|
|
and C<percentEnd>, Amazon ML includes all of the data when creating the |
81
|
|
|
|
|
|
|
datasource. |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=item * |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
B<C<percentEnd>> |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
Use C<percentEnd> to indicate the end of the range of the data used to |
88
|
|
|
|
|
|
|
create the Datasource. If you do not include C<percentBegin> and |
89
|
|
|
|
|
|
|
C<percentEnd>, Amazon ML includes all of the data when creating the |
90
|
|
|
|
|
|
|
datasource. |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
=item * |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
B<C<complement>> |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
The C<complement> parameter instructs Amazon ML to use the data that is |
97
|
|
|
|
|
|
|
not included in the range of C<percentBegin> to C<percentEnd> to create |
98
|
|
|
|
|
|
|
a datasource. The C<complement> parameter is useful if you need to |
99
|
|
|
|
|
|
|
create complementary datasources for training and evaluation. To create |
100
|
|
|
|
|
|
|
a complementary datasource, use the same values for C<percentBegin> and |
101
|
|
|
|
|
|
|
C<percentEnd>, along with the C<complement> parameter. |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
For example, the following two datasources do not share any data, and |
104
|
|
|
|
|
|
|
can be used to train and evaluate a model. The first datasource has 25 |
105
|
|
|
|
|
|
|
percent of the data, and the second one has 75 percent of the data. |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
Datasource for evaluation: C<{"splitting":{"percentBegin":0, |
108
|
|
|
|
|
|
|
"percentEnd":25}}> |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
Datasource for training: C<{"splitting":{"percentBegin":0, |
111
|
|
|
|
|
|
|
"percentEnd":25, "complement":"true"}}> |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=item * |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
B<C<strategy>> |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
To change how Amazon ML splits the data for a datasource, use the |
118
|
|
|
|
|
|
|
C<strategy> parameter. |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
The default value for the C<strategy> parameter is C<sequential>, |
121
|
|
|
|
|
|
|
meaning that Amazon ML takes all of the data records between the |
122
|
|
|
|
|
|
|
C<percentBegin> and C<percentEnd> parameters for the datasource, in the |
123
|
|
|
|
|
|
|
order that the records appear in the input data. |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
The following two C<DataRearrangement> lines are examples of |
126
|
|
|
|
|
|
|
sequentially ordered training and evaluation datasources: |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
Datasource for evaluation: C<{"splitting":{"percentBegin":70, |
129
|
|
|
|
|
|
|
"percentEnd":100, "strategy":"sequential"}}> |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
Datasource for training: C<{"splitting":{"percentBegin":70, |
132
|
|
|
|
|
|
|
"percentEnd":100, "strategy":"sequential", "complement":"true"}}> |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
To randomly split the input data into the proportions indicated by the |
135
|
|
|
|
|
|
|
percentBegin and percentEnd parameters, set the C<strategy> parameter |
136
|
|
|
|
|
|
|
to C<random> and provide a string that is used as the seed value for |
137
|
|
|
|
|
|
|
the random data splitting (for example, you can use the S3 path to your |
138
|
|
|
|
|
|
|
data as the random seed string). If you choose the random split |
139
|
|
|
|
|
|
|
strategy, Amazon ML assigns each row of data a pseudo-random number |
140
|
|
|
|
|
|
|
between 0 and 100, and then selects the rows that have an assigned |
141
|
|
|
|
|
|
|
number between C<percentBegin> and C<percentEnd>. Pseudo-random numbers |
142
|
|
|
|
|
|
|
are assigned using both the input seed string value and the byte offset |
143
|
|
|
|
|
|
|
as a seed, so changing the data results in a different split. Any |
144
|
|
|
|
|
|
|
existing ordering is preserved. The random splitting strategy ensures |
145
|
|
|
|
|
|
|
that variables in the training and evaluation data are distributed |
146
|
|
|
|
|
|
|
similarly. It is useful in the cases where the input data may have an |
147
|
|
|
|
|
|
|
implicit sort order, which would otherwise result in training and |
148
|
|
|
|
|
|
|
evaluation datasources containing non-similar data records. |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
The following two C<DataRearrangement> lines are examples of |
151
|
|
|
|
|
|
|
non-sequentially ordered training and evaluation datasources: |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
Datasource for evaluation: C<{"splitting":{"percentBegin":70, |
154
|
|
|
|
|
|
|
"percentEnd":100, "strategy":"random", |
155
|
|
|
|
|
|
|
"randomSeed":"s3://my_s3_path/bucket/file.csv"}}> |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
Datasource for training: C<{"splitting":{"percentBegin":70, |
158
|
|
|
|
|
|
|
"percentEnd":100, "strategy":"random", |
159
|
|
|
|
|
|
|
"randomSeed":"s3://my_s3_path/bucket/file.csv", "complement":"true"}}> |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=back |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
=head2 DataSchema => Str |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
A JSON string that represents the schema for an Amazon RDS |
168
|
|
|
|
|
|
|
C<DataSource>. The C<DataSchema> defines the structure of the |
169
|
|
|
|
|
|
|
observation data in the data file(s) referenced in the C<DataSource>. |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
A C<DataSchema> is not required if you specify a C<DataSchemaUri>. |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
Define your C<DataSchema> as a series of key-value pairs. C<attributes> |
174
|
|
|
|
|
|
|
and C<excludedVariableNames> have an array of key-value pairs for their |
175
|
|
|
|
|
|
|
value. Use the following format to define your C<DataSchema>. |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
{ "version": "1.0", |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
"recordAnnotationFieldName": "F1", |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
"recordWeightFieldName": "F2", |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
"targetFieldName": "F3", |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
"dataFormat": "CSV", |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
"dataFileContainsHeader": true, |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
"attributes": [ |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
{ "fieldName": "F1", "fieldType": "TEXT" }, { "fieldName": "F2", |
192
|
|
|
|
|
|
|
"fieldType": "NUMERIC" }, { "fieldName": "F3", "fieldType": |
193
|
|
|
|
|
|
|
"CATEGORICAL" }, { "fieldName": "F4", "fieldType": "NUMERIC" }, { |
194
|
|
|
|
|
|
|
"fieldName": "F5", "fieldType": "CATEGORICAL" }, { "fieldName": "F6", |
195
|
|
|
|
|
|
|
"fieldType": "TEXT" }, { "fieldName": "F7", "fieldType": |
196
|
|
|
|
|
|
|
"WEIGHTED_INT_SEQUENCE" }, { "fieldName": "F8", "fieldType": |
197
|
|
|
|
|
|
|
"WEIGHTED_STRING_SEQUENCE" } ], |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
"excludedVariableNames": [ "F6" ] } |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
=head2 DataSchemaUri => Str |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
The Amazon S3 location of the C<DataSchema>. |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=head2 B<REQUIRED> ResourceRole => Str |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
The role (DataPipelineDefaultResourceRole) assumed by an Amazon Elastic |
210
|
|
|
|
|
|
|
Compute Cloud (Amazon EC2) instance to carry out the copy operation |
211
|
|
|
|
|
|
|
from Amazon RDS to an Amazon S3 task. For more information, see Role |
212
|
|
|
|
|
|
|
templates for data pipelines. |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
=head2 B<REQUIRED> S3StagingLocation => Str |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
The Amazon S3 location for staging Amazon RDS data. The data retrieved |
218
|
|
|
|
|
|
|
from Amazon RDS using C<SelectSqlQuery> is stored in this location. |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
=head2 B<REQUIRED> SecurityGroupIds => ArrayRef[Str|Undef] |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
The security group IDs to be used to access a VPC-based RDS DB |
224
|
|
|
|
|
|
|
instance. Ensure that there are appropriate ingress rules set up to |
225
|
|
|
|
|
|
|
allow access to the RDS DB instance. This attribute is used by Data |
226
|
|
|
|
|
|
|
Pipeline to carry out the copy operation from Amazon RDS to an Amazon |
227
|
|
|
|
|
|
|
S3 task. |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
=head2 B<REQUIRED> SelectSqlQuery => Str |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
The query that is used to retrieve the observation data for the |
233
|
|
|
|
|
|
|
C<DataSource>. |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
=head2 B<REQUIRED> ServiceRole => Str |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
The role (DataPipelineDefaultRole) assumed by AWS Data Pipeline service |
239
|
|
|
|
|
|
|
to monitor the progress of the copy task from Amazon RDS to Amazon S3. |
240
|
|
|
|
|
|
|
For more information, see Role templates for data pipelines. |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
=head2 B<REQUIRED> SubnetId => Str |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
The subnet ID to be used to access a VPC-based RDS DB instance. This |
246
|
|
|
|
|
|
|
attribute is used by Data Pipeline to carry out the copy task from |
247
|
|
|
|
|
|
|
Amazon RDS to Amazon S3. |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
=head1 SEE ALSO |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
This class forms part of L<Paws>, describing an object used in L<Paws::MachineLearning> |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
=head1 BUGS and CONTRIBUTIONS |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
The source code is located here: https://github.com/pplu/aws-sdk-perl |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
Please report bugs to: https://github.com/pplu/aws-sdk-perl/issues |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
=cut |
262
|
|
|
|
|
|
|
|