line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Paws::MachineLearning::RedshiftDataSpec; |
2
|
1
|
|
|
1
|
|
508
|
use Moose; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
10
|
|
3
|
|
|
|
|
|
|
# Required, read-only attribute: credentials used to connect to the Amazon Redshift database.
has DatabaseCredentials => (is => 'ro', isa => 'Paws::MachineLearning::RedshiftDatabaseCredentials', required => 1); |
4
|
|
|
|
|
|
|
# Required, read-only attribute: the DatabaseName and ClusterIdentifier of the Amazon Redshift DataSource.
has DatabaseInformation => (is => 'ro', isa => 'Paws::MachineLearning::RedshiftDatabase', required => 1); |
5
|
|
|
|
|
|
|
# Optional, read-only attribute: JSON string describing the splitting and rearrangement applied to the DataSource.
has DataRearrangement => (is => 'ro', isa => 'Str'); |
6
|
|
|
|
|
|
|
# Optional, read-only attribute: JSON string holding the DataSource schema (not required when DataSchemaUri is given).
has DataSchema => (is => 'ro', isa => 'Str'); |
7
|
|
|
|
|
|
|
# Optional, read-only attribute: schema location for the Amazon Redshift DataSource.
has DataSchemaUri => (is => 'ro', isa => 'Str'); |
8
|
|
|
|
|
|
|
# Required, read-only attribute: Amazon S3 location that stores the result set of the SelectSqlQuery query.
has S3StagingLocation => (is => 'ro', isa => 'Str', required => 1); |
9
|
|
|
|
|
|
|
# Required, read-only attribute: SQL query to execute on the Amazon Redshift database.
has SelectSqlQuery => (is => 'ro', isa => 'Str', required => 1); |
10
|
|
|
|
|
|
|
1; |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
### main pod documentation begin ### |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Paws::MachineLearning::RedshiftDataSpec |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 USAGE |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
This class represents one of two things: |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head3 Arguments in a call to a service |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
Use the attributes of this class as arguments to methods. You shouldn't make instances of this class. |
25
|
|
|
|
|
|
|
Each attribute should be used as a named argument in the calls that expect this type of object. |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
As an example, if Att1 is expected to be a Paws::MachineLearning::RedshiftDataSpec object: |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
$service_obj->Method(Att1 => { DatabaseCredentials => $value, ..., SelectSqlQuery => $value }); |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head3 Results returned from an API call |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
Use accessors for each attribute. If Att1 is expected to be a Paws::MachineLearning::RedshiftDataSpec object: |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
$result = $service_obj->Method(...); |
36
|
|
|
|
|
|
|
$result->Att1->DatabaseCredentials |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=head1 DESCRIPTION |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
Describes the data specification of an Amazon Redshift C<DataSource>. |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=head1 ATTRIBUTES |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=head2 B<REQUIRED> DatabaseCredentials => L<Paws::MachineLearning::RedshiftDatabaseCredentials> |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
Describes AWS Identity and Access Management (IAM) credentials that are |
48
|
|
|
|
|
|
|
used to connect to the Amazon Redshift database. |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=head2 B<REQUIRED> DatabaseInformation => L<Paws::MachineLearning::RedshiftDatabase> |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
Describes the C<DatabaseName> and C<ClusterIdentifier> for an Amazon |
54
|
|
|
|
|
|
|
Redshift C<DataSource>. |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
=head2 DataRearrangement => Str |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
A JSON string that represents the splitting and rearrangement |
60
|
|
|
|
|
|
|
processing to be applied to a C<DataSource>. If the |
61
|
|
|
|
|
|
|
C<DataRearrangement> parameter is not provided, all of the input data |
62
|
|
|
|
|
|
|
is used to create the C<DataSource>. |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
There are multiple parameters that control what data is used to create |
65
|
|
|
|
|
|
|
a datasource: |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=over |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=item * |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
B<C<percentBegin>> |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
Use C<percentBegin> to indicate the beginning of the range of the data |
74
|
|
|
|
|
|
|
used to create the Datasource. If you do not include C<percentBegin> |
75
|
|
|
|
|
|
|
and C<percentEnd>, Amazon ML includes all of the data when creating the |
76
|
|
|
|
|
|
|
datasource. |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=item * |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
B<C<percentEnd>> |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
Use C<percentEnd> to indicate the end of the range of the data used to |
83
|
|
|
|
|
|
|
create the Datasource. If you do not include C<percentBegin> and |
84
|
|
|
|
|
|
|
C<percentEnd>, Amazon ML includes all of the data when creating the |
85
|
|
|
|
|
|
|
datasource. |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
=item * |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
B<C<complement>> |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
The C<complement> parameter instructs Amazon ML to use the data that is |
92
|
|
|
|
|
|
|
not included in the range of C<percentBegin> to C<percentEnd> to create |
93
|
|
|
|
|
|
|
a datasource. The C<complement> parameter is useful if you need to |
94
|
|
|
|
|
|
|
create complementary datasources for training and evaluation. To create |
95
|
|
|
|
|
|
|
a complementary datasource, use the same values for C<percentBegin> and |
96
|
|
|
|
|
|
|
C<percentEnd>, along with the C<complement> parameter. |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
For example, the following two datasources do not share any data, and |
99
|
|
|
|
|
|
|
can be used to train and evaluate a model. The first datasource has 25 |
100
|
|
|
|
|
|
|
percent of the data, and the second one has 75 percent of the data. |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
Datasource for evaluation: C<{"splitting":{"percentBegin":0, |
103
|
|
|
|
|
|
|
"percentEnd":25}}> |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
Datasource for training: C<{"splitting":{"percentBegin":0, |
106
|
|
|
|
|
|
|
"percentEnd":25, "complement":"true"}}> |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=item * |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
B<C<strategy>> |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
To change how Amazon ML splits the data for a datasource, use the |
113
|
|
|
|
|
|
|
C<strategy> parameter. |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
The default value for the C<strategy> parameter is C<sequential>, |
116
|
|
|
|
|
|
|
meaning that Amazon ML takes all of the data records between the |
117
|
|
|
|
|
|
|
C<percentBegin> and C<percentEnd> parameters for the datasource, in the |
118
|
|
|
|
|
|
|
order that the records appear in the input data. |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
The following two C<DataRearrangement> lines are examples of |
121
|
|
|
|
|
|
|
sequentially ordered training and evaluation datasources: |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
Datasource for evaluation: C<{"splitting":{"percentBegin":70, |
124
|
|
|
|
|
|
|
"percentEnd":100, "strategy":"sequential"}}> |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
Datasource for training: C<{"splitting":{"percentBegin":70, |
127
|
|
|
|
|
|
|
"percentEnd":100, "strategy":"sequential", "complement":"true"}}> |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
To randomly split the input data into the proportions indicated by the |
130
|
|
|
|
|
|
|
C<percentBegin> and C<percentEnd> parameters, set the C<strategy> parameter |
131
|
|
|
|
|
|
|
to C<random> and provide a string that is used as the seed value for |
132
|
|
|
|
|
|
|
the random data splitting (for example, you can use the S3 path to your |
133
|
|
|
|
|
|
|
data as the random seed string). If you choose the random split |
134
|
|
|
|
|
|
|
strategy, Amazon ML assigns each row of data a pseudo-random number |
135
|
|
|
|
|
|
|
between 0 and 100, and then selects the rows that have an assigned |
136
|
|
|
|
|
|
|
number between C<percentBegin> and C<percentEnd>. Pseudo-random numbers |
137
|
|
|
|
|
|
|
are assigned using both the input seed string value and the byte offset |
138
|
|
|
|
|
|
|
as a seed, so changing the data results in a different split. Any |
139
|
|
|
|
|
|
|
existing ordering is preserved. The random splitting strategy ensures |
140
|
|
|
|
|
|
|
that variables in the training and evaluation data are distributed |
141
|
|
|
|
|
|
|
similarly. It is useful in the cases where the input data may have an |
142
|
|
|
|
|
|
|
implicit sort order, which would otherwise result in training and |
143
|
|
|
|
|
|
|
evaluation datasources containing non-similar data records. |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
The following two C<DataRearrangement> lines are examples of |
146
|
|
|
|
|
|
|
non-sequentially ordered training and evaluation datasources: |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
Datasource for evaluation: C<{"splitting":{"percentBegin":70, |
149
|
|
|
|
|
|
|
"percentEnd":100, "strategy":"random", |
150
|
|
|
|
|
|
|
"randomSeed":"s3://my_s3_path/bucket/file.csv"}}> |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
Datasource for training: C<{"splitting":{"percentBegin":70, |
153
|
|
|
|
|
|
|
"percentEnd":100, "strategy":"random", |
154
|
|
|
|
|
|
|
"randomSeed":"s3://my_s3_path/bucket/file.csv", "complement":"true"}}> |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
=back |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=head2 DataSchema => Str |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
A JSON string that represents the schema for an Amazon Redshift |
163
|
|
|
|
|
|
|
C<DataSource>. The C<DataSchema> defines the structure of the |
164
|
|
|
|
|
|
|
observation data in the data file(s) referenced in the C<DataSource>. |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
A C<DataSchema> is not required if you specify a C<DataSchemaUri>. |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
Define your C<DataSchema> as a series of key-value pairs. C<attributes> |
169
|
|
|
|
|
|
|
and C<excludedVariableNames> have an array of key-value pairs for their |
170
|
|
|
|
|
|
|
value. Use the following format to define your C<DataSchema>. |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
{ "version": "1.0", |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
"recordAnnotationFieldName": "F1", |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
"recordWeightFieldName": "F2", |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
"targetFieldName": "F3", |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
"dataFormat": "CSV", |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
"dataFileContainsHeader": true, |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
"attributes": [ |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
{ "fieldName": "F1", "fieldType": "TEXT" }, { "fieldName": "F2", |
187
|
|
|
|
|
|
|
"fieldType": "NUMERIC" }, { "fieldName": "F3", "fieldType": |
188
|
|
|
|
|
|
|
"CATEGORICAL" }, { "fieldName": "F4", "fieldType": "NUMERIC" }, { |
189
|
|
|
|
|
|
|
"fieldName": "F5", "fieldType": "CATEGORICAL" }, { "fieldName": "F6", |
190
|
|
|
|
|
|
|
"fieldType": "TEXT" }, { "fieldName": "F7", "fieldType": |
191
|
|
|
|
|
|
|
"WEIGHTED_INT_SEQUENCE" }, { "fieldName": "F8", "fieldType": |
192
|
|
|
|
|
|
|
"WEIGHTED_STRING_SEQUENCE" } ], |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
"excludedVariableNames": [ "F6" ] } |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
=head2 DataSchemaUri => Str |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
Describes the schema location for an Amazon Redshift C<DataSource>. |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
=head2 B<REQUIRED> S3StagingLocation => Str |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
Describes an Amazon S3 location to store the result set of the |
205
|
|
|
|
|
|
|
C<SelectSqlQuery> query. |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
=head2 B<REQUIRED> SelectSqlQuery => Str |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
Describes the SQL Query to execute on an Amazon Redshift database for |
211
|
|
|
|
|
|
|
an Amazon Redshift C<DataSource>. |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
=head1 SEE ALSO |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
This class forms part of L<Paws>, describing an object used in L<Paws::MachineLearning> |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
=head1 BUGS and CONTRIBUTIONS |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
The source code is located here: https://github.com/pplu/aws-sdk-perl |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
Please report bugs to: https://github.com/pplu/aws-sdk-perl/issues |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
=cut |
226
|
|
|
|
|
|
|
|