@@ -111,6 +111,7 @@ def _fetch_redshift_column_attributes(self, column):
111
111
text += " SORTKEY"
112
112
return text
113
113
114
+
114
115
class RedshiftDialect (PGDialect_psycopg2 ):
115
116
name = 'redshift'
116
117
ddl_compiler = RedShiftDDLCompiler
@@ -182,37 +183,119 @@ class UnloadFromSelect(Executable, ClauseElement):
182
183
''' Prepares a RedShift unload statement to drop a query to Amazon S3
183
184
http://docs.aws.amazon.com/redshift/latest/dg/r_UNLOAD_command_examples.html
184
185
'''
185
def __init__(self, select, unload_location, access_key, secret_key,
             session_token='', options=None):
    ''' Initializes an UnloadFromSelect instance

    Args:
        self: An instance of UnloadFromSelect
        select: The select statement to be unloaded
        unload_location: The Amazon S3 location where the result will be
            stored
        access_key: AWS Access Key (required)
        secret_key: AWS Secret Key (required)
        session_token: AWS STS Session Token (optional)
        options: Optional dict of parameters that modify the UNLOAD sql.
            When omitted (or None) a new empty dict is used.
            parallel: If 'ON' the result will be written to multiple
                files. If 'OFF' the result will write to one (1) file up
                to 6.2GB before splitting; defaults to 'ON'
            add_quotes: Boolean value for ADDQUOTES; defaults to True
            null_as: Optional string that represents a null value in the
                unload output
            delimiter: File delimiter; defaults to ','
    '''
    self.select = select
    self.unload_location = unload_location
    self.access_key = access_key
    self.secret_key = secret_key
    self.session_token = session_token
    # A literal `{}` default is evaluated once at definition time and would
    # be shared by every instance created without options, so a mutation
    # through one instance would leak into the others. Allocate per call.
    self.options = {} if options is None else options
203
210
204
211
205
212
@compiles(UnloadFromSelect)
def visit_unload_from_select(element, compiler, **kw):
    ''' Renders an UnloadFromSelect element as a RedShift UNLOAD statement

    Args:
        element: The UnloadFromSelect instance being compiled
        compiler: The active compiler; used to render element.select with
            unload_select=True and literal_binds=True
        **kw: Accepted for the compiler hook signature; not used here

    Returns:
        The UNLOAD sql text as a string
    '''
    # NOTE(review): the leading whitespace of the template lines is not
    # recoverable from the view this was rewritten from - confirm against
    # the checked-in file if exact whitespace matters.
    template = (
        "\n"
        "UNLOAD ('%(query)s') TO '%(unload_location)s'\n"
        "CREDENTIALS 'aws_access_key_id=%(access_key)s;aws_secret_access_key=%(secret_key)s%(session_token)s'\n"
        "DELIMITER '%(delimiter)s'\n"
        "%(add_quotes)s\n"
        "%(null_as)s\n"
        "ALLOWOVERWRITE\n"
        "PARALLEL %(parallel)s;\n"
    )

    # Render the inner select first, exactly as the statement needs it.
    rendered_query = compiler.process(
        element.select, unload_select=True, literal_binds=True)

    # The STS token is only appended to the credentials when one was given.
    if element.session_token:
        token_clause = ';token=%s' % element.session_token
    else:
        token_clause = ''

    opts = element.options

    quotes_clause = ''
    if opts.get('add_quotes', True):
        quotes_clause = 'ADDQUOTES'

    null_marker = opts.get('null_as')
    null_clause = ("NULL '%s'" % null_marker) if null_marker else ''

    values = dict(
        query=rendered_query,
        unload_location=element.unload_location,
        access_key=element.access_key,
        secret_key=element.secret_key,
        session_token=token_clause,
        add_quotes=quotes_clause,
        null_as=null_clause,
        delimiter=opts.get('delimiter', ','),
        parallel=opts.get('parallel', 'ON'),
    )
    return template % values
235
+
236
+
237
class CopyCommand(Executable, ClauseElement):
    ''' Prepares a RedShift COPY statement
    '''
    def __init__(self, schema_name, table_name, data_location, access_key,
                 secret_key, session_token='', options=None):
        ''' Initializes a CopyCommand instance

        Args:
            self: An instance of CopyCommand
            schema_name: Schema associated with the table_name
            table_name: The table to copy the data into
            data_location: The Amazon S3 location from where to copy - or
                a manifest file if the 'manifest' option is used
            access_key: AWS Access Key (required)
            secret_key: AWS Secret Key (required)
            session_token: AWS STS Session Token (optional)
            options: Optional dict of parameters that modify the COPY sql.
                When omitted (or None) a new empty dict is used.
                delimiter: File delimiter; defaults to ','
                ignore_header: Integer value of number of lines to skip at
                    the start of each file; defaults to 0
                null: Optional string value denoting what to interpret as
                    a NULL value from the file
                manifest: Boolean value denoting whether data_location is
                    a manifest file; defaults to False
                empty_as_null: Boolean value denoting whether to load
                    VARCHAR fields with empty values as NULL instead of
                    empty string; defaults to True
                blanks_as_null: Boolean value denoting whether to load
                    VARCHAR fields with whitespace only values as NULL
                    instead of whitespace; defaults to True
        '''
        self.schema_name = schema_name
        self.table_name = table_name
        self.data_location = data_location
        self.access_key = access_key
        self.secret_key = secret_key
        self.session_token = session_token
        # A literal `{}` default is evaluated once at definition time and
        # would be shared by every instance created without options, so a
        # mutation through one instance would leak into the others.
        self.options = {} if options is None else options
268
+
269
+
270
@compiles(CopyCommand)
def visit_copy_command(element, compiler, **kw):
    ''' Renders a CopyCommand element as a RedShift COPY statement

    Args:
        element: The CopyCommand instance being compiled
        compiler: The active compiler; not used by this renderer
        **kw: Accepted for the compiler hook signature; not used here

    Returns:
        The COPY sql text as a string
    '''
    # NOTE(review): the leading whitespace of the template lines is not
    # recoverable from the view this was rewritten from - confirm against
    # the checked-in file if exact whitespace matters.
    template = (
        "\n"
        "COPY %(schema_name)s.%(table_name)s FROM '%(data_location)s'\n"
        "CREDENTIALS 'aws_access_key_id=%(access_key)s;aws_secret_access_key=%(secret_key)s%(session_token)s'\n"
        "CSV\n"
        "TRUNCATECOLUMNS\n"
        "DELIMITER '%(delimiter)s'\n"
        "IGNOREHEADER %(ignore_header)s\n"
        "%(null)s\n"
        "%(manifest)s\n"
        "%(empty_as_null)s\n"
        "%(blanks_as_null)s;\n"
    )

    opts = element.options

    # The STS token is only appended to the credentials when one was given.
    if element.session_token:
        token_clause = ';token=%s' % element.session_token
    else:
        token_clause = ''

    null_marker = opts.get('null')
    null_clause = ("NULL '%s'" % null_marker) if null_marker else ''

    def keyword_if(option_name, default, keyword):
        # Emit the bare sql keyword when the option is truthy, else nothing.
        return keyword if opts.get(option_name, default) else ''

    values = {
        'schema_name': element.schema_name,
        'table_name': element.table_name,
        'data_location': element.data_location,
        'access_key': element.access_key,
        'secret_key': element.secret_key,
        'session_token': token_clause,
        'null': null_clause,
        'delimiter': opts.get('delimiter', ','),
        'ignore_header': opts.get('ignore_header', 0),
        'manifest': keyword_if('manifest', False, 'MANIFEST'),
        'empty_as_null': keyword_if('empty_as_null', True, 'EMPTYASNULL'),
        'blanks_as_null': keyword_if('blanks_as_null', True, 'BLANKSASNULL'),
    }
    return template % values
298
+
216
299
217
300
@compiles (BindParameter )
218
301
def visit_bindparam (bindparam , compiler , ** kw ):
0 commit comments