@@ -22,8 +22,6 @@ import org.apache.spark.sql.jdbc.JdbcDialects
 import org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils.{createConnectionFactory, getSchema, schemaString}
 import com.microsoft.sqlserver.jdbc.{SQLServerBulkCopy, SQLServerBulkCopyOptions}
 
-import scala.collection.mutable.ListBuffer
-
 /**
 * BulkCopyUtils Object implements common utility function used by both datapool and
 */
@@ -35,7 +33,7 @@ object BulkCopyUtils extends Logging {
     * a connection, sets connection properties and does a BulkWrite. Called when writing data to
     * master instance and data pools both. URL in options is used to create the relevant connection.
     *
-    * @param itertor - iterator for row of the partition.
+    * @param iterator - iterator for row of the partition.
     * @param dfColMetadata - array of ColumnMetadata type
     * @param options - SQLServerBulkJdbcOptions with url for the connection
     */
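
For readers unfamiliar with the underlying API: the write path described in this doc comment drives mssql-jdbc's bulk copy interface. A minimal standalone sketch, not part of the diff; `conn`, `sourceRs`, the destination table name, and the option values are placeholders:

import java.sql.{Connection, ResultSet}
import com.microsoft.sqlserver.jdbc.{SQLServerBulkCopy, SQLServerBulkCopyOptions}

def bulkWrite(conn: Connection, sourceRs: ResultSet): Unit = {
  val opts = new SQLServerBulkCopyOptions()
  opts.setBatchSize(10000)      // rows sent to the server per batch
  opts.setBulkCopyTimeout(600)  // seconds before the copy aborts

  val bulkCopy = new SQLServerBulkCopy(conn)
  try {
    bulkCopy.setBulkCopyOptions(opts)
    bulkCopy.setDestinationTableName("dbo.MyTable") // placeholder destination
    bulkCopy.writeToServer(sourceRs)                // streams the ResultSet into the table
  } finally {
    bulkCopy.close()
  }
}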
@@ -179,32 +177,6 @@ object BulkCopyUtils extends Logging {
         conn.createStatement.executeQuery(queryStr)
     }
 
-    /**
-     * getAutoCols
-     * utility function to get auto generated columns.
-     * Use auto generated column names to exclude them when matching schema.
-     */
-    private[spark] def getAutoCols(
-            conn: Connection,
-            table: String): List[String] = {
-        // auto cols union computed cols, generated always cols, and node / edge table auto cols
-        val queryStr = s"""SELECT name
-            FROM sys.columns
-            WHERE object_id = OBJECT_ID('${table}')
-            AND (is_computed = 1 -- computed column
-            OR generated_always_type > 0 -- generated always / temporal table
-            OR (is_hidden = 0 AND graph_type = 2)) -- graph table
-            """
-
-        val autoColRs = conn.createStatement.executeQuery(queryStr)
-        val autoCols = ListBuffer[String]()
-        while (autoColRs.next()) {
-            val colName = autoColRs.getString("name")
-            autoCols.append(colName)
-        }
-        autoCols.toList
-    }
-
     /**
      * getColMetadataMap
      * Utility function convert result set meta data to array.
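
With getAutoCols removed, the connector no longer discovers computed, generated-always, or graph columns on its own. If a caller still needs that list, for example to build a columnsToWrite value, the removed catalog query can be run manually. A minimal sketch, assuming an open java.sql.Connection `conn` and a trusted table name; the name is string-interpolated into the query, so never pass untrusted input:

def autoGeneratedCols(conn: java.sql.Connection, table: String): List[String] = {
  val rs = conn.createStatement.executeQuery(
    s"""SELECT name FROM sys.columns
        WHERE object_id = OBJECT_ID('${table}')
        AND (is_computed = 1                        -- computed column
             OR generated_always_type > 0           -- generated always / temporal table
             OR (is_hidden = 0 AND graph_type = 2)) -- graph table
     """)
  // drain the ResultSet into an immutable List (no ListBuffer needed)
  Iterator.continually(rs.next()).takeWhile(identity).map(_ => rs.getString("name")).toList
}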
@@ -290,35 +262,30 @@ object BulkCopyUtils extends Logging {
         val dfCols = df.schema
 
         val tableCols = getSchema(rs, JdbcDialects.get(url))
-        val autoCols = getAutoCols(conn, dbtable)
-
-        val columnsToWriteSet = columnsToWrite.split(",").toSet
-        logDebug(s"columnsToWrite: $columnsToWriteSet")
 
         val prefix = "Spark Dataframe and SQL Server table have differing"
 
-        // auto columns should not exist in df
-        assertIfCheckEnabled(dfCols.length + autoCols.length == tableCols.length, strictSchemaCheck,
-            s"${prefix} numbers of columns")
-
         // if columnsToWrite provided by user, use it for metadata mapping. If not, use sql table.
-        if (columnsToWrite == "") {
-            val result = new Array[ColumnMetadata](columnsToWriteSet.size)
+        var metadataLen = tableCols.length
+        var columnsToWriteSet: Set[String] = Set()
+        if (columnsToWrite.isEmpty) {
+            assertIfCheckEnabled(dfCols.length == tableCols.length, strictSchemaCheck,
+                s"${prefix} numbers of columns")
         } else {
-            val result = new Array[ColumnMetadata](tableCols.length - autoCols.length)
+            columnsToWriteSet = columnsToWrite.split(",").map(_.trim).toSet
+            logDebug(s"columnsToWrite: $columnsToWriteSet")
+            metadataLen = columnsToWriteSet.size
         }
 
-        var nonAutoColIndex = 0
+        var colMappingIndex = 0
+        val result = new Array[ColumnMetadata](metadataLen)
 
         for (i <- 0 to tableCols.length-1) {
             val tableColName = tableCols(i).name
             var dfFieldIndex = -1
-            if (!columnsToWriteSet.isEmpty && !columnsToWriteSet.contains(tableColName)) {
-                // if columnsToWrite provided, and column name not in it, skip column mapping and ColumnMetadata
+            // if columnsToWrite provided, and column name not in it, skip column mapping and ColumnMetadata
+            if (!columnsToWrite.isEmpty && !columnsToWriteSet.contains(tableColName)) {
                 logDebug(s"skipping col index $i col name $tableColName, user not provided in columnsToWrite list")
-            } else if (autoCols.contains(tableColName)) {
-                // if auto columns, skip column mapping and ColumnMetadata
-                logDebug(s"skipping auto generated col index $i col name $tableColName dfFieldIndex $dfFieldIndex")
             }else {
                 var dfColName: String = ""
                 if (isCaseSensitive) {
@@ -361,15 +328,15 @@ object BulkCopyUtils extends Logging {
                     s"DF col ${dfColName} nullable config is ${dfCols(dfFieldIndex).nullable} " +
                     s"Table col ${tableColName} nullable config is ${tableCols(i).nullable}")
 
-                // Schema check passed for element, Create ColMetaData only for non auto generated column
-                result(nonAutoColIndex) = new ColumnMetadata(
+                // Schema check passed for element, Create ColMetaData for columns
+                result(colMappingIndex) = new ColumnMetadata(
                     rs.getMetaData().getColumnName(i+1),
                     rs.getMetaData().getColumnType(i+1),
                     rs.getMetaData().getPrecision(i+1),
                     rs.getMetaData().getScale(i+1),
                     dfFieldIndex
                 )
-                nonAutoColIndex += 1
+                colMappingIndex += 1
             }
         }
         result
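
With this change, the ColumnMetadata array is sized from the full table schema by default, or from the user's columnsToWrite set when one is supplied; entries are split on commas and trimmed, so whitespace around names is tolerated. A hedged usage sketch, with the table name, column names, and option values as placeholders:

df.write
  .format("com.microsoft.sqlserver.jdbc.spark")
  .mode("append")
  .option("url", url)                          // JDBC connection string
  .option("dbtable", "dbo.MyTable")            // placeholder destination table
  .option("columnsToWrite", "id, name, price") // trimmed, then matched against table columns
  .save()

Columns absent from the set are skipped during mapping, so identity or computed columns can still be excluded explicitly even though the automatic exclusion removed above is gone.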