
Commit 2a1506e

leave out auto col and use user provided col mapping
1 parent 6d9e49f commit 2a1506e

File tree

1 file changed: +16 -49 lines


src/main/scala/com/microsoft/sqlserver/jdbc/spark/utils/BulkCopyUtils.scala

Lines changed: 16 additions & 49 deletions
@@ -22,8 +22,6 @@ import org.apache.spark.sql.jdbc.JdbcDialects
 import org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils.{createConnectionFactory, getSchema, schemaString}
 import com.microsoft.sqlserver.jdbc.{SQLServerBulkCopy, SQLServerBulkCopyOptions}
 
-import scala.collection.mutable.ListBuffer
-
 /**
  * BulkCopyUtils Object implements common utility function used by both datapool and
  */
@@ -35,7 +33,7 @@ object BulkCopyUtils extends Logging {
      * a connection, sets connection properties and does a BulkWrite. Called when writing data to
      * master instance and data pools both. URL in options is used to create the relevant connection.
      *
-     * @param itertor - iterator for row of the partition.
+     * @param iterator - iterator for row of the partition.
      * @param dfColMetadata - array of ColumnMetadata type
      * @param options - SQLServerBulkJdbcOptions with url for the connection
      */
@@ -179,32 +177,6 @@ object BulkCopyUtils extends Logging {
         conn.createStatement.executeQuery(queryStr)
     }
 
-    /**
-     * getAutoCols
-     * utility function to get auto generated columns.
-     * Use auto generated column names to exclude them when matching schema.
-     */
-    private[spark] def getAutoCols(
-            conn: Connection,
-            table: String): List[String] = {
-        // auto cols union computed cols, generated always cols, and node / edge table auto cols
-        val queryStr = s"""SELECT name
-                FROM sys.columns
-                WHERE object_id = OBJECT_ID('${table}')
-                AND (is_computed = 1 -- computed column
-                OR generated_always_type > 0 -- generated always / temporal table
-                OR (is_hidden = 0 AND graph_type = 2)) -- graph table
-                """
-
-        val autoColRs = conn.createStatement.executeQuery(queryStr)
-        val autoCols = ListBuffer[String]()
-        while (autoColRs.next()) {
-            val colName = autoColRs.getString("name")
-            autoCols.append(colName)
-        }
-        autoCols.toList
-    }
-
     /**
      * getColMetadataMap
      * Utility function convert result set meta data to array.
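
Note: with getAutoCols removed, the connector no longer detects computed, generated-always, or graph columns on the caller's behalf; the caller must leave them out of columnsToWrite instead. A minimal sketch of how a user could rebuild that list outside the connector, reusing the removed sys.columns query (the helper name findAutoGeneratedCols is hypothetical):

import java.sql.Connection
import scala.collection.mutable.ListBuffer

// Hypothetical standalone helper, not part of the connector after this commit.
// Runs the same sys.columns query the removed getAutoCols used, so a caller
// can discover auto-generated columns and exclude them from columnsToWrite.
def findAutoGeneratedCols(conn: Connection, table: String): List[String] = {
    val queryStr = s"""SELECT name
            FROM sys.columns
            WHERE object_id = OBJECT_ID('${table}')
            AND (is_computed = 1 -- computed column
            OR generated_always_type > 0 -- generated always / temporal table
            OR (is_hidden = 0 AND graph_type = 2)) -- graph table
            """
    val rs = conn.createStatement.executeQuery(queryStr)
    val autoCols = ListBuffer[String]()
    while (rs.next()) {
        autoCols.append(rs.getString("name"))
    }
    autoCols.toList
}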
@@ -290,35 +262,30 @@ object BulkCopyUtils extends Logging {
         val dfCols = df.schema
 
         val tableCols = getSchema(rs, JdbcDialects.get(url))
-        val autoCols = getAutoCols(conn, dbtable)
-
-        val columnsToWriteSet = columnsToWrite.split(",").toSet
-        logDebug(s"columnsToWrite: $columnsToWriteSet")
 
         val prefix = "Spark Dataframe and SQL Server table have differing"
 
-        // auto columns should not exist in df
-        assertIfCheckEnabled(dfCols.length + autoCols.length == tableCols.length, strictSchemaCheck,
-            s"${prefix} numbers of columns")
-
         // if columnsToWrite provided by user, use it for metadata mapping. If not, use sql table.
-        if (columnsToWrite == "") {
-            val result = new Array[ColumnMetadata](columnsToWriteSet.size)
+        var metadataLen = tableCols.length
+        var columnsToWriteSet: Set[String] = Set()
+        if (columnsToWrite.isEmpty) {
+            assertIfCheckEnabled(dfCols.length == tableCols.length, strictSchemaCheck,
+                s"${prefix} numbers of columns")
         } else {
-            val result = new Array[ColumnMetadata](tableCols.length - autoCols.length)
+            columnsToWriteSet = columnsToWrite.split(",").map(_.trim).toSet
+            logDebug(s"columnsToWrite: $columnsToWriteSet")
+            metadataLen = columnsToWriteSet.size
         }
 
-        var nonAutoColIndex = 0
+        var colMappingIndex = 0
+        val result = new Array[ColumnMetadata](metadataLen)
 
         for (i <- 0 to tableCols.length-1) {
             val tableColName = tableCols(i).name
             var dfFieldIndex = -1
-            if (!columnsToWriteSet.isEmpty && !columnsToWriteSet.contains(tableColName)) {
-                // if columnsToWrite provided, and column name not in it, skip column mapping and ColumnMetadata
+            // if columnsToWrite provided, and column name not in it, skip column mapping and ColumnMetadata
+            if (!columnsToWrite.isEmpty && !columnsToWriteSet.contains(tableColName)) {
                 logDebug(s"skipping col index $i col name $tableColName, user not provided in columnsToWrite list")
-            } else if (autoCols.contains(tableColName)) {
-                // if auto columns, skip column mapping and ColumnMetadata
-                logDebug(s"skipping auto generated col index $i col name $tableColName dfFieldIndex $dfFieldIndex")
             } else {
                 var dfColName: String = ""
                 if (isCaseSensitive) {
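
Note: the new parse trims whitespace around each comma-separated name before building the lookup set, so "col1, col2" and "col1,col2" behave identically. A quick illustration (column names invented):

val columnsToWrite = "FirstName, LastName ,Age"
val columnsToWriteSet = columnsToWrite.split(",").map(_.trim).toSet
// columnsToWriteSet == Set("FirstName", "LastName", "Age")
assert(columnsToWriteSet.contains("LastName")) // surrounding whitespace no longer matters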
@@ -361,15 +328,15 @@ object BulkCopyUtils extends Logging {
                     s" DF col ${dfColName} nullable config is ${dfCols(dfFieldIndex).nullable} " +
                     s" Table col ${tableColName} nullable config is ${tableCols(i).nullable}")
 
-                // Schema check passed for element, Create ColMetaData only for non auto generated column
-                result(nonAutoColIndex) = new ColumnMetadata(
+                // Schema check passed for element, Create ColMetaData for columns
+                result(colMappingIndex) = new ColumnMetadata(
                     rs.getMetaData().getColumnName(i+1),
                     rs.getMetaData().getColumnType(i+1),
                     rs.getMetaData().getPrecision(i+1),
                     rs.getMetaData().getScale(i+1),
                     dfFieldIndex
                 )
-                nonAutoColIndex += 1
+                colMappingIndex += 1
             }
         }
         result
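
Note: after this commit, skipping an identity or computed column is the user's responsibility: name only the writable columns in columnsToWrite and the mapping loop above ignores the rest. A sketch of such a write (table and column names invented; the format name is the one this connector is commonly registered under):

df.write
    .format("com.microsoft.sqlserver.jdbc.spark")
    .mode("append")
    .option("url", url)
    .option("dbtable", "dbo.Employees")
    .option("columnsToWrite", "FirstName,LastName,Age") // leave out e.g. an IDENTITY id column
    .save()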
