@@ -42,7 +42,18 @@ import org.duckdb.DuckDBColumnType.UUID
4242import org.duckdb.DuckDBColumnType.VARCHAR
4343import org.duckdb.DuckDBResultSetMetaData
4444import org.duckdb.JsonNode
45+ import org.jetbrains.kotlinx.dataframe.AnyRow
46+ import org.jetbrains.kotlinx.dataframe.DataColumn
4547import org.jetbrains.kotlinx.dataframe.DataFrame
48+ import org.jetbrains.kotlinx.dataframe.DataRow
49+ import org.jetbrains.kotlinx.dataframe.api.asColumnGroup
50+ import org.jetbrains.kotlinx.dataframe.api.asDataColumn
51+ import org.jetbrains.kotlinx.dataframe.api.castToNotNullable
52+ import org.jetbrains.kotlinx.dataframe.api.first
53+ import org.jetbrains.kotlinx.dataframe.api.toDataFrame
54+ import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
55+ import org.jetbrains.kotlinx.dataframe.impl.DataCollector
56+ import org.jetbrains.kotlinx.dataframe.impl.schema.DataFrameSchemaImpl
4657import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig
4758import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables
4859import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
@@ -56,6 +67,7 @@ import java.sql.ResultSet
5667import java.sql.Struct
5768import java.util.Properties
5869import kotlin.collections.toList
70+ import kotlin.reflect.KClass
5971import kotlin.reflect.KTypeProjection
6072import kotlin.reflect.full.createType
6173import kotlin.reflect.full.withNullability
@@ -100,7 +112,7 @@ public object DuckDb : DbType("duckdb") {
100112 */
101113 internal fun parseDuckDbType (sqlTypeName : String , isNullable : Boolean ): AnyTypeInformation =
102114 duckDbTypeCache.getOrPut(Pair (sqlTypeName, isNullable)) {
103- when (DuckDBResultSetMetaData .TypeNameToType (sqlTypeName)) {
115+ return @getOrPut when (DuckDBResultSetMetaData .TypeNameToType (sqlTypeName)) {
104116 BOOLEAN -> typeInformationForValueColumnOf<Boolean >(isNullable)
105117
106118 TINYINT -> typeInformationForValueColumnOf<Byte >(isNullable)
@@ -208,9 +220,45 @@ public object DuckDb : DbType("duckdb") {
208220
209221 // TODO requires #1266 for specific types
210222 STRUCT -> {
211- val structTypes = parseStructType(sqlTypeName)
223+ val structEntries = parseStructType(sqlTypeName)
224+ val parsedStructEntries = structEntries.mapValues { (_, type) ->
225+ parseDuckDbType(sqlTypeName = type, isNullable = true )
226+ }
212227
213- typeInformationForValueColumnOf<Struct >(isNullable)
228+ val targetSchema = ColumnSchema .Group (
229+ schema = DataFrameSchemaImpl (parsedStructEntries.mapValues { it.value.targetSchema }),
230+ contentType = typeOf<Any ?>(),
231+ )
232+
233+ typeInformationWithProcessingFor<Struct , Map <String , Any ?>, DataRow <* >>(
234+ jdbcSourceType = typeOf<Struct >().withNullability(isNullable),
235+ targetSchema = targetSchema,
236+ valuePreprocessor = { struct, _ ->
237+ // NOTE: a DataRow cannot be `null` in a DataFrame; instead, all of its fields become `null`.
238+ if (struct == null ) {
239+ parsedStructEntries.mapValues { null }
240+ } else {
241+ // read data from the struct
242+ val attrs = struct.getAttributes(
243+ parsedStructEntries.mapValues {
244+ (it.value.jdbcSourceType.classifier!! as KClass <* >).java
245+ },
246+ )
247+
248+ // and potentially, preprocess each value individually
249+ parsedStructEntries.entries.withIndex().associate { (i, entry) ->
250+ entry.key to entry.value.castToAny().preprocess(attrs[i])
251+ }
252+ }
253+ },
254+ columnPostprocessor = { col, _ ->
255+ col.castToNotNullable()
256+ .values()
257+ .toDataFrame()
258+ .asColumnGroup(col.name())
259+ .asDataColumn()
260+ },
261+ )
214262 }
215263
216264 // Cannot handle this in Kotlin
0 commit comments