Skip to content

Commit 56dbc2e

Browse files
committed
Introduced ColumnDataHolder to replace the List in DataColumnImpl. This interface can define how the data of columns is stored. ColumnDataHolderImpl was created as the default implementation, and it defaults to storing data in primitive arrays whenever possible.
1 parent a8dad48 commit 56dbc2e

File tree

14 files changed

+944
-62
lines changed

14 files changed

+944
-62
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
package org.jetbrains.kotlinx.dataframe
2+
3+
import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnDataHolderImpl
4+
import kotlin.reflect.KType
5+
import kotlin.reflect.typeOf
6+
7+
/**
 * Read-only container for the values of a column.
 *
 * The interface only describes how values are accessed; how they are stored is left to the
 * implementation. It is [Iterable], reports its [size], supports lookup by index and by
 * index range, and exposes the set of distinct values through a [Lazy] handle.
 */
public interface ColumnDataHolder<T> : Iterable<T> {

    /** Number of values in this holder. */
    public val size: Int

    /** Lazy handle to the set of distinct values. */
    public val distinct: Lazy<Set<T>>

    /** Returns the value stored at [index]. */
    public operator fun get(index: Int): T

    /** Returns the values stored at the indices covered by [range]. */
    public operator fun get(range: IntRange): List<T>

    /** Returns `true` when [value] is one of the stored values. */
    public fun contains(value: T): Boolean

    /** Returns the stored values as a [List]. */
    public fun toList(): List<T>

    /** Returns the stored values as a [Set]. */
    public fun toSet(): Set<T>

    public companion object
}
25+
26+
/**
 * Builds a [ColumnDataHolder] from this collection by delegating to [ColumnDataHolderImpl.of].
 *
 * @param type element type handed to the factory.
 * @param distinct optional distinct-values handle forwarded to the factory.
 */
public fun <T> Collection<T>.toColumnDataHolder(type: KType, distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> {
    val source: Collection<T> = this
    return ColumnDataHolderImpl.of(list = source, type = type, distinct = distinct)
}
28+
29+
/**
 * Builds a [ColumnDataHolder] from this collection, using the reified type argument [T]
 * as the element type.
 *
 * @param distinct optional distinct-values handle forwarded unchanged.
 */
public inline fun <reified T> Collection<T>.toColumnDataHolder(distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> {
    val elementType = typeOf<T>()
    return toColumnDataHolder(type = elementType, distinct = distinct)
}
31+
32+
/**
 * Builds a [ColumnDataHolder] from this array by delegating to [ColumnDataHolderImpl.of].
 *
 * @param type element type handed to the factory.
 * @param distinct optional distinct-values handle forwarded to the factory.
 */
public fun <T> Array<T>.toColumnDataHolder(type: KType, distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> {
    val source: Array<T> = this
    return ColumnDataHolderImpl.of(array = source, type = type, distinct = distinct)
}
34+
35+
/**
 * Builds a [ColumnDataHolder] from this array, using the reified type argument [T]
 * as the element type.
 *
 * @param distinct optional distinct-values handle forwarded unchanged.
 */
public inline fun <reified T> Array<T>.toColumnDataHolder(distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> {
    val elementType = typeOf<T>()
    return toColumnDataHolder(type = elementType, distinct = distinct)
}
37+
38+
// Primitive and unsigned array adapters. Each one hands the receiver array, the matching
// element KType and the optional `distinct` handle to ColumnDataHolderImpl.of.

/** Exposes this [BooleanArray] as a [ColumnDataHolder] of [Boolean]. */
public fun BooleanArray.asColumnDataHolder(distinct: Lazy<Set<Boolean>>? = null): ColumnDataHolder<Boolean> =
    ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<Boolean>(), distinct = distinct)

/** Exposes this [ByteArray] as a [ColumnDataHolder] of [Byte]. */
public fun ByteArray.asColumnDataHolder(distinct: Lazy<Set<Byte>>? = null): ColumnDataHolder<Byte> =
    ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<Byte>(), distinct = distinct)

/** Exposes this [ShortArray] as a [ColumnDataHolder] of [Short]. */
public fun ShortArray.asColumnDataHolder(distinct: Lazy<Set<Short>>? = null): ColumnDataHolder<Short> =
    ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<Short>(), distinct = distinct)

/** Exposes this [IntArray] as a [ColumnDataHolder] of [Int]. */
public fun IntArray.asColumnDataHolder(distinct: Lazy<Set<Int>>? = null): ColumnDataHolder<Int> =
    ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<Int>(), distinct = distinct)

/** Exposes this [LongArray] as a [ColumnDataHolder] of [Long]. */
public fun LongArray.asColumnDataHolder(distinct: Lazy<Set<Long>>? = null): ColumnDataHolder<Long> =
    ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<Long>(), distinct = distinct)

/** Exposes this [FloatArray] as a [ColumnDataHolder] of [Float]. */
public fun FloatArray.asColumnDataHolder(distinct: Lazy<Set<Float>>? = null): ColumnDataHolder<Float> =
    ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<Float>(), distinct = distinct)

/** Exposes this [DoubleArray] as a [ColumnDataHolder] of [Double]. */
public fun DoubleArray.asColumnDataHolder(distinct: Lazy<Set<Double>>? = null): ColumnDataHolder<Double> =
    ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<Double>(), distinct = distinct)

/** Exposes this [CharArray] as a [ColumnDataHolder] of [Char]. */
public fun CharArray.asColumnDataHolder(distinct: Lazy<Set<Char>>? = null): ColumnDataHolder<Char> =
    ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<Char>(), distinct = distinct)

/** Exposes this [UByteArray] as a [ColumnDataHolder] of [UByte]. */
public fun UByteArray.asColumnDataHolder(distinct: Lazy<Set<UByte>>? = null): ColumnDataHolder<UByte> =
    ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<UByte>(), distinct = distinct)

/** Exposes this [UShortArray] as a [ColumnDataHolder] of [UShort]. */
public fun UShortArray.asColumnDataHolder(distinct: Lazy<Set<UShort>>? = null): ColumnDataHolder<UShort> =
    ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<UShort>(), distinct = distinct)

/** Exposes this [UIntArray] as a [ColumnDataHolder] of [UInt]. */
public fun UIntArray.asColumnDataHolder(distinct: Lazy<Set<UInt>>? = null): ColumnDataHolder<UInt> =
    ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<UInt>(), distinct = distinct)

/** Exposes this [ULongArray] as a [ColumnDataHolder] of [ULong]. */
public fun ULongArray.asColumnDataHolder(distinct: Lazy<Set<ULong>>? = null): ColumnDataHolder<ULong> =
    ColumnDataHolderImpl.of(primitiveArray = this, type = typeOf<ULong>(), distinct = distinct)

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt

+107-8
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext
1616
import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
1717
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
1818
import org.jetbrains.kotlinx.dataframe.columns.ValueColumn
19+
import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnDataHolderImpl
1920
import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnGroupImpl
2021
import org.jetbrains.kotlinx.dataframe.impl.columns.FrameColumnImpl
2122
import org.jetbrains.kotlinx.dataframe.impl.columns.ValueColumnImpl
@@ -42,6 +43,73 @@ public interface DataColumn<out T> : BaseColumn<T> {
4243

4344
public companion object {
4445

46+
/**
 * Creates a [ValueColumn] directly from an existing [ColumnDataHolder].
 *
 * The arguments are passed to [ValueColumnImpl] as given; the declared [type] is used as is.
 *
 * @param name name of the new column.
 * @param values holder containing the column's values.
 * @param type type of the column's values.
 * @param defaultValue optional default value handed to [ValueColumnImpl].
 */
public fun <T> createValueColumn(
    name: String,
    values: ColumnDataHolder<T>,
    type: KType,
    defaultValue: T? = null,
): ValueColumn<T> {
    return ValueColumnImpl(values, name, type, defaultValue)
}
52+
53+
// Primitive and unsigned array overloads. Each one turns the array into a ColumnDataHolder
// with `asColumnDataHolder()` and forwards it together with the matching element KType.

/** Creates a [ValueColumn] of [Boolean] named [name] from a [BooleanArray]. */
public fun createValueColumn(name: String, values: BooleanArray): ValueColumn<Boolean> =
    createValueColumn(name = name, values = values.asColumnDataHolder(), type = typeOf<Boolean>())

/** Creates a [ValueColumn] of [Byte] named [name] from a [ByteArray]. */
public fun createValueColumn(name: String, values: ByteArray): ValueColumn<Byte> =
    createValueColumn(name = name, values = values.asColumnDataHolder(), type = typeOf<Byte>())

/** Creates a [ValueColumn] of [Short] named [name] from a [ShortArray]. */
public fun createValueColumn(name: String, values: ShortArray): ValueColumn<Short> =
    createValueColumn(name = name, values = values.asColumnDataHolder(), type = typeOf<Short>())

/** Creates a [ValueColumn] of [Int] named [name] from an [IntArray]. */
public fun createValueColumn(name: String, values: IntArray): ValueColumn<Int> =
    createValueColumn(name = name, values = values.asColumnDataHolder(), type = typeOf<Int>())

/** Creates a [ValueColumn] of [Long] named [name] from a [LongArray]. */
public fun createValueColumn(name: String, values: LongArray): ValueColumn<Long> =
    createValueColumn(name = name, values = values.asColumnDataHolder(), type = typeOf<Long>())

/** Creates a [ValueColumn] of [Float] named [name] from a [FloatArray]. */
public fun createValueColumn(name: String, values: FloatArray): ValueColumn<Float> =
    createValueColumn(name = name, values = values.asColumnDataHolder(), type = typeOf<Float>())

/** Creates a [ValueColumn] of [Double] named [name] from a [DoubleArray]. */
public fun createValueColumn(name: String, values: DoubleArray): ValueColumn<Double> =
    createValueColumn(name = name, values = values.asColumnDataHolder(), type = typeOf<Double>())

/** Creates a [ValueColumn] of [Char] named [name] from a [CharArray]. */
public fun createValueColumn(name: String, values: CharArray): ValueColumn<Char> =
    createValueColumn(name = name, values = values.asColumnDataHolder(), type = typeOf<Char>())

/** Creates a [ValueColumn] of [UByte] named [name] from a [UByteArray]. */
public fun createValueColumn(name: String, values: UByteArray): ValueColumn<UByte> =
    createValueColumn(name = name, values = values.asColumnDataHolder(), type = typeOf<UByte>())

/** Creates a [ValueColumn] of [UShort] named [name] from a [UShortArray]. */
public fun createValueColumn(name: String, values: UShortArray): ValueColumn<UShort> =
    createValueColumn(name = name, values = values.asColumnDataHolder(), type = typeOf<UShort>())

/** Creates a [ValueColumn] of [UInt] named [name] from a [UIntArray]. */
public fun createValueColumn(name: String, values: UIntArray): ValueColumn<UInt> =
    createValueColumn(name = name, values = values.asColumnDataHolder(), type = typeOf<UInt>())

/** Creates a [ValueColumn] of [ULong] named [name] from a [ULongArray]. */
public fun createValueColumn(name: String, values: ULongArray): ValueColumn<ULong> =
    createValueColumn(name = name, values = values.asColumnDataHolder(), type = typeOf<ULong>())
112+
45113
/**
46114
* Creates [ValueColumn] using given [name], [values] and [type].
47115
*
@@ -56,7 +124,15 @@ public interface DataColumn<out T> : BaseColumn<T> {
56124
type: KType,
57125
infer: Infer = Infer.None,
58126
defaultValue: T? = null,
59-
): ValueColumn<T> = ValueColumnImpl(values, name, getValuesType(values, type, infer), defaultValue)
127+
): ValueColumn<T> {
128+
val valueType = getValuesType(values, type, infer)
129+
return createValueColumn(
130+
name = name,
131+
values = ColumnDataHolderImpl.of(values, valueType),
132+
type = valueType,
133+
defaultValue = defaultValue
134+
)
135+
}
60136

61137
/**
62138
* Creates [ValueColumn] using given [name], [values] and reified column [type].
@@ -73,12 +149,35 @@ public interface DataColumn<out T> : BaseColumn<T> {
73149
values: List<T>,
74150
infer: Infer = Infer.None,
75151
): ValueColumn<T> = createValueColumn(
76-
name, values,
77-
getValuesType(
78-
values,
79-
typeOf<T>(),
80-
infer
152+
name = name,
153+
values = values,
154+
type = getValuesType(values, typeOf<T>(), infer)
155+
)
156+
157+
/**
 * Creates a [ValueColumn] named [name] from an [Array] of values.
 *
 * The value type is first resolved with `getValuesType` from the array contents, the declared
 * [type] and the [infer] mode. That resolved type is then used both to build the
 * [ColumnDataHolder] and as the type of the resulting column.
 *
 * @param name name of the new column.
 * @param values values of the new column.
 * @param type declared type of the values.
 * @param infer inference mode passed to `getValuesType`.
 * @param defaultValue optional default value forwarded to the created column.
 */
public fun <T> createValueColumn(
    name: String,
    values: Array<T>,
    type: KType,
    infer: Infer = Infer.None,
    defaultValue: T? = null,
): ValueColumn<T> {
    val resolvedType = getValuesType(values.asList(), type, infer)
    val holder: ColumnDataHolder<T> = ColumnDataHolderImpl.of(values, resolvedType)
    return createValueColumn(name = name, values = holder, type = resolvedType, defaultValue = defaultValue)
}
172+
173+
/**
 * Creates a [ValueColumn] named [name] from an [Array] of values, taking the declared type
 * from the reified type argument [T].
 *
 * The type passed on is the result of `getValuesType` applied to the array contents,
 * `typeOf<T>()` and [infer].
 *
 * @param name name of the new column.
 * @param values values of the new column.
 * @param infer inference mode passed to `getValuesType`.
 */
public inline fun <reified T> createValueColumn(
    name: String,
    values: Array<T>,
    infer: Infer = Infer.None,
): ValueColumn<T> {
    val inferredType = getValuesType(values.asList(), typeOf<T>(), infer)
    return createValueColumn(name = name, values = values, type = inferredType)
}
83182

84183
/** Creates a [ColumnGroup] by passing [name] and [df] to [ColumnGroupImpl]. */
public fun <T> createColumnGroup(name: String, df: DataFrame<T>): ColumnGroup<T> {
    return ColumnGroupImpl(name, df)
}
@@ -88,13 +187,13 @@ public interface DataColumn<out T> : BaseColumn<T> {
88187
df: DataFrame<T>,
89188
startIndices: Iterable<Int>,
90189
): FrameColumn<T> =
91-
FrameColumnImpl(name, df.splitByIndices(startIndices.asSequence()).toList(), lazy { df.schema() })
190+
FrameColumnImpl(name, df.splitByIndices(startIndices.asSequence()).toList().toColumnDataHolder(), lazy { df.schema() })
92191

93192
/**
 * Creates a [FrameColumn] named [name] whose values are the given [groups].
 *
 * The list of data frames is converted with `toColumnDataHolder()` before being handed to
 * [FrameColumnImpl].
 *
 * @param name name of the new column.
 * @param groups data frames that become the values of the column.
 * @param schema optional lazy schema passed through to [FrameColumnImpl].
 */
public fun <T> createFrameColumn(
    name: String,
    groups: List<DataFrame<T>>,
    schema: Lazy<DataFrameSchema>? = null,
): FrameColumn<T> {
    return FrameColumnImpl(name, groups.toColumnDataHolder(), schema)
}
98197

99198
public fun <T> createWithTypeInference(
100199
name: String,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
@file:OptIn(ExperimentalUnsignedTypes::class)
2+
3+
package org.jetbrains.kotlinx.dataframe.impl.columns
4+
5+
import org.jetbrains.kotlinx.dataframe.ColumnDataHolder
6+
import org.jetbrains.kotlinx.dataframe.impl.asList
7+
import org.jetbrains.kotlinx.dataframe.impl.isPrimitiveArray
8+
import kotlin.reflect.KType
9+
import kotlin.reflect.typeOf
10+
11+
/**
 * Default [ColumnDataHolder] implementation that delegates every operation to a backing [List].
 *
 * The constructor is private; instances are obtained through the `of` factories in the companion
 * object. For the primitive and unsigned element types handled there, the values are held in the
 * matching primitive array and exposed through its `asList()` wrapper.
 */
internal class ColumnDataHolderImpl<T> private constructor(
    private val list: List<T>,
    distinct: Lazy<Set<T>>?,
) : ColumnDataHolder<T> {

    /** The supplied distinct set if there was one, otherwise computed from [list] on first access. */
    override val distinct: Lazy<Set<T>> = distinct ?: lazy { list.toSet() }

    override val size: Int get() = list.size

    override fun toSet(): Set<T> = distinct.value

    override fun toList(): List<T> = list

    override fun get(index: Int): T = list[index]

    /**
     * Returns the values at the indices covered by [range].
     *
     * An empty range (for example `3..1` or [IntRange.EMPTY]) yields an empty list; passing it
     * to `subList` would throw because the computed end index lies before the start index.
     */
    override fun get(range: IntRange): List<T> =
        if (range.isEmpty()) emptyList() else list.subList(range.first, range.last + 1)

    override fun contains(value: T): Boolean = list.contains(value)

    override fun iterator(): Iterator<T> = list.iterator()

    companion object {

        /**
         * Constructs a [ColumnDataHolder] from [list].
         *
         * If [list] already is a [ColumnDataHolder] it is returned as is (in that case [distinct]
         * is not used). If [type] is one of the primitive or unsigned types, the elements are
         * copied into the matching primitive array, which is then wrapped with `asList`.
         * Otherwise [list] itself is wrapped with [asList].
         *
         * @throws IllegalArgumentException if the conversion fails, e.g. because the elements
         * do not match [type]; the original exception is attached as the cause.
         */
        @Suppress("UNCHECKED_CAST")
        internal fun <T> of(list: Collection<T>, type: KType, distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> {
            if (list is ColumnDataHolder<*>) return list as ColumnDataHolder<T>

            return try {
                when (type) {
                    BOOLEAN -> ColumnDataHolderImpl((list as Collection<Boolean>).toBooleanArray().asList(), distinct)
                    BYTE -> ColumnDataHolderImpl((list as Collection<Byte>).toByteArray().asList(), distinct)
                    SHORT -> ColumnDataHolderImpl((list as Collection<Short>).toShortArray().asList(), distinct)
                    INT -> ColumnDataHolderImpl((list as Collection<Int>).toIntArray().asList(), distinct)
                    LONG -> ColumnDataHolderImpl((list as Collection<Long>).toLongArray().asList(), distinct)
                    FLOAT -> ColumnDataHolderImpl((list as Collection<Float>).toFloatArray().asList(), distinct)
                    DOUBLE -> ColumnDataHolderImpl((list as Collection<Double>).toDoubleArray().asList(), distinct)
                    CHAR -> ColumnDataHolderImpl((list as Collection<Char>).toCharArray().asList(), distinct)
                    UBYTE -> ColumnDataHolderImpl((list as Collection<UByte>).toUByteArray().asList(), distinct)
                    USHORT -> ColumnDataHolderImpl((list as Collection<UShort>).toUShortArray().asList(), distinct)
                    UINT -> ColumnDataHolderImpl((list as Collection<UInt>).toUIntArray().asList(), distinct)
                    ULONG -> ColumnDataHolderImpl((list as Collection<ULong>).toULongArray().asList(), distinct)
                    else -> ColumnDataHolderImpl(list.asList(), distinct)
                } as ColumnDataHolder<T>
            } catch (e: Exception) {
                throw IllegalArgumentException("Can't create ColumnDataHolder from $list and type $type", e)
            }
        }

        /**
         * Constructs a [ColumnDataHolder] from [array].
         *
         * If [type] is one of the primitive or unsigned types, [array] is converted to the
         * matching primitive array first and that array is wrapped with `asList`. Otherwise
         * [array] itself is wrapped with `asList`.
         *
         * @throws IllegalArgumentException if the conversion fails, e.g. because the elements
         * do not match [type]; the original exception is attached as the cause.
         */
        @Suppress("UNCHECKED_CAST")
        internal fun <T> of(array: Array<T>, type: KType, distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> =
            try {
                when (type) {
                    BOOLEAN -> ColumnDataHolderImpl((array as Array<Boolean>).toBooleanArray().asList(), distinct)
                    BYTE -> ColumnDataHolderImpl((array as Array<Byte>).toByteArray().asList(), distinct)
                    SHORT -> ColumnDataHolderImpl((array as Array<Short>).toShortArray().asList(), distinct)
                    INT -> ColumnDataHolderImpl((array as Array<Int>).toIntArray().asList(), distinct)
                    LONG -> ColumnDataHolderImpl((array as Array<Long>).toLongArray().asList(), distinct)
                    FLOAT -> ColumnDataHolderImpl((array as Array<Float>).toFloatArray().asList(), distinct)
                    DOUBLE -> ColumnDataHolderImpl((array as Array<Double>).toDoubleArray().asList(), distinct)
                    CHAR -> ColumnDataHolderImpl((array as Array<Char>).toCharArray().asList(), distinct)
                    UBYTE -> ColumnDataHolderImpl((array as Array<UByte>).toUByteArray().asList(), distinct)
                    USHORT -> ColumnDataHolderImpl((array as Array<UShort>).toUShortArray().asList(), distinct)
                    UINT -> ColumnDataHolderImpl((array as Array<UInt>).toUIntArray().asList(), distinct)
                    ULONG -> ColumnDataHolderImpl((array as Array<ULong>).toULongArray().asList(), distinct)
                    else -> ColumnDataHolderImpl(array.asList(), distinct)
                } as ColumnDataHolder<T>
            } catch (e: Exception) {
                // contentToString() shows the elements; plain interpolation of an array only
                // prints its identity (e.g. "[Ljava.lang.Integer;@1b2c3d").
                throw IllegalArgumentException(
                    "Can't create ColumnDataHolder from ${array.contentToString()} and mismatching type $type",
                    e
                )
            }

        /**
         * Constructs a [ColumnDataHolder] using an `asList` wrapper around [primitiveArray].
         *
         * [primitiveArray] must be a primitive (or unsigned) array whose element type matches [type].
         *
         * @throws IllegalArgumentException if [primitiveArray] is not a primitive array, or if it
         * is one but does not match [type].
         */
        @Suppress("UNCHECKED_CAST")
        internal fun <T> of(primitiveArray: Any, type: KType, distinct: Lazy<Set<T>>? = null): ColumnDataHolder<T> =
            when {
                type == BOOLEAN && primitiveArray is BooleanArray ->
                    ColumnDataHolderImpl(primitiveArray.asList(), distinct)

                type == BYTE && primitiveArray is ByteArray ->
                    ColumnDataHolderImpl(primitiveArray.asList(), distinct)

                type == SHORT && primitiveArray is ShortArray ->
                    ColumnDataHolderImpl(primitiveArray.asList(), distinct)

                type == INT && primitiveArray is IntArray ->
                    ColumnDataHolderImpl(primitiveArray.asList(), distinct)

                type == LONG && primitiveArray is LongArray ->
                    ColumnDataHolderImpl(primitiveArray.asList(), distinct)

                type == FLOAT && primitiveArray is FloatArray ->
                    ColumnDataHolderImpl(primitiveArray.asList(), distinct)

                type == DOUBLE && primitiveArray is DoubleArray ->
                    ColumnDataHolderImpl(primitiveArray.asList(), distinct)

                type == CHAR && primitiveArray is CharArray ->
                    ColumnDataHolderImpl(primitiveArray.asList(), distinct)

                type == UBYTE && primitiveArray is UByteArray ->
                    ColumnDataHolderImpl(primitiveArray.asList(), distinct)

                type == USHORT && primitiveArray is UShortArray ->
                    ColumnDataHolderImpl(primitiveArray.asList(), distinct)

                type == UINT && primitiveArray is UIntArray ->
                    ColumnDataHolderImpl(primitiveArray.asList(), distinct)

                type == ULONG && primitiveArray is ULongArray ->
                    ColumnDataHolderImpl(primitiveArray.asList(), distinct)

                // No branch matched: distinguish "not a primitive array at all" from
                // "primitive array of the wrong element type" in the error message.
                !primitiveArray.isPrimitiveArray ->
                    throw IllegalArgumentException(
                        "Can't create ColumnDataHolder from non primitive array $primitiveArray and type $type"
                    )

                else ->
                    throw IllegalArgumentException(
                        "Can't create ColumnDataHolder from primitive array $primitiveArray and type $type"
                    )
            } as ColumnDataHolder<T>
    }
}
141+
142+
// Cached KType instances for the primitive and unsigned element types, compared against
// the requested column type when choosing a primitive-array representation.
private val BOOLEAN: KType = typeOf<Boolean>()
private val BYTE: KType = typeOf<Byte>()
private val SHORT: KType = typeOf<Short>()
private val INT: KType = typeOf<Int>()
private val LONG: KType = typeOf<Long>()
private val FLOAT: KType = typeOf<Float>()
private val DOUBLE: KType = typeOf<Double>()
private val CHAR: KType = typeOf<Char>()
private val UBYTE: KType = typeOf<UByte>()
private val USHORT: KType = typeOf<UShort>()
private val UINT: KType = typeOf<UInt>()
private val ULONG: KType = typeOf<ULong>()

0 commit comments

Comments
 (0)