Skip to content

Commit ff1f428

Browse files
committed
Introduce parsing of `Char?` columns. It works the same as `String` parsing: `parse()` fails when the resulting column type is still `Char` or `String`.
1 parent 60ebe64 commit ff1f428

File tree

12 files changed

+169
-21
lines changed

12 files changed

+169
-21
lines changed

core/api/core.api

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3547,8 +3547,12 @@ public final class org/jetbrains/kotlinx/dataframe/api/ParseKt {
35473547
public static synthetic fun parse$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
35483548
public static final fun parseAnyFrameNullable (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
35493549
public static synthetic fun parseAnyFrameNullable$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
3550+
public static final fun parseChar (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
3551+
public static synthetic fun parseChar$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
35503552
public static final fun tryParse (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
35513553
public static synthetic fun tryParse$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
3554+
public static final fun tryParseChar (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
3555+
public static synthetic fun tryParseChar$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
35523556
}
35533557

35543558
public final class org/jetbrains/kotlinx/dataframe/api/ParserOptions {

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import java.time.format.DateTimeFormatter
2020
import java.util.Locale
2121
import kotlin.reflect.KProperty
2222
import kotlin.reflect.KType
23+
import kotlin.reflect.typeOf
2324
import kotlin.uuid.ExperimentalUuidApi
2425
import kotlin.uuid.Uuid
2526

@@ -312,6 +313,28 @@ public class ParserOptions(
312313
* @return a new column with parsed values */
313314
public fun DataColumn<String?>.tryParse(options: ParserOptions? = null): DataColumn<*> = tryParseImpl(options)
314315

316+
/**
317+
* Tries to parse a column of chars into a column of a different type.
318+
* Each parser in [Parsers] is run in order until a valid parser is found,
319+
* i.e. a parser that was able to parse all values in the column successfully. If a parser
320+
* fails to parse any value, the next parser is tried. If all the others fail, the final parser
321+
* returns strings.
322+
*
323+
* Parsers that are [covered by][StringParser.coveredBy] other parsers are skipped.
324+
*
325+
* @param options options for parsing, like providing a locale or a custom date-time formatter
326+
* @throws IllegalStateException if no valid parser is found (unlikely, unless the `String` parser is disabled)
327+
* @return a new column with parsed values
328+
*/
329+
@JvmName("tryParseChar")
330+
public fun DataColumn<Char?>.tryParse(options: ParserOptions? = null): DataColumn<*> {
331+
// skip the Char parser, as we're trying to parse away from Char
332+
val providedSkipTypes = options?.skipTypes ?: DataFrame.parser.skipTypes
333+
val parserOptions = (options ?: ParserOptions()).copy(skipTypes = providedSkipTypes + typeOf<Char>())
334+
335+
return map { it?.toString() }.tryParse(parserOptions)
336+
}
337+
315338
public fun <T> DataFrame<T>.parse(options: ParserOptions? = null): DataFrame<T> =
316339
parse(options) {
317340
colsAtAnyDepth().filter { !it.isColumnGroup() }
@@ -335,6 +358,23 @@ public fun <T> DataFrame<T>.parse(options: ParserOptions? = null): DataFrame<T>
335358
public fun DataColumn<String?>.parse(options: ParserOptions? = null): DataColumn<*> =
336359
tryParse(options).also { if (it.typeClass == String::class) error("Can't guess column type") }
337360

361+
/**
362+
* Tries to parse a column of chars as strings into a column of a different type.
363+
* Each parser in [Parsers] is run in order until a valid parser is found,
364+
* i.e. a parser that was able to parse all values in the column successfully. If a parser
365+
* fails to parse any value, the next parser is tried.
366+
*
367+
* If all fail, the column is returned as `String`; this can never fail.
368+
*
369+
* Parsers that are [covered by][StringParser.coveredBy] other parsers are skipped.
370+
*
371+
* @param options options for parsing, like providing a locale or a custom date-time formatter
372+
* @return a new column with parsed values
373+
*/
374+
@JvmName("parseChar")
375+
public fun DataColumn<Char?>.parse(options: ParserOptions? = null): DataColumn<*> =
376+
tryParse(options) // no need to throw an exception, as Char can always be parsed as String
377+
338378
@JvmName("parseAnyFrameNullable")
339379
public fun DataColumn<AnyFrame?>.parse(options: ParserOptions? = null): DataColumn<AnyFrame?> =
340380
map { it?.parse(options) }

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convert.kt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,13 @@ internal fun createConverter(from: KType, to: KType, options: ParserOptions? = n
369369

370370
Char::class -> when (toClass) {
371371
Int::class -> convert<Char> { it.code }
372-
else -> null
372+
373+
else -> // convert char to string and then to target type
374+
getConverter(typeOf<String>(), to, options)?.let { stringConverter ->
375+
convert<Char> {
376+
stringConverter(it.toString())
377+
}
378+
}
373379
}
374380

375381
Int::class -> when (toClass) {

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -716,29 +716,24 @@ internal fun <T> DataFrame<T>.parseImpl(options: ParserOptions?, columns: Column
716716
when {
717717
// when a frame column is requested to be parsed,
718718
// parse each value/frame column at any depth inside each DataFrame in the frame column
719-
col.isFrameColumn() -> {
719+
col.isFrameColumn() ->
720720
col.map {
721721
it.parseImpl(options) {
722722
colsAtAnyDepth().filter { !it.isColumnGroup() }
723723
}
724724
}
725-
}
726725

727726
// when a column group is requested to be parsed,
728727
// parse each column in the group
729-
col.isColumnGroup() -> {
728+
col.isColumnGroup() ->
730729
col.parseImpl(options) { all() }
731730
.asColumnGroup(col.name())
732731
.asDataColumn()
733-
}
734732

735733
// Base case, parse the column if it's a `String?` column
736-
col.isSubtypeOf<String?>() -> {
734+
col.isSubtypeOf<String?>() ->
737735
col.cast<String?>().tryParseImpl(options)
738-
}
739736

740-
else -> {
741-
col
742-
}
737+
else -> col
743738
}
744739
}

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package org.jetbrains.kotlinx.dataframe.api
33
import io.kotest.assertions.throwables.shouldNotThrow
44
import io.kotest.assertions.throwables.shouldThrow
55
import io.kotest.matchers.shouldBe
6+
import io.kotest.matchers.shouldNotBe
67
import kotlinx.datetime.Clock
78
import kotlinx.datetime.Instant
89
import kotlinx.datetime.LocalTime
@@ -69,6 +70,20 @@ class ConvertTests {
6970
@Test
7071
fun `convert string to enum`() {
7172
columnOf("A", "B").convertTo<EnumClass>() shouldBe columnOf(EnumClass.A, EnumClass.B)
73+
74+
dataFrameOf(columnOf("A", "B") named "colA")
75+
.convert("colA").to<EnumClass>()
76+
.getColumn("colA") shouldBe columnOf(EnumClass.A, EnumClass.B).named("colA")
77+
}
78+
79+
@Test
80+
fun `convert char to enum`() {
81+
// Char -> String -> Enum
82+
columnOf('A', 'B').convertTo<EnumClass>() shouldBe columnOf(EnumClass.A, EnumClass.B)
83+
84+
dataFrameOf(columnOf('A', 'B') named "colA")
85+
.convert("colA").to<EnumClass>()
86+
.getColumn("colA") shouldBe columnOf(EnumClass.A, EnumClass.B).named("colA")
7287
}
7388

7489
@JvmInline
@@ -199,6 +214,15 @@ class ConvertTests {
199214
val col = columnOf(65, 66)
200215
col.convertTo<Char>() shouldBe columnOf('A', 'B')
201216
col.convertTo<Char>().convertTo<Int>() shouldBe col
217+
218+
// this means that converting a Char to Int yields its character code, not the digit it represents
219+
columnOf('1', '2').convertToInt() shouldNotBe columnOf(1, 2)
220+
columnOf('1', '2').convertToInt() shouldBe columnOf(49, 50)
221+
222+
// but
223+
columnOf('1', '2').convertToString().convertToInt() shouldBe columnOf(1, 2)
224+
// or
225+
columnOf('1', '2').parse() shouldBe columnOf(1, 2)
202226
}
203227

204228
@Test

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,22 @@ import kotlin.time.Instant as StdlibInstant
3838
import kotlinx.datetime.Instant as DeprecatedInstant
3939

4040
class ParseTests {
41+
42+
@Test
43+
fun `parse to chars`() {
44+
val char = columnOf('a', 'b', 'c')
45+
char.parse() shouldBe char
46+
char.tryParse() shouldBe char
47+
char.convertToString().parse() shouldBe char
48+
}
49+
50+
@Test
51+
fun `parse chars to int`() {
52+
val char = columnOf('1', '2', '3')
53+
char.parse() shouldBe columnOf(1, 2, 3)
54+
char.tryParse() shouldBe columnOf(1, 2, 3)
55+
}
56+
4157
@Test
4258
fun parseDate() {
4359
val currentLocale = Locale.getDefault()

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,12 @@ class ParserTests {
4646
DataFrame.parser.resetToDefault()
4747
}
4848

49+
@Test
50+
fun `parse to Char`() {
51+
val col by columnOf("a", "b")
52+
col.parse().type() shouldBe typeOf<Char>()
53+
}
54+
4955
@Test(expected = IllegalStateException::class)
5056
fun `parse should throw`() {
5157
val col by columnOf("a", "bc")

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ import org.jetbrains.kotlinx.dataframe.impl.api.StringParser
1212
import org.jetbrains.kotlinx.dataframe.impl.api.parseImpl
1313
import org.jetbrains.kotlinx.dataframe.impl.api.tryParseImpl
1414
import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser
15-
import org.jetbrains.kotlinx.dataframe.typeClass
1615
import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
1716
import org.jetbrains.kotlinx.dataframe.util.PARSER_OPTIONS
1817
import org.jetbrains.kotlinx.dataframe.util.PARSER_OPTIONS_COPY
@@ -302,6 +301,23 @@ public class ParserOptions(
302301
/** @include [tryParseImpl] */
303302
public fun DataColumn<String?>.tryParse(options: ParserOptions? = null): DataColumn<*> = tryParseImpl(options)
304303

304+
/**
305+
* Tries to parse a column of chars into a column of a different type.
306+
* Each parser in [Parsers] is run in order until a valid parser is found,
307+
* i.e. a parser that was able to parse all values in the column successfully. If a parser
308+
* fails to parse any value, the next parser is tried. If all the others fail, the final parser
309+
* returns strings.
310+
*
311+
* Parsers that are [covered by][StringParser.coveredBy] other parsers are skipped.
312+
*
313+
* @param options options for parsing, like providing a locale or a custom date-time formatter
314+
* @throws IllegalStateException if no valid parser is found (unlikely, unless the `String` parser is disabled)
315+
* @return a new column with parsed values
316+
*/
317+
@JvmName("tryParseChar")
318+
public fun DataColumn<Char?>.tryParse(options: ParserOptions? = null): DataColumn<*> =
319+
map { it?.toString() }.tryParseImpl(options)
320+
305321
public fun <T> DataFrame<T>.parse(options: ParserOptions? = null): DataFrame<T> =
306322
parse(options) {
307323
colsAtAnyDepth().filter { !it.isColumnGroup() }
@@ -323,7 +339,27 @@ public fun <T> DataFrame<T>.parse(options: ParserOptions? = null): DataFrame<T>
323339
* @return a new column with parsed values
324340
*/
325341
public fun DataColumn<String?>.parse(options: ParserOptions? = null): DataColumn<*> =
326-
tryParse(options).also { if (it.typeClass == String::class) error("Can't guess column type") }
342+
tryParse(options).also { if (it.isSubtypeOf<String?>()) error("Can't guess column type") }
343+
344+
/**
345+
* Tries to parse a column of chars as strings into a column of a different type.
346+
* Each parser in [Parsers] is run in order until a valid parser is found,
347+
* i.e. a parser that was able to parse all values in the column successfully. If a parser
348+
* fails to parse any value, the next parser is tried.
349+
*
350+
* If all fail, i.e. the result is still a `Char` or `String` column, an [IllegalStateException] is thrown. If you don't want this exception to be thrown,
351+
* use [tryParse] instead.
352+
*
353+
* Parsers that are [covered by][StringParser.coveredBy] other parsers are skipped.
354+
*
355+
* @param options options for parsing, like providing a locale or a custom date-time formatter
356+
* @return a new column with parsed values
357+
*/
358+
@JvmName("parseChar")
359+
public fun DataColumn<Char?>.parse(options: ParserOptions? = null): DataColumn<*> =
360+
map { it?.toString() }
361+
.tryParse(options)
362+
.also { if (it.isSubtypeOf<Char?>() || it.isSubtypeOf<String?>()) error("Can't guess column type") }
327363

328364
@JvmName("parseAnyFrameNullable")
329365
public fun DataColumn<AnyFrame?>.parse(options: ParserOptions? = null): DataColumn<AnyFrame?> =

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -716,29 +716,24 @@ internal fun <T> DataFrame<T>.parseImpl(options: ParserOptions?, columns: Column
716716
when {
717717
// when a frame column is requested to be parsed,
718718
// parse each value/frame column at any depth inside each DataFrame in the frame column
719-
col.isFrameColumn() -> {
719+
col.isFrameColumn() ->
720720
col.map {
721721
it.parseImpl(options) {
722722
colsAtAnyDepth().filter { !it.isColumnGroup() }
723723
}
724724
}
725-
}
726725

727726
// when a column group is requested to be parsed,
728727
// parse each column in the group
729-
col.isColumnGroup() -> {
728+
col.isColumnGroup() ->
730729
col.parseImpl(options) { all() }
731730
.asColumnGroup(col.name())
732731
.asDataColumn()
733-
}
734732

735733
// Base case, parse the column if it's a `String?` column
736-
col.isSubtypeOf<String?>() -> {
734+
col.isSubtypeOf<String?>() ->
737735
col.cast<String?>().tryParseImpl(options)
738-
}
739736

740-
else -> {
741-
col
742-
}
737+
else -> col
743738
}
744739
}

core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,8 @@ class ConvertTests {
221221

222222
// but
223223
columnOf('1', '2').convertToString().convertToInt() shouldBe columnOf(1, 2)
224+
// or
225+
columnOf('1', '2').parse() shouldBe columnOf(1, 2)
224226
}
225227

226228
@Test

0 commit comments

Comments
 (0)