Skip to content

Commit b24db62

Browse files
committed
moved Compression to :core and simplified it to a fun interface
1 parent b57ec5b commit b24db62

File tree

15 files changed

+166
-251
lines changed

15 files changed

+166
-251
lines changed

core/api/core.api

+59
Original file line numberDiff line numberDiff line change
@@ -10203,6 +10203,65 @@ public final class org/jetbrains/kotlinx/dataframe/io/CommonKt {
1020310203
public static final fun urlAsFile (Ljava/net/URL;)Ljava/io/File;
1020410204
}
1020510205

10206+
public abstract interface class org/jetbrains/kotlinx/dataframe/io/Compression {
10207+
public static final field Companion Lorg/jetbrains/kotlinx/dataframe/io/Compression$Companion;
10208+
public abstract fun doFinally (Ljava/io/InputStream;)V
10209+
public abstract fun doFirst (Ljava/io/InputStream;)V
10210+
public abstract fun wrapStream (Ljava/io/InputStream;)Ljava/io/InputStream;
10211+
}
10212+
10213+
public final class org/jetbrains/kotlinx/dataframe/io/Compression$Companion {
10214+
public final fun of (Ljava/io/File;)Lorg/jetbrains/kotlinx/dataframe/io/Compression;
10215+
public final fun of (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/io/Compression;
10216+
public final fun of (Ljava/net/URL;)Lorg/jetbrains/kotlinx/dataframe/io/Compression;
10217+
public final fun of (Ljava/nio/file/Path;)Lorg/jetbrains/kotlinx/dataframe/io/Compression;
10218+
}
10219+
10220+
public final class org/jetbrains/kotlinx/dataframe/io/Compression$DefaultImpls {
10221+
public static fun doFinally (Lorg/jetbrains/kotlinx/dataframe/io/Compression;Ljava/io/InputStream;)V
10222+
public static fun doFirst (Lorg/jetbrains/kotlinx/dataframe/io/Compression;Ljava/io/InputStream;)V
10223+
}
10224+
10225+
public final class org/jetbrains/kotlinx/dataframe/io/Compression$Gzip : org/jetbrains/kotlinx/dataframe/io/Compression {
10226+
public static final field INSTANCE Lorg/jetbrains/kotlinx/dataframe/io/Compression$Gzip;
10227+
public synthetic fun doFinally (Ljava/io/InputStream;)V
10228+
public fun doFinally (Ljava/util/zip/GZIPInputStream;)V
10229+
public synthetic fun doFirst (Ljava/io/InputStream;)V
10230+
public fun doFirst (Ljava/util/zip/GZIPInputStream;)V
10231+
public fun equals (Ljava/lang/Object;)Z
10232+
public fun hashCode ()I
10233+
public fun toString ()Ljava/lang/String;
10234+
public synthetic fun wrapStream (Ljava/io/InputStream;)Ljava/io/InputStream;
10235+
public fun wrapStream (Ljava/io/InputStream;)Ljava/util/zip/GZIPInputStream;
10236+
}
10237+
10238+
public final class org/jetbrains/kotlinx/dataframe/io/Compression$None : org/jetbrains/kotlinx/dataframe/io/Compression {
10239+
public static final field INSTANCE Lorg/jetbrains/kotlinx/dataframe/io/Compression$None;
10240+
public fun doFinally (Ljava/io/InputStream;)V
10241+
public fun doFirst (Ljava/io/InputStream;)V
10242+
public fun equals (Ljava/lang/Object;)Z
10243+
public fun hashCode ()I
10244+
public fun toString ()Ljava/lang/String;
10245+
public fun wrapStream (Ljava/io/InputStream;)Ljava/io/InputStream;
10246+
}
10247+
10248+
public final class org/jetbrains/kotlinx/dataframe/io/Compression$Zip : org/jetbrains/kotlinx/dataframe/io/Compression {
10249+
public static final field INSTANCE Lorg/jetbrains/kotlinx/dataframe/io/Compression$Zip;
10250+
public synthetic fun doFinally (Ljava/io/InputStream;)V
10251+
public fun doFinally (Ljava/util/zip/ZipInputStream;)V
10252+
public synthetic fun doFirst (Ljava/io/InputStream;)V
10253+
public fun doFirst (Ljava/util/zip/ZipInputStream;)V
10254+
public fun equals (Ljava/lang/Object;)Z
10255+
public fun hashCode ()I
10256+
public fun toString ()Ljava/lang/String;
10257+
public synthetic fun wrapStream (Ljava/io/InputStream;)Ljava/io/InputStream;
10258+
public fun wrapStream (Ljava/io/InputStream;)Ljava/util/zip/ZipInputStream;
10259+
}
10260+
10261+
public final class org/jetbrains/kotlinx/dataframe/io/CompressionKt {
10262+
public static final fun useDecompressed (Ljava/io/InputStream;Lorg/jetbrains/kotlinx/dataframe/io/Compression;Lkotlin/jvm/functions/Function1;)Ljava/lang/Object;
10263+
}
10264+
1020610265
public final class org/jetbrains/kotlinx/dataframe/io/CsvKt {
1020710266
public static final fun asURL (Ljava/lang/String;)Ljava/net/URL;
1020810267
public static final fun read (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/lang/String;CLjava/util/List;Ljava/util/Map;ILjava/lang/Integer;ZLjava/nio/charset/Charset;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;

core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Schemas.kt

+2-2
Original file line numberDiff line numberDiff line change
@@ -127,8 +127,8 @@ class Schemas {
127127
@TransformDataFrameExpressions
128128
fun useInferredSchema() {
129129
// SampleStart
130-
// Repository.readCSV() has argument 'path' with default value https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv
131-
val df = Repository.readCSV()
130+
// Repository.readCsv() has argument 'path' with default value https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv
131+
val df = Repository.readCsv()
132132
// Use generated properties to access data in rows
133133
df.maxBy { stargazersCount }.print()
134134
// Or to access columns in dataframe.
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,12 @@ import java.util.zip.InflaterInputStream
99
import java.util.zip.ZipInputStream
1010

1111
/**
12-
* Compression algorithm to use when reading csv files.
12+
* Compression algorithm to use when reading files.
1313
* We support [GZIP][Compression.Gzip] and [ZIP][Compression.Zip] compression out of the box.
1414
*
15-
* Custom decompression algorithms can be added by creating an instance of [Custom].
16-
*
17-
* @param wrapStream function that wraps any [InputStream] into a decompressing [InflaterInputStream] stream
15+
* Custom decompression algorithms can be added by creating an instance of [Compression].
1816
*/
19-
public sealed class Compression<I : InputStream>(public open val wrapStream: (InputStream) -> I) {
20-
17+
public fun interface Compression<I : InputStream> {
2118
public companion object {
2219
public fun of(fileOrUrl: String): Compression<*> =
2320
when (fileOrUrl.split(".").last()) {
@@ -33,26 +30,25 @@ public sealed class Compression<I : InputStream>(public open val wrapStream: (In
3330
public fun of(url: URL): Compression<*> = of(url.path)
3431
}
3532

33+
/** Wraps any [InputStream] into a decompressing [InflaterInputStream] stream */
34+
public fun wrapStream(inputStream: InputStream): I
35+
3636
/** Can be overridden to perform some actions before reading from the input stream. */
37-
public open fun doFirst(inputStream: I) {}
37+
public fun doFirst(inputStream: I) {}
3838

3939
/**
4040
* Can be overridden to perform some actions after reading from the input stream.
4141
* Remember to close the stream if you override this function.
4242
*/
43-
public open fun doFinally(inputStream: I) {
43+
public fun doFinally(inputStream: I) {
4444
inputStream.close()
4545
}
4646

47-
/**
48-
* For .gz / GZIP files.
49-
*/
50-
public data object Gzip : Compression<GZIPInputStream>(wrapStream = ::GZIPInputStream)
47+
/** For .gz / GZIP files */
48+
public data object Gzip : Compression<GZIPInputStream> by Compression(::GZIPInputStream)
5149

52-
/**
53-
* For .zip / ZIP files.
54-
*/
55-
public data object Zip : Compression<ZipInputStream>(wrapStream = ::ZipInputStream) {
50+
/** For .zip / ZIP files */
51+
public data object Zip : Compression<ZipInputStream> by Compression(::ZipInputStream) {
5652

5753
override fun doFirst(inputStream: ZipInputStream) {
5854
// Make sure to call nextEntry once to prepare the stream
@@ -69,21 +65,8 @@ public sealed class Compression<I : InputStream>(public open val wrapStream: (In
6965
}
7066
}
7167

72-
/**
73-
* No compression.
74-
*/
75-
public data object None : Compression<InputStream>(wrapStream = { it })
76-
77-
/**
78-
* Custom decompression algorithm.
79-
*
80-
* Can either be extended or instantiated directly with a custom [wrapStream] function.
81-
* @param wrapStream function that wraps any [InputStream] into a decompressing [InputStream]
82-
*/
83-
public open class Custom<I : InputStream>(override val wrapStream: (InputStream) -> I) :
84-
Compression<I>(wrapStream = wrapStream) {
85-
override fun toString(): String = "Compression.Custom(wrapStream = $wrapStream)"
86-
}
68+
/** No compression */
69+
public data object None : Compression<InputStream> by Compression({ it })
8770
}
8871

8972
/**

dataframe-csv/api/dataframe-csv.api

-50
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,3 @@
1-
public abstract class org/jetbrains/kotlinx/dataframe/io/Compression {
2-
public static final field Companion Lorg/jetbrains/kotlinx/dataframe/io/Compression$Companion;
3-
public synthetic fun <init> (Lkotlin/jvm/functions/Function1;Lkotlin/jvm/internal/DefaultConstructorMarker;)V
4-
public fun doFinally (Ljava/io/InputStream;)V
5-
public fun doFirst (Ljava/io/InputStream;)V
6-
public fun getWrapStream ()Lkotlin/jvm/functions/Function1;
7-
}
8-
9-
public final class org/jetbrains/kotlinx/dataframe/io/Compression$Companion {
10-
public final fun of (Ljava/io/File;)Lorg/jetbrains/kotlinx/dataframe/io/Compression;
11-
public final fun of (Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/io/Compression;
12-
public final fun of (Ljava/net/URL;)Lorg/jetbrains/kotlinx/dataframe/io/Compression;
13-
public final fun of (Ljava/nio/file/Path;)Lorg/jetbrains/kotlinx/dataframe/io/Compression;
14-
}
15-
16-
public class org/jetbrains/kotlinx/dataframe/io/Compression$Custom : org/jetbrains/kotlinx/dataframe/io/Compression {
17-
public fun <init> (Lkotlin/jvm/functions/Function1;)V
18-
public fun getWrapStream ()Lkotlin/jvm/functions/Function1;
19-
public fun toString ()Ljava/lang/String;
20-
}
21-
22-
public final class org/jetbrains/kotlinx/dataframe/io/Compression$Gzip : org/jetbrains/kotlinx/dataframe/io/Compression {
23-
public static final field INSTANCE Lorg/jetbrains/kotlinx/dataframe/io/Compression$Gzip;
24-
public fun equals (Ljava/lang/Object;)Z
25-
public fun hashCode ()I
26-
public fun toString ()Ljava/lang/String;
27-
}
28-
29-
public final class org/jetbrains/kotlinx/dataframe/io/Compression$None : org/jetbrains/kotlinx/dataframe/io/Compression {
30-
public static final field INSTANCE Lorg/jetbrains/kotlinx/dataframe/io/Compression$None;
31-
public fun equals (Ljava/lang/Object;)Z
32-
public fun hashCode ()I
33-
public fun toString ()Ljava/lang/String;
34-
}
35-
36-
public final class org/jetbrains/kotlinx/dataframe/io/Compression$Zip : org/jetbrains/kotlinx/dataframe/io/Compression {
37-
public static final field INSTANCE Lorg/jetbrains/kotlinx/dataframe/io/Compression$Zip;
38-
public synthetic fun doFinally (Ljava/io/InputStream;)V
39-
public fun doFinally (Ljava/util/zip/ZipInputStream;)V
40-
public synthetic fun doFirst (Ljava/io/InputStream;)V
41-
public fun doFirst (Ljava/util/zip/ZipInputStream;)V
42-
public fun equals (Ljava/lang/Object;)Z
43-
public fun hashCode ()I
44-
public fun toString ()Ljava/lang/String;
45-
}
46-
47-
public final class org/jetbrains/kotlinx/dataframe/io/CompressionKt {
48-
public static final fun useDecompressed (Ljava/io/InputStream;Lorg/jetbrains/kotlinx/dataframe/io/Compression;Lkotlin/jvm/functions/Function1;)Ljava/lang/Object;
49-
}
50-
511
public final class org/jetbrains/kotlinx/dataframe/io/CsvDeephaven : org/jetbrains/kotlinx/dataframe/io/SupportedDataFrameFormat {
522
public fun <init> ()V
533
public fun <init> (C)V

dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DelimParams.kt

-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import io.deephaven.csv.CsvSpecs
44
import org.apache.commons.csv.CSVFormat
55
import org.jetbrains.kotlinx.dataframe.DataFrame
66
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
7-
import org.jetbrains.kotlinx.dataframe.api.parser
87
import org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses
98
import org.jetbrains.kotlinx.dataframe.io.AdjustCSVFormat
109
import org.jetbrains.kotlinx.dataframe.io.AdjustCsvSpecs

dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/readDelim.kt

+18-26
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@ import java.io.InputStream
4545
import java.math.BigDecimal
4646
import java.math.BigInteger
4747
import java.net.URL
48-
import java.util.Locale
4948
import kotlin.reflect.KType
5049
import kotlin.reflect.full.withNullability
5150
import kotlin.reflect.typeOf
@@ -62,7 +61,7 @@ import kotlin.time.Duration
6261
* (use [skipLines] if there's a header in the data).
6362
* If empty (default), the header will be read from the data.
6463
* @param compression The compression of the data.
65-
* Default: [Compression.None][org.jetbrains.kotlinx.dataframe.io.Compression.None], unless detected otherwise from the input file or url.
64+
* Default: [Compression.None], unless detected otherwise from the input file or url.
6665
* @param colTypes The expected [ColType] per column name. Default: empty map, a.k.a. infer every column type.
6766
*
6867
* If supplied for a certain column name (inferred from data or given by [header]),
@@ -170,14 +169,7 @@ internal fun readDelimImpl(
170169
hasFixedWidthColumns(hasFixedWidthColumns)
171170
if (hasFixedWidthColumns && fixedColumnWidths.isNotEmpty()) fixedColumnWidths(fixedColumnWidths)
172171
skipLines(takeHeaderFromCsv = header.isEmpty(), skipLines = skipLines)
173-
174-
// Deephaven's LocalDateTime parser is unconfigurable, so if the user provides a locale, pattern, or formatter
175-
// that's not compatible, we must use our own parser for LocalDateTime and let Deephaven read them as Strings.
176-
val useDeepHavenLocalDateTime =
177-
(parserOptions?.locale ?: DataFrame.parser.locale) in setOf(Locale.ROOT, Locale.US, Locale.ENGLISH) &&
178-
parserOptions?.dateTimePattern == null &&
179-
parserOptions?.dateTimeFormatter == null
180-
parsers(parserOptions, colTypes, useDeepHavenLocalDateTime)
172+
parsers(parserOptions, colTypes)
181173

182174
adjustCsvSpecs(this, this)
183175
}.build()
@@ -310,41 +302,43 @@ private fun CsvSpecs.Builder.skipLines(takeHeaderFromCsv: Boolean, skipLines: Lo
310302
* Logic overview:
311303
*
312304
* - if no [colTypes] are given
313-
* - let deephaven use all its [default parsers][Parsers.DEFAULT]
305+
* - let deephaven use all its [default parsers][Parsers.DEFAULT] minus [Parsers.DATETIME]
314306
* - subtract parsers of [skipTypes][ParserOptions.skipTypes] if those are supplied
315307
* - if [colTypes] are supplied
316308
* - if [ColType.DEFAULT] is among the values
317309
* - set the parser for each supplied column+colType
318310
* - let deephaven use _only_ the parser given as [ColType.DEFAULT] type
319311
* - if [ColType.DEFAULT] is not among the values
320312
* - set the parser for each supplied column+coltype
321-
* - let deephaven use all its [default parsers][Parsers.DEFAULT]
313+
* - let deephaven use all its [default parsers][Parsers.DEFAULT] minus [Parsers.DATETIME]
322314
* - subtract parsers of [skipTypes][ParserOptions.skipTypes] if those are supplied
323315
*
316+
* We will not use [Deephaven's DateTime parser][Parsers.DATETIME].
317+
* This is done to avoid different behavior compared to [DataFrame.parse];
318+
* Deephaven parses [Instant] as [LocalDateTime]. [Issue #1047](https://github.com/Kotlin/dataframe/issues/1047)
319+
*
324320
* Note that `skipTypes` will never skip a type explicitly set by `colTypes`.
325321
* This is intended.
326322
*/
327-
private fun CsvSpecs.Builder.parsers(
328-
parserOptions: ParserOptions?,
329-
colTypes: Map<String, ColType>,
330-
useDeepHavenLocalDateTime: Boolean,
331-
): CsvSpecs.Builder {
323+
private fun CsvSpecs.Builder.parsers(parserOptions: ParserOptions?, colTypes: Map<String, ColType>): CsvSpecs.Builder {
332324
for ((colName, colType) in colTypes) {
333325
if (colName == ColType.DEFAULT) continue
334-
putParserForName(colName, colType.toCsvParser(useDeepHavenLocalDateTime))
326+
putParserForName(colName, colType.toCsvParser())
335327
}
328+
// BOOLEAN, INT, LONG, DOUBLE, CHAR, STRING
329+
val defaultParsers = Parsers.DEFAULT - Parsers.DATETIME
336330
val skipTypes = parserOptions?.skipTypes ?: DataFrame.parser.skipTypes
337331
val parsersToUse = when {
338332
ColType.DEFAULT in colTypes ->
339-
listOf(colTypes[ColType.DEFAULT]!!.toCsvParser(useDeepHavenLocalDateTime))
333+
listOf(colTypes[ColType.DEFAULT]!!.toCsvParser(), Parsers.STRING)
340334

341335
skipTypes.isNotEmpty() -> {
342336
val parsersToSkip = skipTypes
343-
.mapNotNull { it.toColType().toCsvParserOrNull(useDeepHavenLocalDateTime) }
344-
Parsers.DEFAULT.toSet() - parsersToSkip.toSet()
337+
.mapNotNull { it.toColType().toCsvParserOrNull() }
338+
defaultParsers.toSet() - parsersToSkip.toSet()
345339
}
346340

347-
else -> Parsers.DEFAULT // BOOLEAN, INT, LONG, DOUBLE, DATETIME, CHAR, STRING
341+
else -> defaultParsers
348342
}
349343
parsers(parsersToUse)
350344
return this
@@ -363,24 +357,22 @@ private fun CsvSpecs.Builder.header(header: List<String>): CsvSpecs.Builder =
363357
* Converts a [ColType] to a [Parser] from the Deephaven CSV library.
364358
* If no direct [Parser] exists, it returns `null`.
365359
*/
366-
internal fun ColType.toCsvParserOrNull(useDeepHavenLocalDateTime: Boolean): Parser<*>? =
360+
internal fun ColType.toCsvParserOrNull(): Parser<*>? =
367361
when (this) {
368362
ColType.Int -> Parsers.INT
369363
ColType.Long -> Parsers.LONG
370364
ColType.Double -> Parsers.DOUBLE
371365
ColType.Char -> Parsers.CHAR
372366
ColType.Boolean -> Parsers.BOOLEAN
373367
ColType.String -> Parsers.STRING
374-
ColType.LocalDateTime -> if (useDeepHavenLocalDateTime) Parsers.DATETIME else null
375368
else -> null
376369
}
377370

378371
/**
379372
* Converts a [ColType] to a [Parser] from the Deephaven CSV library.
380373
* If no direct [Parser] exists, it defaults to [Parsers.STRING] so that [DataFrame.parse] can handle it.
381374
*/
382-
internal fun ColType.toCsvParser(useDeepHavenLocalDateTime: Boolean): Parser<*> =
383-
toCsvParserOrNull(useDeepHavenLocalDateTime) ?: Parsers.STRING
375+
internal fun ColType.toCsvParser(): Parser<*> = toCsvParserOrNull() ?: Parsers.STRING
384376

385377
internal fun KType.toColType(): ColType =
386378
when (this.withNullability(false)) {

dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt

+2-2
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@ import kotlin.reflect.typeOf
1212

1313
public class CsvDeephaven(private val delimiter: Char = DelimParams.CSV_DELIMITER) : SupportedDataFrameFormat {
1414
override fun readDataFrame(stream: InputStream, header: List<String>): DataFrame<*> =
15-
DataFrame.readCsv(inputStream = stream, header = header)
15+
DataFrame.readCsv(inputStream = stream, header = header, delimiter = delimiter)
1616

1717
override fun readDataFrame(file: File, header: List<String>): DataFrame<*> =
18-
DataFrame.readCsv(file = file, header = header)
18+
DataFrame.readCsv(file = file, header = header, delimiter = delimiter)
1919

2020
override fun acceptsExtension(ext: String): Boolean = ext == "csv"
2121

0 commit comments

Comments
 (0)