Skip to content

Commit 50ce472

Browse files
committed
deprecating apache-based csv implementation
1 parent f63c633 commit 50ce472

File tree

3 files changed

+122
-27
lines changed

3 files changed

+122
-27
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt

+56-25
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,21 @@ import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadCsvMethod
1919
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod
2020
import org.jetbrains.kotlinx.dataframe.impl.api.parse
2121
import org.jetbrains.kotlinx.dataframe.impl.io.readDelimImpl
22+
import org.jetbrains.kotlinx.dataframe.util.APACHE_CSV
2223
import org.jetbrains.kotlinx.dataframe.util.AS_URL
2324
import org.jetbrains.kotlinx.dataframe.util.AS_URL_IMPORT
2425
import org.jetbrains.kotlinx.dataframe.util.AS_URL_REPLACE
2526
import org.jetbrains.kotlinx.dataframe.util.DF_READ_NO_CSV
2627
import org.jetbrains.kotlinx.dataframe.util.DF_READ_NO_CSV_REPLACE
28+
import org.jetbrains.kotlinx.dataframe.util.READ_CSV
29+
import org.jetbrains.kotlinx.dataframe.util.READ_CSV_FILE_OR_URL_REPLACE
30+
import org.jetbrains.kotlinx.dataframe.util.READ_CSV_FILE_REPLACE
31+
import org.jetbrains.kotlinx.dataframe.util.READ_CSV_IMPORT
32+
import org.jetbrains.kotlinx.dataframe.util.READ_CSV_STREAM_REPLACE
33+
import org.jetbrains.kotlinx.dataframe.util.READ_CSV_URL_REPLACE
34+
import org.jetbrains.kotlinx.dataframe.util.READ_DELIM
35+
import org.jetbrains.kotlinx.dataframe.util.READ_DELIM_READER_REPLACE
36+
import org.jetbrains.kotlinx.dataframe.util.READ_DELIM_STREAM_REPLACE
2737
import org.jetbrains.kotlinx.dataframe.values
2838
import java.io.BufferedInputStream
2939
import java.io.BufferedReader
@@ -46,6 +56,10 @@ import kotlin.reflect.KType
4656
import kotlin.reflect.typeOf
4757
import kotlin.time.Duration
4858

59+
@Deprecated(
60+
message = APACHE_CSV,
61+
level = DeprecationLevel.WARNING,
62+
)
4963
public class CSV(private val delimiter: Char = ',') : SupportedDataFrameFormat {
5064
override fun readDataFrame(stream: InputStream, header: List<String>): AnyFrame =
5165
DataFrame.readCSV(stream = stream, delimiter = delimiter, header = header)
@@ -57,14 +71,18 @@ public class CSV(private val delimiter: Char = ',') : SupportedDataFrameFormat {
5771

5872
override fun acceptsSample(sample: SupportedFormatSample): Boolean = true // Extension is enough
5973

60-
override val testOrder: Int = 20000
74+
override val testOrder: Int = 20_000
6175

6276
override fun createDefaultReadMethod(pathRepresentation: String?): DefaultReadDfMethod {
6377
val arguments = MethodArguments().add("delimiter", typeOf<Char>(), "'%L'", delimiter)
6478
return DefaultReadCsvMethod(pathRepresentation, arguments)
6579
}
6680
}
6781

82+
@Deprecated(
83+
message = APACHE_CSV,
84+
level = DeprecationLevel.WARNING,
85+
)
6886
public enum class CSVType(public val format: CSVFormat) {
6987
DEFAULT(
7088
CSVFormat.DEFAULT.builder()
@@ -87,6 +105,10 @@ internal fun isCompressed(file: File) = listOf("gz", "zip").contains(file.extens
87105

88106
internal fun isCompressed(url: URL) = isCompressed(url.path)
89107

108+
@Deprecated(
109+
message = APACHE_CSV,
110+
level = DeprecationLevel.HIDDEN, // clashes with the new readDelim
111+
)
90112
@Refine
91113
@Interpretable("ReadDelimStr")
92114
public fun DataFrame.Companion.readDelimStr(
@@ -106,7 +128,7 @@ public fun DataFrame.Companion.readDelimStr(
106128

107129
@Deprecated(
108130
message = DF_READ_NO_CSV,
109-
replaceWith = ReplaceWith(DF_READ_NO_CSV_REPLACE),
131+
replaceWith = ReplaceWith(DF_READ_NO_CSV_REPLACE, READ_CSV_IMPORT),
110132
level = DeprecationLevel.ERROR,
111133
)
112134
public fun DataFrame.Companion.read(
@@ -118,22 +140,13 @@ public fun DataFrame.Companion.read(
118140
readLines: Int? = null,
119141
duplicate: Boolean = true,
120142
charset: Charset = Charsets.UTF_8,
121-
): DataFrame<*> =
122-
catchHttpResponse(asUrl(fileOrUrl)) {
123-
readDelim(
124-
it,
125-
delimiter,
126-
header,
127-
isCompressed(fileOrUrl),
128-
getCSVType(fileOrUrl),
129-
colTypes,
130-
skipLines,
131-
readLines,
132-
duplicate,
133-
charset,
134-
)
135-
}
143+
): DataFrame<*> = error(DF_READ_NO_CSV)
136144

145+
@Deprecated(
146+
message = READ_CSV,
147+
replaceWith = ReplaceWith(READ_CSV_FILE_OR_URL_REPLACE, READ_CSV_IMPORT),
148+
level = DeprecationLevel.WARNING,
149+
)
137150
@OptInRefine
138151
@Interpretable("ReadCSV0")
139152
public fun DataFrame.Companion.readCSV(
@@ -163,6 +176,11 @@ public fun DataFrame.Companion.readCSV(
163176
)
164177
}
165178

179+
@Deprecated(
180+
message = READ_CSV,
181+
replaceWith = ReplaceWith(READ_CSV_FILE_REPLACE, READ_CSV_IMPORT),
182+
level = DeprecationLevel.WARNING,
183+
)
166184
public fun DataFrame.Companion.readCSV(
167185
file: File,
168186
delimiter: Char = ',',
@@ -188,6 +206,11 @@ public fun DataFrame.Companion.readCSV(
188206
parserOptions,
189207
)
190208

209+
@Deprecated(
210+
message = READ_CSV,
211+
replaceWith = ReplaceWith(READ_CSV_URL_REPLACE, READ_CSV_IMPORT),
212+
level = DeprecationLevel.WARNING,
213+
)
191214
public fun DataFrame.Companion.readCSV(
192215
url: URL,
193216
delimiter: Char = ',',
@@ -212,6 +235,11 @@ public fun DataFrame.Companion.readCSV(
212235
parserOptions,
213236
)
214237

238+
@Deprecated(
239+
message = READ_CSV,
240+
replaceWith = ReplaceWith(READ_CSV_STREAM_REPLACE, READ_CSV_IMPORT),
241+
level = DeprecationLevel.WARNING,
242+
)
215243
public fun DataFrame.Companion.readCSV(
216244
stream: InputStream,
217245
delimiter: Char = ',',
@@ -238,13 +266,6 @@ public fun DataFrame.Companion.readCSV(
238266
parserOptions,
239267
)
240268

241-
private fun getCSVType(path: String): CSVType =
242-
when (path.substringAfterLast('.').lowercase()) {
243-
"csv" -> CSVType.DEFAULT
244-
"tdf" -> CSVType.TDF
245-
else -> throw IOException("Unknown file format")
246-
}
247-
248269
@Deprecated(
249270
message = AS_URL,
250271
replaceWith = ReplaceWith(AS_URL_REPLACE, AS_URL_IMPORT),
@@ -264,6 +285,11 @@ private fun getFormat(
264285
.setAllowMissingColumnNames(duplicate)
265286
.build()
266287

288+
@Deprecated(
289+
message = READ_DELIM,
290+
replaceWith = ReplaceWith(READ_DELIM_STREAM_REPLACE),
291+
level = DeprecationLevel.WARNING,
292+
)
267293
public fun DataFrame.Companion.readDelim(
268294
inStream: InputStream,
269295
delimiter: Char = ',',
@@ -343,6 +369,11 @@ public fun ColType.toKType(): KType =
343369
ColType.Char -> typeOf<Char>()
344370
}
345371

372+
@Deprecated(
373+
message = READ_DELIM,
374+
replaceWith = ReplaceWith(READ_DELIM_READER_REPLACE),
375+
level = DeprecationLevel.WARNING,
376+
)
346377
public fun DataFrame.Companion.readDelim(
347378
reader: Reader,
348379
format: CSVFormat = CSVFormat.DEFAULT.builder()
@@ -370,7 +401,7 @@ public fun DataFrame.Companion.readDelim(
370401
"`DataFrame.readCSV()`.",
371402
)
372403
}
373-
404+
// TODO: deprecate the Apache-based writeCSV overloads below as well
374405
public fun AnyFrame.writeCSV(file: File, format: CSVFormat = CSVFormat.DEFAULT): Unit =
375406
writeCSV(FileWriter(file), format)
376407

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt

+31
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,23 @@ import org.jetbrains.kotlinx.dataframe.DataFrame
55
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
66
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod
77
import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadTsvMethod
8+
import org.jetbrains.kotlinx.dataframe.util.APACHE_CSV
9+
import org.jetbrains.kotlinx.dataframe.util.READ_TSV
10+
import org.jetbrains.kotlinx.dataframe.util.READ_TSV_FILE_OR_URL_REPLACE
11+
import org.jetbrains.kotlinx.dataframe.util.READ_TSV_FILE_REPLACE
12+
import org.jetbrains.kotlinx.dataframe.util.READ_TSV_IMPORT
13+
import org.jetbrains.kotlinx.dataframe.util.READ_TSV_STREAM_REPLACE
14+
import org.jetbrains.kotlinx.dataframe.util.READ_TSV_URL_REPLACE
815
import java.io.File
916
import java.io.FileInputStream
1017
import java.io.InputStream
1118
import java.net.URL
1219
import java.nio.charset.Charset
1320

21+
@Deprecated(
22+
message = APACHE_CSV,
23+
level = DeprecationLevel.WARNING,
24+
)
1425
public class TSV : SupportedDataFrameFormat {
1526
override fun readDataFrame(stream: InputStream, header: List<String>): AnyFrame =
1627
DataFrame.readTSV(stream, header = header)
@@ -29,6 +40,11 @@ public class TSV : SupportedDataFrameFormat {
2940

3041
private const val TAB_CHAR = '\t'
3142

43+
@Deprecated(
44+
message = READ_TSV,
45+
replaceWith = ReplaceWith(READ_TSV_FILE_OR_URL_REPLACE, READ_TSV_IMPORT),
46+
level = DeprecationLevel.WARNING,
47+
)
3248
public fun DataFrame.Companion.readTSV(
3349
fileOrUrl: String,
3450
header: List<String> = listOf(),
@@ -55,6 +71,11 @@ public fun DataFrame.Companion.readTSV(
5571
)
5672
}
5773

74+
@Deprecated(
75+
message = READ_TSV,
76+
replaceWith = ReplaceWith(READ_TSV_FILE_REPLACE, READ_TSV_IMPORT),
77+
level = DeprecationLevel.WARNING,
78+
)
5879
public fun DataFrame.Companion.readTSV(
5980
file: File,
6081
header: List<String> = listOf(),
@@ -77,6 +98,11 @@ public fun DataFrame.Companion.readTSV(
7798
charset,
7899
)
79100

101+
@Deprecated(
102+
message = READ_TSV,
103+
replaceWith = ReplaceWith(READ_TSV_URL_REPLACE, READ_TSV_IMPORT),
104+
level = DeprecationLevel.WARNING,
105+
)
80106
public fun DataFrame.Companion.readTSV(
81107
url: URL,
82108
header: List<String> = listOf(),
@@ -99,6 +125,11 @@ public fun DataFrame.Companion.readTSV(
99125
parserOptions,
100126
)
101127

128+
@Deprecated(
129+
message = READ_TSV,
130+
replaceWith = ReplaceWith(READ_TSV_STREAM_REPLACE, READ_TSV_IMPORT),
131+
level = DeprecationLevel.WARNING,
132+
)
102133
public fun DataFrame.Companion.readTSV(
103134
stream: InputStream,
104135
header: List<String> = listOf(),

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/util/deprecationMessages.kt

+35-2
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@ package org.jetbrains.kotlinx.dataframe.util
1111

1212
private const val MESSAGE_0_16 = "Will be ERROR in 0.16."
1313

14-
internal const val DF_READ_NO_CSV = "This function is deprecated and should be replaced with `readCSV`. $MESSAGE_0_16"
14+
internal const val DF_READ_NO_CSV = "This function is deprecated and should be replaced with `readCsv`. $MESSAGE_0_16"
1515
// ReplaceWith expression for the deprecated DataFrame.read(fileOrUrl, ...).
// It may only reference names that are parameters of that function; `read` has no
// `parserOptions` parameter, so none is passed on to readCsv here.
internal const val DF_READ_NO_CSV_REPLACE =
16-
"this.readCSV(fileOrUrl, delimiter, header, colTypes, skipLines, readLines, duplicate, charset)"
16+
"this.readCsv(fileOrUrl = fileOrUrl, delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate)"
1717

1818
internal const val CREATE_FRAME_COLUMN =
1919
"Removed from public API as this can likely better be solved by `DataFrame.chunked()`. Replaced by internal df.chunkedImpl(). $MESSAGE_0_16"
@@ -66,6 +66,39 @@ internal const val MINUS_REPLACE = "this.remove(columns)"
6666

6767
private const val MESSAGE_0_17 = "Will be ERROR in 0.17."
6868

69+
internal const val APACHE_CSV =
70+
"The Apache-based CSV/TSV reader is deprecated in favor of the new Deephaven CSV reader. $MESSAGE_0_17"
71+
internal const val READ_CSV =
72+
"Apache-based readCSV() is deprecated in favor of Deephaven-based readCsv(). $MESSAGE_0_17"
73+
internal const val READ_CSV_IMPORT = "org.jetbrains.kotlinx.dataframe.io.readCsv"
74+
internal const val READ_CSV_FILE_OR_URL_REPLACE =
75+
"this.readCsv(fileOrUrl = fileOrUrl, delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)"
76+
internal const val READ_CSV_FILE_REPLACE =
77+
"this.readCsv(file = file, delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)"
78+
internal const val READ_CSV_URL_REPLACE =
79+
"this.readCsv(url = url, delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)"
80+
internal const val READ_CSV_STREAM_REPLACE =
81+
"this.readCsv(inputStream = stream, delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)"
82+
83+
internal const val READ_DELIM =
84+
"Apache-based readDelim() is deprecated in favor of Deephaven-based readDelim(). $MESSAGE_0_17"
85+
internal const val READ_DELIM_STREAM_REPLACE =
86+
"this.readDelim(inputStream = inStream, delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)"
87+
internal const val READ_DELIM_READER_REPLACE =
88+
"this.readDelimStr(text = reader.readText(), delimiter = delimiter, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)"
89+
90+
internal const val READ_TSV =
91+
"Apache-based readTSV() is deprecated in favor of Deephaven-based readTsv(). $MESSAGE_0_17"
92+
internal const val READ_TSV_IMPORT = "org.jetbrains.kotlinx.dataframe.io.readTsv"
93+
// ReplaceWith expression for the deprecated readTSV(fileOrUrl, ...).
// The deprecated readTSV overloads expose no `delimiter` parameter, so the
// replacement must not reference one.
internal const val READ_TSV_FILE_OR_URL_REPLACE =
94+
"this.readTsv(fileOrUrl = fileOrUrl, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)"
95+
// ReplaceWith expression for the deprecated readTSV(file, ...).
// The deprecated readTSV overloads expose no `delimiter` parameter, so the
// replacement must not reference one.
internal const val READ_TSV_FILE_REPLACE =
96+
"this.readTsv(file = file, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)"
97+
// ReplaceWith expression for the deprecated readTSV(url, ...).
// The deprecated readTSV overloads expose no `delimiter` parameter, so the
// replacement must not reference one.
internal const val READ_TSV_URL_REPLACE =
98+
"this.readTsv(url = url, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)"
99+
// ReplaceWith expression for the deprecated readTSV(stream, ...).
// The deprecated readTSV overloads expose no `delimiter` parameter, so the
// replacement must not reference one.
internal const val READ_TSV_STREAM_REPLACE =
100+
"this.readTsv(inputStream = stream, header = header, colTypes = colTypes, skipLines = skipLines, readLines = readLines, allowMissingColumns = duplicate, parserOptions = parserOptions)"
101+
69102
// endregion
70103

71104
// region keep across releases

0 commit comments

Comments
 (0)