Skip to content

Commit 7d64522

Browse files
authored
Fixing skip_rows bug and adding scanCSV options (pola-rs#147)
* Fixing skip_rows bug
* Bun linting changes
* Adding ReadCsvOptions options
1 parent 8389398 commit 7d64522

23 files changed

+410
-359
lines changed

__tests__/examples/datasets/empty.csv

Whitespace-only changes.

__tests__/io.test.ts

+43-4
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,8 @@ import fs from "fs";
55
// eslint-disable-next-line no-undef
66
const csvpath = path.resolve(__dirname, "./examples/datasets/foods1.csv");
77
// eslint-disable-next-line no-undef
8+
const emptycsvpath = path.resolve(__dirname, "./examples/datasets/empty.csv");
9+
// eslint-disable-next-line no-undef
810
const parquetpath = path.resolve(__dirname, "./examples/foods.parquet");
911
// eslint-disable-next-line no-undef
1012
const avropath = path.resolve(__dirname, "./examples/foods.avro");
@@ -63,13 +65,35 @@ describe("read:csv", () => {
6365
csvBuffer.toString("utf-8").slice(0, 22),
6466
);
6567
});
68+
it("can read csv with ragged lines", () => {
69+
const csvBuffer = Buffer.from("A\nB\nC,ragged\n", "utf-8");
70+
let df = pl.readCSV(csvBuffer);
71+
const expected = `shape: (2, 1)
72+
┌─────┐
73+
│ A │
74+
│ --- │
75+
│ str │
76+
╞═════╡
77+
│ B │
78+
│ C │
79+
└─────┘`;
80+
expect(df.toString()).toEqual(expected);
81+
const f = () => {
82+
df = pl.readCSV(csvBuffer, { truncateRaggedLines: false });
83+
};
84+
expect(f).toThrow();
85+
});
86+
it("can load empty csv", () => {
87+
const df = pl.readCSV(emptycsvpath, { raiseIfEmpty: false });
88+
expect(df.shape).toEqual({ height: 0, width: 0 });
89+
});
6690
it("can parse datetimes", () => {
6791
const csv = `timestamp,open,high
6892
2021-01-01 00:00:00,0.00305500,0.00306000
6993
2021-01-01 00:15:00,0.00298800,0.00300400
7094
2021-01-01 00:30:00,0.00298300,0.00300100
7195
2021-01-01 00:45:00,0.00299400,0.00304000`;
72-
const df = pl.readCSV(csv, { parseDates: true });
96+
const df = pl.readCSV(csv, { tryParseDates: true });
7397
expect(df.dtypes.map((dt) => dt.toJSON())).toEqual([
7498
pl.Datetime("us").toJSON(),
7599
pl.Float64.toJSON(),
@@ -159,21 +183,36 @@ describe("scan", () => {
159183
expect(df.shape).toEqual({ height: 27, width: 4 });
160184
});
161185
it("can lazy load (scan) from a csv file with options", () => {
162-
const df = pl
186+
let df = pl
163187
.scanCSV(csvpath, {
164188
hasHeader: false,
165-
skipRows: 1,
189+
skipRows: 2,
166190
nRows: 4,
167191
})
168192
.collectSync();
169193

170194
expect(df.shape).toEqual({ height: 4, width: 4 });
195+
196+
df = pl
197+
.scanCSV(csvpath, {
198+
hasHeader: true,
199+
skipRows: 2,
200+
nRows: 4,
201+
})
202+
.collectSync();
203+
204+
expect(df.shape).toEqual({ height: 4, width: 4 });
205+
});
206+
207+
it("can lazy load empty csv", () => {
208+
const df = pl.scanCSV(emptycsvpath, { raiseIfEmpty: false }).collectSync();
209+
expect(df.shape).toEqual({ height: 0, width: 0 });
171210
});
172211

173212
it("can lazy load (scan) from a parquet file with options", () => {
174213
pl.readCSV(csvpath, {
175214
hasHeader: false,
176-
skipRows: 1,
215+
skipRows: 2,
177216
nRows: 4,
178217
}).writeParquet(parquetpath);
179218

__tests__/setup.ts

+25-30
Original file line number | Diff line number | Diff line change
@@ -9,16 +9,15 @@ expect.extend({
99
message: () => "series matches",
1010
pass: true,
1111
};
12-
} else {
13-
return {
14-
message: () => `
12+
}
13+
return {
14+
message: () => `
1515
Expected:
1616
>>${expected}
1717
Received:
1818
>>${actual}`,
19-
pass: false,
20-
};
21-
}
19+
pass: false,
20+
};
2221
},
2322
toSeriesEqual(actual, expected) {
2423
const pass = actual.seriesEqual(expected);
@@ -27,16 +26,15 @@ Received:
2726
message: () => "series matches",
2827
pass: true,
2928
};
30-
} else {
31-
return {
32-
message: () => `
29+
}
30+
return {
31+
message: () => `
3332
Expected:
3433
>>${expected}
3534
Received:
3635
>>${actual}`,
37-
pass: false,
38-
};
39-
}
36+
pass: false,
37+
};
4038
},
4139
toFrameEqual(actual, expected, nullEqual?) {
4240
const pass = actual.frameEqual(expected, nullEqual);
@@ -45,16 +43,15 @@ Received:
4543
message: () => "dataframes match",
4644
pass: true,
4745
};
48-
} else {
49-
return {
50-
message: () => `
46+
}
47+
return {
48+
message: () => `
5149
Expected:
5250
>>${expected}
5351
Received:
5452
>>${actual}`,
55-
pass: false,
56-
};
57-
}
53+
pass: false,
54+
};
5855
},
5956
toFrameStrictEqual(actual, expected) {
6057
const frameEq = actual.frameEqual(expected);
@@ -64,16 +61,15 @@ Received:
6461
message: () => "dataframes match",
6562
pass: true,
6663
};
67-
} else {
68-
return {
69-
message: () => `
64+
}
65+
return {
66+
message: () => `
7067
Expected:
7168
>>${expected}
7269
Received:
7370
>>${actual}`,
74-
pass: false,
75-
};
76-
}
71+
pass: false,
72+
};
7773
},
7874
toFrameEqualIgnoringOrder(act: pl.DataFrame, exp: pl.DataFrame) {
7975
const actual = act.sort(act.columns.sort());
@@ -84,16 +80,15 @@ Received:
8480
message: () => "dataframes match",
8581
pass: true,
8682
};
87-
} else {
88-
return {
89-
message: () => `
83+
}
84+
return {
85+
message: () => `
9086
Expected:
9187
>>${expected}
9288
Received:
9389
>>${actual}`,
94-
pass: false,
95-
};
96-
}
90+
pass: false,
91+
};
9792
},
9893
});
9994

polars/dataframe.ts

+18-31
Original file line number | Diff line number | Diff line change
@@ -1708,9 +1708,8 @@ export interface DataFrame
17081708
function prepareOtherArg(anyValue: any): Series {
17091709
if (Series.isSeries(anyValue)) {
17101710
return anyValue;
1711-
} else {
1712-
return Series([anyValue]) as Series;
17131711
}
1712+
return Series([anyValue]) as Series;
17141713
}
17151714

17161715
function map(df: DataFrame, fn: (...args: any[]) => any[]) {
@@ -1841,9 +1840,8 @@ export const _DataFrame = (_df: any): DataFrame => {
18411840
df.getColumns().map((s) => {
18421841
if (s.isNumeric() || s.isBoolean()) {
18431842
return s.cast(DataType.Float64);
1844-
} else {
1845-
return s;
18461843
}
1844+
return s;
18471845
}),
18481846
);
18491847
};
@@ -1877,9 +1875,8 @@ export const _DataFrame = (_df: any): DataFrame => {
18771875
dropNulls(...subset) {
18781876
if (subset.length) {
18791877
return wrap("dropNulls", subset.flat(2));
1880-
} else {
1881-
return wrap("dropNulls");
18821878
}
1879+
return wrap("dropNulls");
18831880
},
18841881
distinct(opts: any = false, subset?, keep = "first") {
18851882
return this.unique(opts, subset);
@@ -2037,9 +2034,8 @@ export const _DataFrame = (_df: any): DataFrame => {
20372034
max(axis = 0) {
20382035
if (axis === 1) {
20392036
return _Series(_df.hmax() as any) as any;
2040-
} else {
2041-
return wrap("max");
20422037
}
2038+
return wrap("max");
20432039
},
20442040
mean(axis = 0, nullStrategy = "ignore") {
20452041
if (axis === 1) {
@@ -2057,9 +2053,8 @@ export const _DataFrame = (_df: any): DataFrame => {
20572053
min(axis = 0) {
20582054
if (axis === 1) {
20592055
return _Series(_df.hmin() as any) as any;
2060-
} else {
2061-
return wrap("min");
20622056
}
2057+
return wrap("min");
20632058
},
20642059
nChunks() {
20652060
return _df.nChunks();
@@ -2168,28 +2163,25 @@ export const _DataFrame = (_df: any): DataFrame => {
21682163
false,
21692164
seed,
21702165
);
2171-
} else {
2172-
throw new TypeError("must specify either 'frac' or 'n'");
21732166
}
2167+
throw new TypeError("must specify either 'frac' or 'n'");
21742168
},
21752169
select(...selection) {
21762170
const hasExpr = selection.flat().some((s) => Expr.isExpr(s));
21772171
if (hasExpr) {
21782172
return _DataFrame(_df).lazy().select(selection).collectSync();
2179-
} else {
2180-
return wrap("select", columnOrColumnsStrict(selection as any));
21812173
}
2174+
return wrap("select", columnOrColumnsStrict(selection as any));
21822175
},
21832176
shift: (opt) => wrap("shift", opt?.periods ?? opt),
21842177
shiftAndFill(n: any, fillValue?: number | undefined) {
21852178
if (typeof n === "number" && fillValue) {
21862179
return _DataFrame(_df).lazy().shiftAndFill(n, fillValue).collectSync();
2187-
} else {
2188-
return _DataFrame(_df)
2189-
.lazy()
2190-
.shiftAndFill(n.n, n.fillValue)
2191-
.collectSync();
21922180
}
2181+
return _DataFrame(_df)
2182+
.lazy()
2183+
.shiftAndFill(n.n, n.fillValue)
2184+
.collectSync();
21932185
},
21942186
shrinkToFit(inPlace: any = false): any {
21952187
if (inPlace) {
@@ -2408,9 +2400,8 @@ export const _DataFrame = (_df: any): DataFrame => {
24082400
}
24092401
if (!options?.columnNames) {
24102402
return wrap("transpose", keep_names_as, undefined);
2411-
} else {
2412-
return wrap("transpose", keep_names_as, options.columnNames);
24132403
}
2404+
return wrap("transpose", keep_names_as, options.columnNames);
24142405
},
24152406
unnest(names) {
24162407
names = Array.isArray(names) ? names : [names];
@@ -2428,28 +2419,25 @@ export const _DataFrame = (_df: any): DataFrame => {
24282419
withColumn(column: Series | Expr) {
24292420
if (Series.isSeries(column)) {
24302421
return wrap("withColumn", column.inner());
2431-
} else {
2432-
return this.withColumns(column);
24332422
}
2423+
return this.withColumns(column);
24342424
},
24352425
withColumns(...columns: (Expr | Series)[]) {
24362426
if (isSeriesArray(columns)) {
24372427
return columns.reduce(
24382428
(acc, curr) => acc.withColumn(curr),
24392429
_DataFrame(_df),
24402430
);
2441-
} else {
2442-
return this.lazy()
2443-
.withColumns(columns)
2444-
.collectSync({ noOptimization: true, stringCache: false });
24452431
}
2432+
return this.lazy()
2433+
.withColumns(columns)
2434+
.collectSync({ noOptimization: true, stringCache: false });
24462435
},
24472436
withColumnRenamed(opt, replacement?) {
24482437
if (typeof opt === "string") {
24492438
return this.rename({ [opt]: replacement });
2450-
} else {
2451-
return this.rename({ [opt.existing]: opt.replacement });
24522439
}
2440+
return this.rename({ [opt.existing]: opt.replacement });
24532441
},
24542442
withRowCount(name = "row_nr") {
24552443
return wrap("withRowCount", name);
@@ -2477,9 +2465,8 @@ export const _DataFrame = (_df: any): DataFrame => {
24772465
}
24782466
if (typeof prop !== "symbol" && !Number.isNaN(Number(prop))) {
24792467
return target.row(Number(prop));
2480-
} else {
2481-
return Reflect.get(target, prop, receiver);
24822468
}
2469+
return Reflect.get(target, prop, receiver);
24832470
},
24842471
set(target: DataFrame, prop, receiver) {
24852472
if (Series.isSeries(receiver)) {

0 commit comments

Comments
 (0)