Skip to content

Commit 8ac81c6

Browse files
authored
Setting inferSchemaLength = 0 when null (pola-rs#285)
Setting inferSchemaLength = 0 when null to close pola-rs#279
1 parent 64a3d63 commit 8ac81c6

File tree

4 files changed

+21
-4
lines changed

4 files changed

+21
-4
lines changed

__tests__/io.test.ts

+8-2
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ describe("read:csv", () => {
2424
expect(df.shape).toEqual({ height: 27, width: 4 });
2525
});
2626
it("can read from a csv file with inferSchemaLength = 0 option", () => {
27-
const df = pl.readCSV(csvpath, { inferSchemaLength: 0 });
27+
let df = pl.readCSV(csvpath, { inferSchemaLength: 0 });
2828
const expected = `shape: (1, 4)
2929
┌────────────┬──────────┬────────┬──────────┐
3030
│ category ┆ calories ┆ fats_g ┆ sugars_g │
@@ -34,6 +34,8 @@ describe("read:csv", () => {
3434
│ vegetables ┆ 45 ┆ 0.5 ┆ 2 │
3535
└────────────┴──────────┴────────┴──────────┘`;
3636
expect(df.head(1).toString()).toEqual(expected);
37+
df = pl.readCSV(csvpath, { inferSchemaLength: null });
38+
expect(df.head(1).toString()).toEqual(expected);
3739
});
3840
it("can read from a csv file with options", () => {
3941
const df = pl.readCSV(csvpath, { hasHeader: false, skipRows: 1, nRows: 4 });
@@ -154,7 +156,11 @@ describe("read:json", () => {
154156
expect(df.shape).toEqual({ height: 27, width: 4 });
155157
});
156158
it("can specify read options", () => {
157-
const df = pl.readJSON(jsonpath, { batchSize: 10, inferSchemaLength: 100 });
159+
let df = pl.readJSON(jsonpath, { batchSize: 10, inferSchemaLength: 100 });
160+
expect(df.shape).toEqual({ height: 27, width: 4 });
161+
df = pl.readJSON(jsonpath, { batchSize: 10, inferSchemaLength: null });
162+
expect(df.shape).toEqual({ height: 27, width: 4 });
163+
df = pl.readJSON(jsonpath, { batchSize: 10, inferSchemaLength: 0 });
158164
expect(df.shape).toEqual({ height: 27, width: 4 });
159165
});
160166
it("can read from a json buffer", () => {

polars/io.ts

+11
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,9 @@ export function readCSV(pathOrBody, options?) {
186186
options = { ...readCsvDefaultOptions, ...options };
187187
const extensions = [".tsv", ".csv"];
188188

189+
// Treat a `null` or undefined inferSchemaLength as 0
190+
options.inferSchemaLength = options.inferSchemaLength ?? 0;
191+
189192
if (Buffer.isBuffer(pathOrBody)) {
190193
return _DataFrame(pli.readCsv(pathOrBody, options));
191194
}
@@ -275,6 +278,8 @@ export function scanCSV(
275278
export function scanCSV(path, options?) {
276279
options = { ...scanCsvDefaultOptions, ...options };
277280

281+
// Treat a `null` or undefined inferSchemaLength as 0
282+
options.inferSchemaLength = options.inferSchemaLength ?? 0;
278283
return _LazyDataFrame(pli.scanCsv(path, options));
279284
}
280285
/**
@@ -320,6 +325,10 @@ export function readJSON(
320325
options = { ...readJsonDefaultOptions, ...options };
321326
const method = options.format === "lines" ? pli.readJsonLines : pli.readJson;
322327
const extensions = [".ndjson", ".json", ".jsonl"];
328+
329+
// Treat a `null` or undefined inferSchemaLength as 0
330+
options.inferSchemaLength = options.inferSchemaLength ?? 0;
331+
323332
if (Buffer.isBuffer(pathOrBody)) {
324333
return _DataFrame(pli.readJson(pathOrBody, options));
325334
}
@@ -382,6 +391,8 @@ export function scanJson(
382391
export function scanJson(path: string, options?: Partial<ScanJsonOptions>) {
383392
options = { ...readJsonDefaultOptions, ...options };
384393

394+
// Treat a `null` or undefined inferSchemaLength as 0
395+
options.inferSchemaLength = options.inferSchemaLength ?? 0;
385396
return _LazyDataFrame(pli.scanJson(path, options));
386397
}
387398

polars/lazy/expr/string.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ export interface StringNamespace extends StringFunctions<Expr> {
164164
* @see https://goessner.net/articles/JsonPath/
165165
* @param jsonPath - A valid JSON path query string
166166
* @param dtype - The dtype to cast the extracted value to. If None, the dtype will be inferred from the JSON value.
167-
* @param inferSchemaLength - How many rows to parse to determine the schema. If ``None`` all rows are used.
167+
* @param inferSchemaLength - How many rows to parse to determine the schema. If `null` all rows are used.
168168
* @returns Utf8 array. Contains null if the original value is null or the `jsonPath` returns nothing.
169169
* @example
170170
* ```

polars/series/string.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ export interface StringNamespace extends StringFunctions<Series> {
132132
* @see https://goessner.net/articles/JsonPath/
133133
* @param jsonPath - A valid JSON path query string
134134
* @param dtype - The dtype to cast the extracted value to. If None, the dtype will be inferred from the JSON value.
135-
* @param inferSchemaLength - How many rows to parse to determine the schema. If ``None`` all rows are used.
135+
* @param inferSchemaLength - How many rows to parse to determine the schema. If `null` all rows are used.
136136
* @returns Utf8 array. Contains null if the original value is null or the `jsonPath` returns nothing.
137137
* @example
138138
* ```

0 commit comments

Comments
 (0)