Skip to content

Commit 97555d6

Browse files
tophtucker and mbostock authored
Add file.csv({typed: "auto"}) option that uses inferSchema (#360)
* Add new typed: "auto" option that uses inferSchema and coerceRow
* Matches the d3 API slightly better, I guess
* Update README
* Don't do typed: "auto" if the array option is passed (Co-authored-by: Mike Bostock <[email protected]>)
* Check for columns in inferSchema; pull out an enforceSchema function which propagates the schema
* Test that inferSchema looks at source.columns
* Test enforceSchema
* Don't pass columns now that it's the internal default of inferSchema
* Fewer defaults, more explicit

Co-authored-by: Mike Bostock <[email protected]>
1 parent fd48793 commit 97555d6

File tree

4 files changed

+48
-8
lines changed

4 files changed

+48
-8
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,7 @@ Returns a promise to the file’s contents, parsed as tab-separated values (TSV)
332332
const data = await FileAttachment("cars.tsv").tsv();
333333
```
334334

335-
If <i>array</i> is true, an array of arrays is returned; otherwise, the first row is assumed to be the header row and an array of objects is returned, and the returned array has a <i>data</i>.columns property that is an array of column names. (See <a href="https://github.com/d3/d3-dsv/blob/main/README.md#dsv_parseRows">d3.tsvParseRows</a>.) If <i>typed</i> is true, [automatic type inference](https://observablehq.com/@d3/d3-autotype) is applied; only use this feature if you know your data is compatible.
335+
If <i>array</i> is true, an array of arrays is returned; otherwise, the first row is assumed to be the header row and an array of objects is returned, and the returned array has a <i>data</i>.columns property that is an array of column names. (See <a href="https://github.com/d3/d3-dsv/blob/main/README.md#dsv_parseRows">d3.tsvParseRows</a>.) If <i>typed</i> is true, [automatic type inference](https://observablehq.com/@d3/d3-autotype) is applied to each row independently; if <i>typed</i> is “auto”, the type inference is based on a sample of rows. Only use this feature if you know your data is compatible.
336336

337337
<a href="#attachment_image" name="attachment_image">#</a> *attachment*.<b>image</b>(<i>options</i>) [<>](https://github.com/observablehq/stdlib/blob/main/src/fileAttachment.js "Source")
338338

src/fileAttachment.js

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import {autoType, csvParse, csvParseRows, tsvParse, tsvParseRows} from "d3-dsv";
22
import {arrow4, arrow9, arrow11, jszip, exceljs} from "./dependencies.js";
33
import {cdn, requireDefault} from "./require.js";
44
import {SQLiteDatabaseClient} from "./sqlite.js";
5+
import {inferSchema, coerceRow} from "./table.js";
56
import {Workbook} from "./xlsx.js";
67

78
async function remote_fetch(file) {
@@ -10,11 +11,21 @@ async function remote_fetch(file) {
1011
return response;
1112
}
1213

14+
/**
 * Applies a schema to tabular data by coercing every row and tagging the
 * result with that schema.
 *
 * Each row of `source` is passed through `coerceRow` together with a
 * name → type lookup built from `schema`. The coerced rows are returned as a
 * new array; `source` itself is not modified by this function.
 *
 * @param {Array<Object>} source - Rows to coerce. May carry a `columns`
 *   property (as d3-dsv's parse functions attach to their results).
 * @param {Array<{name: string, type: string}>} schema - Column descriptors;
 *   only `name` and `type` are read here, the full array is forwarded to
 *   `coerceRow` and exposed on the result.
 * @returns {Array<Object>} A new array of coerced rows with a `schema`
 *   property, plus a `columns` property when `source` had one.
 */
export function enforceSchema(source, schema) {
  const types = new Map(schema.map(({name, type}) => [name, type]));
  const rows = source.map((d) => coerceRow(d, types, schema));
  // Array.prototype.map returns a plain array, so any `columns` property on
  // the parsed source would otherwise be lost. Carry it over only when it
  // exists, so sources without columns get no extra property.
  const {columns} = source;
  return Object.assign(rows, columns === undefined ? {schema} : {columns, schema});
}
18+
1319
async function dsv(file, delimiter, {array = false, typed = false} = {}) {
1420
const text = await file.text();
15-
return (delimiter === "\t"
16-
? (array ? tsvParseRows : tsvParse)
17-
: (array ? csvParseRows : csvParse))(text, typed && autoType);
21+
const parse = (delimiter === "\t"
22+
? (array ? tsvParseRows : tsvParse)
23+
: (array ? csvParseRows : csvParse));
24+
if (typed === "auto" && !array) {
25+
const source = parse(text);
26+
return enforceSchema(source, inferSchema(source, source.columns));
27+
}
28+
return parse(text, typed && autoType);
1829
}
1930

2031
export class AbstractFile {

src/table.js

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -199,8 +199,8 @@ function sourceCache(loadSource) {
199199
const loadChartDataSource = sourceCache(async (source) => {
200200
if (source instanceof FileAttachment) {
201201
switch (source.mimeType) {
202-
case "text/csv": return source.csv({typed: true});
203-
case "text/tab-separated-values": return source.tsv({typed: true});
202+
case "text/csv": return source.csv({typed: "auto"});
203+
case "text/tab-separated-values": return source.tsv({typed: "auto"});
204204
case "application/json": return source.json();
205205
}
206206
throw new Error(`unsupported file type: ${source.mimeType}`);
@@ -626,7 +626,7 @@ export function __table(source, operations) {
626626
let {schema, columns} = source;
627627
let inferredSchema = false;
628628
if (!isQueryResultSetSchema(schema)) {
629-
schema = inferSchema(source, columns);
629+
schema = inferSchema(source, isQueryResultSetColumns(columns) ? columns : undefined);
630630
inferredSchema = true;
631631
}
632632
// Combine column types from schema with user-selected types in operations
@@ -785,7 +785,7 @@ export function __table(source, operations) {
785785
return source;
786786
}
787787

788-
function coerceRow(object, types, schema) {
788+
export function coerceRow(object, types, schema) {
789789
const coerced = {};
790790
for (const col of schema) {
791791
const type = types.get(col.name);

test/fileAttachments-test.js

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import assert from "assert";
2+
import {enforceSchema} from "../src/fileAttachment.js";
23
import {FileAttachments} from "../src/index.js";
4+
import {inferSchema} from "../src/table.js";
35

46
it("FileAttachments is exported by stdlib", () => {
57
assert.strictEqual(typeof FileAttachments, "function");
@@ -38,3 +40,30 @@ it("FileAttachment works with Promises that resolve to URLs", async () => {
3840
assert.strictEqual(file.constructor.name, "FileAttachment");
3941
assert.strictEqual(await file.url(), "https://example.com/otherfile.js");
4042
});
43+
44+
// enforceSchema should coerce string-valued fields to the types that
// inferSchema detects, and expose the inferred schema on the returned array.
it("enforceSchema coerces an array of objects", () => {
  const source = [{a: "0", b: "1", c: "2"}];
  // All three columns hold integer-like strings, so each is expected to be
  // inferred (and typed) as "integer".
  const integerColumn = (name) => ({inferred: "integer", name, type: "integer"});
  const actual = enforceSchema(source, inferSchema(source));
  const expected = Object.assign([{a: 0, b: 1, c: 2}], {
    schema: ["a", "b", "c"].map(integerColumn)
  });
  assert.deepStrictEqual(actual, expected);
});

0 commit comments

Comments
 (0)