1
1
import { ascending , descending , reverse } from "d3-array" ;
2
2
import { FileAttachment } from "./fileAttachment.js" ;
3
- import { isArrowTable } from "./arrow.js" ;
3
+ import { isArrowTable , loadArrow } from "./arrow.js" ;
4
4
import { DuckDBClient } from "./duckdb.js" ;
5
5
6
6
const nChecks = 20 ; // number of values to check in each array
@@ -143,43 +143,100 @@ function isTypedArray(value) {
143
143
144
144
// __query is used by table cells; __query.sql is used by SQL cells.
// The optional name identifies the source cell so file-backed sources can be
// registered under a stable table name in DuckDB.
export const __query = Object.assign(
  async (source, operations, invalidation, name) => {
    source = await loadTableDataSource(await source, name);
    if (isDatabaseClient(source)) return evaluateQuery(source, makeQueryTemplate(operations, source), invalidation);
    if (isDataArray(source)) return __table(source, operations);
    if (!source) throw new Error("missing data source");
    throw new Error("invalid data source");
  },
  {
    sql(source, invalidation, name) {
      // Returns a template-tag function: SQL cells invoke it with template
      // strings (hence `arguments`), which are forwarded to evaluateQuery.
      return async function () {
        return evaluateQuery(await loadSqlDataSource(await source, name), arguments, invalidation);
      };
    }
  }
);
161
161
162
// Resolves a data source for the given cell mode. Table cells and SQL cells
// route through the mode-specific (cached) loaders below; any other mode
// returns the source unchanged.
export async function loadDataSource(source, mode, name) {
  switch (mode) {
    case "table": return loadTableDataSource(source, name);
    case "sql": return loadSqlDataSource(source, name);
  }
  return source;
}
169
// We use a weak map to cache loaded data sources by key so that we don’t have
// to e.g. create separate SQLiteDatabaseClients every time we’re querying the
// same SQLite file attachment. Since this is a weak map, unused references will
// be garbage collected when they are no longer desired. Note: the name should
// be consistent, as it is not part of the cache key!
function sourceCache(loadSource) {
  const cache = new WeakMap();
  return (source, name) => {
    if (!source) throw new Error("data source not found");
    let promise = cache.get(source);
    if (!promise) {
      // Warning: do not await here! We need to populate the cache synchronously.
      promise = loadSource(source, name);
      cache.set(source, promise);
    }
    return promise;
  };
}
188
// Loads the data source for a table cell (cached per source). CSV, TSV, and
// JSON file attachments are parsed into in-memory arrays; SQLite attachments
// get a database client; Arrow and Parquet attachments — and in-memory Arrow
// tables — are registered with a DuckDB client under the given name.
const loadTableDataSource = sourceCache(async (source, name) => {
  if (source instanceof FileAttachment) {
    switch (source.mimeType) {
      case "text/csv": return source.csv({typed: true});
      case "text/tab-separated-values": return source.tsv({typed: true});
      case "application/json": return source.json();
      case "application/x-sqlite3": return source.sqlite();
    }
    // Arrow and Parquet have no registered mime type; sniff the extension.
    if (/\.(arrow|parquet)$/i.test(source.name)) return loadDuckDBClient(source, name);
    throw new Error(`unsupported file type: ${source.mimeType}`);
  }
  if (isArrowTable(source)) return loadDuckDBClient(source, name);
  return source;
});
203
// Loads the data source for a SQL cell (cached per source). Unlike table
// cells, tabular file attachments (CSV, TSV, JSON, Arrow, Parquet) and
// in-memory arrays are all registered with DuckDB so they can be queried with
// SQL; SQLite attachments get their own database client.
const loadSqlDataSource = sourceCache(async (source, name) => {
  if (source instanceof FileAttachment) {
    switch (source.mimeType) {
      case "text/csv":
      case "text/tab-separated-values":
      case "application/json": return loadDuckDBClient(source, name);
      case "application/x-sqlite3": return source.sqlite();
    }
    // Arrow and Parquet have no registered mime type; sniff the extension.
    if (/\.(arrow|parquet)$/i.test(source.name)) return loadDuckDBClient(source, name);
    throw new Error(`unsupported file type: ${source.mimeType}`);
  }
  if (isDataArray(source)) return loadDuckDBClient(await asArrowTable(source, name), name);
  if (isArrowTable(source)) return loadDuckDBClient(source, name);
  return source;
});
219
// Converts an in-memory data array into an Arrow table so DuckDB can query
// it. A primitive array becomes a single column named after the source cell;
// an array of objects is converted column-wise via tableFromJSON.
async function asArrowTable(array, name) {
  const arrow = await loadArrow();
  return arrayIsPrimitive(array)
    ? arrow.tableFromArrays({[name]: array})
    : arrow.tableFromJSON(array);
}
226
// Registers the given source as a single table in a new DuckDBClient. File
// attachments default to a table name derived from the file name; other
// sources (e.g. in-memory Arrow tables) default to __table.
function loadDuckDBClient(
  source,
  name = source instanceof FileAttachment
    ? getFileSourceName(source)
    : "__table"
) {
  return DuckDBClient.of({[name]: source});
}
235
// Derives a SQL-friendly table name from a file attachment’s name by removing
// the Observable version suffix (e.g. data@2.csv) and the file extension.
function getFileSourceName(file) {
  return file.name
    .replace(/@\d+(?=\.|$)/, "") // strip Observable file version number
    .replace(/\.\w+$/, ""); // strip file extension
}
184
241
185
242
async function evaluateQuery ( source , args , invalidation ) {
@@ -248,9 +305,9 @@ export function makeQueryTemplate(operations, source) {
248
305
throw new Error ( "missing from table" ) ;
249
306
if ( select . columns && select . columns . length === 0 )
250
307
throw new Error ( "at least one column must be selected" ) ;
251
- const columns = select . columns ? select . columns . map ( ( c ) => `t. ${ escaper ( c ) } ` ) : "*" ;
308
+ const columns = select . columns ? select . columns . map ( escaper ) . join ( ", " ) : "*" ;
252
309
const args = [
253
- [ `SELECT ${ columns } FROM ${ formatTable ( from . table , escaper ) } t ` ]
310
+ [ `SELECT ${ columns } FROM ${ formatTable ( from . table , escaper ) } ` ]
254
311
] ;
255
312
for ( let i = 0 ; i < filter . length ; ++ i ) {
256
313
appendSql ( i ? `\nAND ` : `\nWHERE ` , args ) ;
@@ -303,8 +360,9 @@ function formatTable(table, escaper) {
303
360
if ( table . schema != null ) from += escaper ( table . schema ) + "." ;
304
361
from += escaper ( table . table ) ;
305
362
return from ;
363
+ } else {
364
+ return escaper ( table ) ;
306
365
}
307
- return table ;
308
366
}
309
367
310
368
function appendSql ( sql , args ) {
@@ -313,7 +371,7 @@ function appendSql(sql, args) {
313
371
}
314
372
315
373
// Appends an ORDER BY term — an escaped column name followed by the
// upper-cased direction ("ASC"/"DESC") — to the SQL being assembled in args.
function appendOrderBy({column, direction}, args, escaper) {
  appendSql(`${escaper(column)} ${direction.toUpperCase()}`, args);
}
318
376
319
377
function appendWhereEntry ( { type, operands} , args , escaper ) {
@@ -398,7 +456,7 @@ function appendWhereEntry({type, operands}, args, escaper) {
398
456
399
457
function appendOperand ( o , args , escaper ) {
400
458
if ( o . type === "column" ) {
401
- appendSql ( `t. ${ escaper ( o . value ) } ` , args ) ;
459
+ appendSql ( escaper ( o . value ) , args ) ;
402
460
} else {
403
461
args . push ( o . value ) ;
404
462
args [ 0 ] . push ( "" ) ;
@@ -421,7 +479,9 @@ function likeOperand(operand) {
421
479
}
422
480
423
481
// This function applies table cell operations to an in-memory table (array of
// objects); it should be equivalent to the corresponding SQL query. TODO Use
// DuckDBClient for data arrays, too, and then we wouldn’t need our own __table
// function to do table operations on in-memory data?
export function __table ( source , operations ) {
426
486
const input = source ;
427
487
let { schema, columns} = source ;
0 commit comments