From e9d82bbfd3526a6a1a5f4a72be19e5eafe4c36c5 Mon Sep 17 00:00:00 2001
From: Jacob Bowdoin <7559478+jacob-8@users.noreply.github.com>
Date: Thu, 28 Nov 2024 08:33:59 +0800
Subject: [PATCH] Csv import to supabase (#505)

* add senses and sentences
* import dialects
* import photo and audio
* fix onondaga sort
* save import content updates
* don't write log files in CI
* only do content_updates for entry to denote import_id
* add tags to db and separate multiples in same column of imports with pipe
* fix multiple senses when having multiple audio bug
---
 .github/workflows/supabase-tests.yml | 4 +-
 packages/functions/.gitignore | 11 -
 .../deleteMediaOnDictionaryDelete.ts | 5 +
 .../{src/export => }/interfaceExplanations.ts | 2 +-
 packages/functions/package.json | 35 -
 packages/functions/recursiveDelete.ts | 46 +
 .../src/aggregation/countAllEntries.ts | 19 -
 packages/functions/src/aggregation/index.ts | 17 -
 packages/functions/src/auth.ts | 14 -
 packages/functions/src/config.ts | 15 -
 packages/functions/src/db.ts | 5 -
 .../deletion/deleteMediaOnDictionaryDelete.ts | 15 -
 .../functions/src/deletion/recursiveDelete.ts | 46 -
 .../src/export/semanticDomainOfDictionary.ts | 124 --
 packages/functions/src/helpers.ts | 34 -
 packages/functions/src/history.ts | 22 -
 packages/functions/src/index.ts | 69 -
 .../functions/src/video/uploadToYouTube.bak | 57 -
 packages/functions/tsconfig.json.bak | 26 -
 packages/functions/vitest.config.ts | 11 -
 packages/scripts/.gitignore | 3 +-
 packages/scripts/config-firebase.ts | 17 +-
 packages/scripts/config-supabase.ts | 9 +-
 packages/scripts/countAllEntries.ts | 25 -
 ...nvert_row_to_objects_for_databases.test.ts | 1506 -----------------
 .../convert_row_to_objects_for_databases.ts | 195 ---
 .../example-v4-senses/example-v4-senses.csv | 4 +-
 .../scripts/import/generate-sql-statements.ts | 362 ++++
 packages/scripts/import/getImageServingUrl.ts | 25 +-
 packages/scripts/import/import-data.snap.json | 547 ++++++
 packages/scripts/import/import-data.test.ts | 378 +++++
 packages/scripts/import/import-data.ts | 62 +
 packages/scripts/import/import-media.ts | 73 +-
 .../import/import-to-firebase-supabase.ts | 220 ---
 packages/scripts/import/import.ts | 66 +-
 .../scripts/import/incrementing-timestamp.ts | 7 +
 .../helpers/abbreviate-td-pos.ts | 42 -
 .../helpers/clean-up-data.ts | 13 -
 .../helpers/delete-duplicate-entries.ts | 150 --
 .../helpers/find-unmatched-pos.ts | 46 -
 .../old-firebase-function/helpers/unzip.ts | 72 -
 .../old-firebase-function/importing2.ts | 228 ---
 .../old-firebase-function/mock-dictionary.ts | 16 -
 .../old/convertJsonRowToEntryFormat.test.ts | 282 ---
 .../import/old/convertJsonRowToEntryFormat.ts | 153 --
 packages/scripts/import/old/filterArray.ts | 25 -
 packages/scripts/import/old/gatherPOS.ts | 33 -
 .../import/old/import-spreadsheet-v4.ts | 101 --
 .../scripts/import/old/import-spreadsheet.ts | 75 -
 .../scripts/import/old/importToFirestore.ts | 199 ---
 .../import/{ => old}/parseSourceFromNotes.ts | 0
 .../import/old/tdv1-import/find-languages.ts | 39 -
 .../import/old/tdv1-import/import-old-td.ts | 108 --
 packages/scripts/import/post-request.ts | 55 -
 packages/scripts/import/row.type.ts | 27 +-
 .../to-sql-string.ts | 5 +-
 packages/scripts/migrate-to-supabase/notes.md | 9 +-
 .../scripts/migrate-to-supabase/reset-db.ts | 17 -
 .../save-content-update.ts | 4 +-
 packages/scripts/package.json | 20 +-
 packages/scripts/record-logs.ts | 19 +-
 packages/scripts/refactor/entry-refactor.ts | 181 --
 packages/scripts/refactor/get-email.ts | 2 +-
 .../refactor/move-firestore-document.ts | 3 -
 .../reverse-semantic-domains-mapping.ts | 81 -
 packages/scripts/refactor/speaker-refactor.ts | 161 --
 .../refactor/turn-dialects-to-arrays.ts | 21 -
 .../refactor/upload-old-dictionaries.ts | 31 -
 packages/scripts/reset-local-db.ts | 26 +
 ...g.migration.ts => vitest.config.import.ts} | 4 +-
 packages/scripts/vitest.config.ts | 2 +-
 .../site/src/db-tests/content-update.test.bak | 6 +-
 packages/site/src/docs/Supabase.md | 2 +-
 .../docs/misc/JSON-to-Firestore-Importer.md | 33 -
 .../site/src/docs/misc/functions-config.md | 12 -
 .../src/docs/misc/functions-unit-tests.md | 44 -
 .../site/src/docs/misc/import-dictionary.md | 3 +-
 packages/site/src/lib/mocks/seed/tables.ts | 4 +-
 .../site/src/lib/search/entries-schema.ts | 5 +
 .../lib/search/multilingual-tokenizer.test.ts | 23 +-
 .../site/src/lib/search/search-entries.ts | 30 +-
 .../[dictionaryId]/entry/[entryId]/+page.ts | 14 +-
 packages/types/supabase/augments.types.ts | 29 +-
 packages/types/supabase/combined.types.ts | 245 ++-
 .../supabase/content-import.interface.ts | 42 +-
 packages/types/supabase/generated.types.ts | 195 ++-
 pnpm-lock.yaml | 148 --
 .../20241024024631_faster_entries_view.sql | 2 +-
 ...2644_tags_content-update-type-and-data.sql | 329 ++++
 supabase/seed.sql | 4 +-
 vitest.workspace.ts | 1 -
 91 files changed, 2365 insertions(+), 5132 deletions(-)
 delete mode 100644 packages/functions/.gitignore
 create mode 100644 packages/functions/deleteMediaOnDictionaryDelete.ts
 rename packages/functions/{src/export => }/interfaceExplanations.ts (99%)
 delete mode 100644 packages/functions/package.json
 create mode 100644 packages/functions/recursiveDelete.ts
 delete mode 100644 packages/functions/src/aggregation/countAllEntries.ts
 delete mode 100644 packages/functions/src/aggregation/index.ts
 delete mode 100644 packages/functions/src/auth.ts
 delete mode 100644 packages/functions/src/config.ts
 delete mode 100644 packages/functions/src/db.ts
 delete mode 100644 packages/functions/src/deletion/deleteMediaOnDictionaryDelete.ts
 delete mode 100644 packages/functions/src/deletion/recursiveDelete.ts
 delete mode 100644 packages/functions/src/export/semanticDomainOfDictionary.ts
 delete mode 100644 packages/functions/src/helpers.ts
 delete mode 100644 packages/functions/src/history.ts
 delete mode 100644 packages/functions/src/index.ts
 delete mode 100644 packages/functions/src/video/uploadToYouTube.bak
 delete mode 100644 packages/functions/tsconfig.json.bak
 delete mode 100644 packages/functions/vitest.config.ts
 delete mode 100644 packages/scripts/countAllEntries.ts
 delete mode 100644 packages/scripts/import/convert_row_to_objects_for_databases.test.ts
 delete mode 100644 packages/scripts/import/convert_row_to_objects_for_databases.ts
 create mode 100644 packages/scripts/import/generate-sql-statements.ts
 create mode 100644 packages/scripts/import/import-data.snap.json
 create mode 100644 packages/scripts/import/import-data.test.ts
 create mode 100644 packages/scripts/import/import-data.ts
 delete mode 100644 packages/scripts/import/import-to-firebase-supabase.ts
 create mode 100644 packages/scripts/import/incrementing-timestamp.ts
 delete mode 100644 packages/scripts/import/old-firebase-function/helpers/abbreviate-td-pos.ts
 delete mode 100644 packages/scripts/import/old-firebase-function/helpers/clean-up-data.ts
 delete mode 100644 packages/scripts/import/old-firebase-function/helpers/delete-duplicate-entries.ts
 delete mode 100644 packages/scripts/import/old-firebase-function/helpers/find-unmatched-pos.ts
 delete mode 100644
packages/scripts/import/old-firebase-function/helpers/unzip.ts delete mode 100644 packages/scripts/import/old-firebase-function/importing2.ts delete mode 100644 packages/scripts/import/old-firebase-function/mock-dictionary.ts delete mode 100644 packages/scripts/import/old/convertJsonRowToEntryFormat.test.ts delete mode 100644 packages/scripts/import/old/convertJsonRowToEntryFormat.ts delete mode 100644 packages/scripts/import/old/filterArray.ts delete mode 100644 packages/scripts/import/old/gatherPOS.ts delete mode 100644 packages/scripts/import/old/import-spreadsheet-v4.ts delete mode 100644 packages/scripts/import/old/import-spreadsheet.ts delete mode 100644 packages/scripts/import/old/importToFirestore.ts rename packages/scripts/import/{ => old}/parseSourceFromNotes.ts (100%) delete mode 100644 packages/scripts/import/old/tdv1-import/find-languages.ts delete mode 100644 packages/scripts/import/old/tdv1-import/import-old-td.ts delete mode 100644 packages/scripts/import/post-request.ts rename packages/scripts/{migrate-to-supabase => import}/to-sql-string.ts (93%) delete mode 100644 packages/scripts/migrate-to-supabase/reset-db.ts delete mode 100644 packages/scripts/refactor/entry-refactor.ts delete mode 100644 packages/scripts/refactor/reverse-semantic-domains-mapping.ts delete mode 100644 packages/scripts/refactor/speaker-refactor.ts delete mode 100644 packages/scripts/refactor/turn-dialects-to-arrays.ts delete mode 100644 packages/scripts/refactor/upload-old-dictionaries.ts create mode 100644 packages/scripts/reset-local-db.ts rename packages/scripts/{vitest.config.migration.ts => vitest.config.import.ts} (77%) delete mode 100644 packages/site/src/docs/misc/JSON-to-Firestore-Importer.md delete mode 100644 packages/site/src/docs/misc/functions-config.md delete mode 100644 packages/site/src/docs/misc/functions-unit-tests.md create mode 100644 supabase/migrations/20241127042644_tags_content-update-type-and-data.sql diff --git a/.github/workflows/supabase-tests.yml b/.github/workflows/supabase-tests.yml index 8fdf485c7..5d6459c69 100644 --- a/.github/workflows/supabase-tests.yml +++ b/.github/workflows/supabase-tests.yml @@ -1,7 +1,7 @@ name: Supabase DB Vitest Tests on: pull_request: - branches: [ main ] + branches: [main] jobs: build: @@ -28,3 +28,5 @@ jobs: - name: Run DB Tests run: pnpm test:db:ci + + # - run: pnpm -F scripts test:import # TODO: ordering needs made deterministic before we can run this diff --git a/packages/functions/.gitignore b/packages/functions/.gitignore deleted file mode 100644 index 5256c4993..000000000 --- a/packages/functions/.gitignore +++ /dev/null @@ -1,11 +0,0 @@ -# Compiled JavaScript files -lib/**/*.js -lib/**/*.js.map - -# TypeScript v1 declaration files -typings/ - -# Node.js dependency directory -node_modules/ - -firebase-debug.log \ No newline at end of file diff --git a/packages/functions/deleteMediaOnDictionaryDelete.ts b/packages/functions/deleteMediaOnDictionaryDelete.ts new file mode 100644 index 000000000..42b7e604d --- /dev/null +++ b/packages/functions/deleteMediaOnDictionaryDelete.ts @@ -0,0 +1,5 @@ +// const bucket = admin.storage().bucket() + +// return bucket.deleteFiles({ +// prefix: `${dictionaryId}`, +// }) diff --git a/packages/functions/src/export/interfaceExplanations.ts b/packages/functions/interfaceExplanations.ts similarity index 99% rename from packages/functions/src/export/interfaceExplanations.ts rename to packages/functions/interfaceExplanations.ts index e971cd431..d24586576 100644 --- 
a/packages/functions/src/export/interfaceExplanations.ts +++ b/packages/functions/interfaceExplanations.ts @@ -38,4 +38,4 @@ export const entryInterface = { ca: 'created at', ua: 'updated at', ei: 'Elicitation Id', -}; +} diff --git a/packages/functions/package.json b/packages/functions/package.json deleted file mode 100644 index 86131acad..000000000 --- a/packages/functions/package.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "name": "@living-dictionaries/functions", - "type": "commonjs", - "version": "0.0.1", - "private": true, - "main": "lib/index.js", - "engines": { - "node": "20" - }, - "scripts": { - "build": "tsc", - "build:watch": "tsc --watch", - "serve": "npm run build && firebase emulators:start --only functions", - "shell": "npm run build && firebase functions:shell", - "start": "npm run shell", - "deploy": "firebase deploy --only functions", - "deploy:specific": "firebase deploy --only functions:increaseEntryCount,functions:decreaseEntryCount,functions:countAllEntries -P default", - "deploy:specific:prod": "firebase deploy --only functions:increaseEntryCount,functions:decreaseEntryCount,functions:countAllEntries -P production", - "logs": "firebase functions:log", - "test": "vitest" - }, - "dependencies": { - "algoliasearch": "^4.13.0", - "firebase-admin": "^12.0.0", - "firebase-functions": "^4.3.1", - "firebase-tools": "^12.3.0" - }, - "devDependencies": { - "@living-dictionaries/types": "^1.0.0", - "@types/node": "^18.11.18", - "node-fetch": "^2.6.7", - "typescript": "^5.1.6", - "vitest": "^2.0.5" - } -} diff --git a/packages/functions/recursiveDelete.ts b/packages/functions/recursiveDelete.ts new file mode 100644 index 000000000..9de975e75 --- /dev/null +++ b/packages/functions/recursiveDelete.ts @@ -0,0 +1,46 @@ +// import * as functions from 'firebase-functions'; +// const firebase_tools = require('firebase-tools'); + +// /** +// * Initiate a recursive delete of documents at a given path. +// * +// * The calling user must be authenticated and have the custom "admin" attribute +// * set to true on the auth token. +// * +// * This delete is NOT an atomic operation and it's possible +// * that it may fail after only deleting some documents. +// * +// * @param {string} data.path the document or collection path to delete. +// */ +// export default async (data: any, context: functions.https.CallableContext) => { +// // if (!(context.auth && context.auth.token && context.auth.token.admin)) { +// // Only allow authorized users to execute this function. // Could improve by reading user data from Firestore and looking at admin role > 1 +// if ( +// !( +// context.auth && +// (context.auth.uid === '0seqYnZOqkUz050y6jVQI9QvlW62' || +// context.auth.uid === '2PELJgjxMHXEOcuZfv9MtGyiXdE3') +// ) +// ) { +// throw new functions.https.HttpsError( +// 'permission-denied', +// 'Must be an administrative user to initiate delete.' +// ); +// } + +// const {path} = data; +// console.log(`User ${context.auth.uid} has requested to delete path ${path}`); + +// // Run a recursive delete on the given document or collection path. +// // The 'token' must be set in the functions config, and can be generated +// // at the command line by running 'firebase login:ci'. 
+// await firebase_tools.firestore.delete(path, { +// project: process.env.GCLOUD_PROJECT, +// recursive: true, +// yes: true, +// token: functions.config().fb.token, +// }); +// return { +// path, +// }; +// }; diff --git a/packages/functions/src/aggregation/countAllEntries.ts b/packages/functions/src/aggregation/countAllEntries.ts deleted file mode 100644 index 5e53a330f..000000000 --- a/packages/functions/src/aggregation/countAllEntries.ts +++ /dev/null @@ -1,19 +0,0 @@ -import { db } from '../db'; - -export async function countAllEntries() { - let overallEntryCount = 0; - - const dictionarySnaps = await db.collection('dictionaries').get(); - const dictionaryIds = dictionarySnaps.docs.map(doc => doc.id); - - for (const dictionaryId of dictionaryIds) { - const countData = await db.collection(`dictionaries/${dictionaryId}/words`).count().get(); - const { count: entryCount } = countData.data(); - overallEntryCount += entryCount; - await db.doc(`dictionaries/${dictionaryId}`).update({ entryCount }); - } - - await db.doc('stats/data').update({ overallEntryCount }); - - return true; -} diff --git a/packages/functions/src/aggregation/index.ts b/packages/functions/src/aggregation/index.ts deleted file mode 100644 index a492f5d9c..000000000 --- a/packages/functions/src/aggregation/index.ts +++ /dev/null @@ -1,17 +0,0 @@ -import { FirestoreEvent, QueryDocumentSnapshot } from 'firebase-functions/v2/firestore'; -import { db } from '../db'; -import { FieldValue } from 'firebase-admin/firestore'; - -export async function increaseEntryCount({ params }: FirestoreEvent) { - const {dictionaryId} = params; - await db.doc(`dictionaries/${dictionaryId}`).update({ entryCount: FieldValue.increment(1) }); - await db.doc('stats/data').update({ overallEntryCount: FieldValue.increment(1) }); - return true; -} - -export async function decreaseEntryCount({ params }: FirestoreEvent) { - const {dictionaryId} = params; - await db.doc(`dictionaries/${dictionaryId}`).update({ entryCount: FieldValue.increment(-1) }); - await db.doc('stats/data').update({ overallEntryCount: FieldValue.increment(-1) }); - return true; -} diff --git a/packages/functions/src/auth.ts b/packages/functions/src/auth.ts deleted file mode 100644 index 1f405296c..000000000 --- a/packages/functions/src/auth.ts +++ /dev/null @@ -1,14 +0,0 @@ -// import * as functions from 'firebase-functions'; -// import { db } from './config'; - -// export const createUserRecord = functions.auth -// .user() -// .onCreate((user, context) => { -// const userRef = db.doc(`users/${user.uid}`); - -// return userRef.set({ -// name: user.displayName, -// createdAt: context.timestamp, -// nickname: 'bubba' -// }); -// }); diff --git a/packages/functions/src/config.ts b/packages/functions/src/config.ts deleted file mode 100644 index 94c0549d1..000000000 --- a/packages/functions/src/config.ts +++ /dev/null @@ -1,15 +0,0 @@ -import * as admin from 'firebase-admin'; -export const firebase = admin.initializeApp(); - -export const db = admin.firestore(); -const settings = { timestampsInSnapshots: true}; -db.settings(settings); - -export const timestamp = admin.firestore.FieldValue.serverTimestamp(); -export const oneMinuteAgo = admin.firestore.Timestamp.fromDate(new Date(Date.now() - 1000 * 60)); -export const testingTimestamp = admin.firestore.Timestamp.fromDate(new Date); - -export const increment = admin.firestore.FieldValue.increment(1); -export const decrement = admin.firestore.FieldValue.increment(-1); - -export const storage = admin.storage(); diff --git 
a/packages/functions/src/db.ts b/packages/functions/src/db.ts deleted file mode 100644 index 19c7db1aa..000000000 --- a/packages/functions/src/db.ts +++ /dev/null @@ -1,5 +0,0 @@ -import { initializeApp } from 'firebase-admin/app'; -import { getFirestore } from 'firebase-admin/firestore'; - -initializeApp(); -export const db = getFirestore(); diff --git a/packages/functions/src/deletion/deleteMediaOnDictionaryDelete.ts b/packages/functions/src/deletion/deleteMediaOnDictionaryDelete.ts deleted file mode 100644 index c7ba7da8e..000000000 --- a/packages/functions/src/deletion/deleteMediaOnDictionaryDelete.ts +++ /dev/null @@ -1,15 +0,0 @@ -import * as functions from 'firebase-functions'; -import * as admin from 'firebase-admin'; -admin.initializeApp(); - -export default ( - snapshot: functions.firestore.DocumentSnapshot, - context: functions.EventContext -) => { - const { dictionaryId } = context.params; - const bucket = admin.storage().bucket(); - - return bucket.deleteFiles({ - prefix: `${dictionaryId}` - }); -}; diff --git a/packages/functions/src/deletion/recursiveDelete.ts b/packages/functions/src/deletion/recursiveDelete.ts deleted file mode 100644 index 2942663da..000000000 --- a/packages/functions/src/deletion/recursiveDelete.ts +++ /dev/null @@ -1,46 +0,0 @@ -import * as functions from 'firebase-functions'; -const firebase_tools = require('firebase-tools'); - -/** - * Initiate a recursive delete of documents at a given path. - * - * The calling user must be authenticated and have the custom "admin" attribute - * set to true on the auth token. - * - * This delete is NOT an atomic operation and it's possible - * that it may fail after only deleting some documents. - * - * @param {string} data.path the document or collection path to delete. - */ -export default async (data: any, context: functions.https.CallableContext) => { - // if (!(context.auth && context.auth.token && context.auth.token.admin)) { - // Only allow authorized users to execute this function. // Could improve by reading user data from Firestore and looking at admin role > 1 - if ( - !( - context.auth && - (context.auth.uid === '0seqYnZOqkUz050y6jVQI9QvlW62' || - context.auth.uid === '2PELJgjxMHXEOcuZfv9MtGyiXdE3') - ) - ) { - throw new functions.https.HttpsError( - 'permission-denied', - 'Must be an administrative user to initiate delete.' - ); - } - - const {path} = data; - console.log(`User ${context.auth.uid} has requested to delete path ${path}`); - - // Run a recursive delete on the given document or collection path. - // The 'token' must be set in the functions config, and can be generated - // at the command line by running 'firebase login:ci'. 
- await firebase_tools.firestore.delete(path, { - project: process.env.GCLOUD_PROJECT, - recursive: true, - yes: true, - token: functions.config().fb.token, - }); - return { - path, - }; -}; diff --git a/packages/functions/src/export/semanticDomainOfDictionary.ts b/packages/functions/src/export/semanticDomainOfDictionary.ts deleted file mode 100644 index 6036fd62e..000000000 --- a/packages/functions/src/export/semanticDomainOfDictionary.ts +++ /dev/null @@ -1,124 +0,0 @@ -// Example Usage: -// http://localhost:5001/talking-dictionaries-dev/us-central1/exportSemanticDomainOfDictionary?dictionaryID=gutob&semanticDomainID=1.3 - -// https://us-central1-talking-dictionaries-dev.cloudfunctions.net/exportSemanticDomainOfDictionary?dictionaryID=achi-1579819002171&semanticDomainID=1.1 -// https://us-central1-talking-dictionaries-alpha.cloudfunctions.net/exportSemanticDomainOfDictionary?dictionaryID=kera-mundari&semanticDomainID=2 - -import * as functions from 'firebase-functions'; - -import * as admin from 'firebase-admin'; -admin.initializeApp(); -const projectId = functions.config().project?.key; - -import { IDictionary, ActualDatabaseEntry } from '@living-dictionaries/types'; -import { entryInterface } from './interfaceExplanations'; - -export default async ( - request: functions.https.Request, - response: functions.Response -): Promise => { - console.log('Export Semantic Domains Request Query params: ', request.query); - const queryParams = request.query; - if (queryParams?.dictionaryID && queryParams?.semanticDomainID) { - const {dictionaryID} = queryParams; - const {semanticDomainID} = queryParams; - - const dictionarySnap = await admin.firestore().doc(`dictionaries/${dictionaryID}`).get(); - const dictionaryDoc = dictionarySnap.data() as IDictionary; - - if (dictionaryDoc && semanticDomainID === '1.7') { - const entriesSnapshot = await admin - .firestore() - .collection(`dictionaries/${dictionaryID}/words`) - .where('sdn', 'array-contains', semanticDomainID) - .get(); - const storageBucket = `talking-dictionaries-${ - projectId === 'talking-dictionaries-alpha' ? 'alpha' : 'dev' - }.appspot.com`; - - const entries = entriesSnapshot.docs.map((snap) => { - const entry = snap.data() as ActualDatabaseEntry; - delete entry.ii; - delete entry.cb; - // @ts-ignore - delete entry.createdBy; - delete entry.ub; - // @ts-ignore - delete entry.updatedBy; - // @ts-ignore - entry.sourceURL = `https://livingdictionaries.app/${dictionaryID}/entry/${snap.id}`; - - if (entry.sf?.path) { - delete entry.sf.source; - const convertedPath = entry.sf.path.replace(/\//g, '%2F'); - // @ts-ignore; - entry.sf.audioURL = `https://firebasestorage.googleapis.com/v0/b/${storageBucket}/o/${convertedPath}?alt=media`; - delete entry.sf.path; - } - - if (entry.pf?.path) { - delete entry.pf.gcs; - const convertedPath = entry.pf.path.replace(/\//g, '%2F'); - // @ts-ignore; - entry.pf.imageURL = `https://firebasestorage.googleapis.com/v0/b/${storageBucket}/o/${convertedPath}?alt=media`; - delete entry.pf.path; - } - - return { - ...removeEmpty(entry), - ...{ id: snap.id }, - }; - }); - - delete dictionaryDoc.updatedAt; - delete dictionaryDoc.updatedBy; - delete dictionaryDoc.createdAt; - delete dictionaryDoc.createdBy; - - const data = { - dataRetrieval: { - semanticDomain: semanticDomain(semanticDomainID as string), - semanticDomainID, - timestamp: new Date(Date.now()), - query: request.url, - URL: - `https://us-central1-talking-dictionaries-${ - projectId === 'talking-dictionaries-alpha' ? 
'alpha' : 'dev' - }.cloudfunctions.net/exportSemanticDomainOfDictionary` + request.url, - }, - dictionary: { - dictionaryID, - URL: `https://livingdictionaries.app/${dictionaryID}`, - creditURL: `https://livingdictionaries.app/${dictionaryID}/contributors`, - ...dictionaryDoc, - }, - entryCount: entriesSnapshot.size, - entries, - entryInterface, - partOfSpeechMappings: partsOfSpeech, - semanticDomainNumberMappings: semanticDomains, - }; - response.send(data); - } else { - response.send('Invalid dictionary or semantic domain. Please contact us for help if needed.'); - } - } else { - response.send('Invalid request - please contact us to help you form a valid request.'); - } -}; - -function removeEmpty(obj: any) { - Object.keys(obj).forEach( - (k) => - (obj[k] && typeof obj[k] === 'object' && removeEmpty(obj[k])) || - (!obj[k] && obj[k] !== undefined && delete obj[k]) - ); - return obj; -} - -function semanticDomain(input: string): string { - const matching = semanticDomains.find((domain) => { - return domain.key === input; - }); - return matching?.name || 'NOT FOUND'; -} diff --git a/packages/functions/src/helpers.ts b/packages/functions/src/helpers.ts deleted file mode 100644 index 5ba95eb9b..000000000 --- a/packages/functions/src/helpers.ts +++ /dev/null @@ -1,34 +0,0 @@ -// import * as functions from 'firebase-functions'; - -// /** -// * Validates data payload of a callable function -// */ -// export const assert = (data: any, key: string) => { -// if (!data[key]) { -// throw new functions.https.HttpsError('invalid-argument', `function called without ${key} data`); -// } else { -// return data[key]; -// } -// } - -// /** -// * Validates auth context for callable function -// */ -// export const assertUID = (context: any) => { -// if (!context.auth) { -// throw new functions.https.HttpsError('permission-denied', 'function called without context.auth'); -// } else { -// return context.auth.uid; -// } -// } - -// /** -// * Sends a descriptive error response when running a callable function -// */ -// export const catchErrors = async (promise: Promise) => { -// try { -// return await promise; -// } catch(err) { -// throw new functions.https.HttpsError('unknown', err); -// } -// } diff --git a/packages/functions/src/history.ts b/packages/functions/src/history.ts deleted file mode 100644 index b7d9d8a80..000000000 --- a/packages/functions/src/history.ts +++ /dev/null @@ -1,22 +0,0 @@ -import * as functions from 'firebase-functions'; -import { db, oneMinuteAgo } from './config'; - -export const saveVersionHistory = functions.firestore - .document('dictionaries/{dictionaryId}/words/{wordId}') - .onUpdate(async (change, context) => { - const newValue = change.after.data(); - const previousValue = change.before.data(); - console.log(newValue, previousValue); - if (!previousValue.ua) - previousValue.ua = oneMinuteAgo; // to keep history from getting out of order in case of entries without an updated at field - - console.log(newValue, previousValue); - - const {dictionaryId} = context.params; - const {wordId} = context.params; - const wordHistoryColRef = db.collection(`dictionaries/${dictionaryId}/words/${wordId}/history`); - await wordHistoryColRef.add({previousValue}); - // TODO, make sure app is saving updatedAt timestamps - - return true; - }); diff --git a/packages/functions/src/index.ts b/packages/functions/src/index.ts deleted file mode 100644 index 3054d4738..000000000 --- a/packages/functions/src/index.ts +++ /dev/null @@ -1,69 +0,0 @@ -// Optimization from 
https://github.com/CodingDoug/min-functions-cold-start -// Read https://medium.com/firebase-developers/organize-cloud-functions-for-max-cold-start-performance-and-readability-with-typescript-and-9261ee8450f0 - -// firebase-functions should be the only imports in index.ts beside function imports -import { firestore } from 'firebase-functions' -import { - onDocumentCreated, - onDocumentDeleted, -} from 'firebase-functions/v2/firestore' -import { onSchedule } from 'firebase-functions/v2/scheduler' - -// exportSemanticDomainOfDictionary // if needed or just work on the api endpoint -// deleteMediaOnDictionaryDelete -// recursiveDelete, .runWith({ timeoutSeconds: 540, memory: '2GB' }) - -// Aggregation -export const increaseEntryCount = onDocumentCreated('dictionaries/{dictionaryId}/words/{wordId}', async (event) => { - await (await import('./aggregation')).increaseEntryCount(event) -}) - -export const decreaseEntryCount = onDocumentDeleted('dictionaries/{dictionaryId}/words/{wordId}', async (event) => { - await (await import('./aggregation')).decreaseEntryCount(event) -}) - -// can manually run task at https://console.cloud.google.com/cloudscheduler?project=talking-dictionaries-alpha -export const countAllEntries = onSchedule('every day 00:00', async () => { - await (await import('./aggregation/countAllEntries')).countAllEntries() -}) - -// Algolia Search Indexing -export const addToIndex = firestore - .document('dictionaries/{dictionaryId}/words/{wordId}') - .onCreate(async (snapshot, context) => { - await (await import('./algolia/modifyIndex')).addToIndex(snapshot, context) - }) - -export const updateIndex = firestore - .document('dictionaries/{dictionaryId}/words/{wordId}') - .onUpdate(async (change, context) => { - await (await import('./algolia/modifyIndex')).updateIndex(change, context) - }) - -export const deleteFromIndex = firestore - .document('dictionaries/{dictionaryId}/words/{wordId}') - .onDelete(async (snapshot, _context) => { - await (await import('./algolia/modifyIndex')).deleteFromIndex(snapshot) - }) - -// Video - -/* export const uploadToYouTube = functions - .runWith({ - timeoutSeconds: 540, - memory: '4GB', - }) - .firestore.document('dictionaries/{dictionaryId}/words/{entryId}/videos/{videoId') - .onCreate(async (snapshot, context) => { - await (await import('./video/uploadToYouTube')).uploadToYouTube(snapshot, context); - }); */ - -// export const latestYoutubeVideo = functions.https.onRequest(async (req, res) => { -// await (await import('./video/uploadToYouTube')).latestYoutubeVideo(req, res); -// }); - -// export const test = functions.https.onRequest(async (req, res) => { -// console.log('This is just a simple test'); -// // Send back a message that we've successfully written the message -// res.json({ result: `Hello World` }); -// }); diff --git a/packages/functions/src/video/uploadToYouTube.bak b/packages/functions/src/video/uploadToYouTube.bak deleted file mode 100644 index 3ce0e2cc8..000000000 --- a/packages/functions/src/video/uploadToYouTube.bak +++ /dev/null @@ -1,57 +0,0 @@ -import * as functions from 'firebase-functions'; -import { google } from 'googleapis'; -// import YouTubeUploadAPI; - -// Set by running `firebase functions:config:set youtube.key="..."` -// read with `firebase functions:config:get` -//console.log('YouTube key', functions.config().youtube); - -const youtube = google.youtube({ - version: 'v3', - auth: [[YOUTUBE_API]], -}); -//const KEY = functions.config().youtube.key; -const projectId = [[PROJECT_ID]]; - -// const api = new 
YouTubeUploadAPI(KEY); - -// import { IVideo } from '../../../src/lib/interfaces'; -//TESTING -export const latestYoutubeVideo = functions.https.onRequest(async (req, res: any) => { - // Get channelId from query string - const { channelId } = req.query; - // Generate query to Youtube API - // Get a list, ordered by date and limited to one item - // Frankly, it's an array with 1 latest video - const { data } = await youtube.search.list({ - part: ['id'], - channelId, - }); - - // Get ID object from items[0] - const { id } = data.items[0]; - - // Get Video ID from Id object - // Redirect to link with this video ID - return res.redirect(`https://www.youtube.com/watch?v=${id.videoId}`); //res.json({ test: data }); -}); - -/* export const uploadToYouTube = async ( - snapshot: functions.firestore.DocumentSnapshot, - context: functions.EventContext -) => { - const videoID = snapshot.id; - console.log(`uploading ${videoID} to YouTube`); - const dictionaryId = context.params.dictionaryId; - const entryId = context.params.entryId; - // const video = snapshot.data() as IVideo; - - if (projectId === 'talking-dictionaries-alpha') { - // uploaded from prod (may not be needed?) - } else { - // uploaded from dev - } - - // write error/success state to video doc in Firestore - return true; -}; */ diff --git a/packages/functions/tsconfig.json.bak b/packages/functions/tsconfig.json.bak deleted file mode 100644 index dc8be229e..000000000 --- a/packages/functions/tsconfig.json.bak +++ /dev/null @@ -1,26 +0,0 @@ -{ - "compilerOptions": { - // "moduleResolution": "node", // turn on if changing to esm - // "module": "es2020", - "module": "commonjs", // use 'es2020' if changing to esm - "noImplicitReturns": true, - // "noUnusedLocals": true, // turned off - "outDir": "lib", - "sourceMap": true, - // "strict": true, // turned off - "resolveJsonModule": true, - "esModuleInterop": true, - "target": "es2017", - "types": [ - "vitest/globals", - "vitest/importMeta" - ] - }, - "compileOnSave": true, - "include": [ - "src/algolia", - "src/email", - "src/index.ts" - ], - // "exclude": ["./**/*test.ts"] -} \ No newline at end of file diff --git a/packages/functions/vitest.config.ts b/packages/functions/vitest.config.ts deleted file mode 100644 index f153d3025..000000000 --- a/packages/functions/vitest.config.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { defineProject } from 'vitest/config'; - -export default defineProject({ - test: { - name: 'functions:unit', - globals: true, - // includeSource: ['./src/**/*.ts'], - include: ['./src/**/*.test.ts'], - // exclude: [...defaultExclude, 'lib'], - }, -}); diff --git a/packages/scripts/.gitignore b/packages/scripts/.gitignore index 032816f4e..56da80ae4 100644 --- a/packages/scripts/.gitignore +++ b/packages/scripts/.gitignore @@ -1,6 +1,5 @@ logs -service-account* +service-accounts.ts .env sheets-viewer-SA.json -.env.supabase .env.production.supabase \ No newline at end of file diff --git a/packages/scripts/config-firebase.ts b/packages/scripts/config-firebase.ts index f66131373..f9f57d24c 100644 --- a/packages/scripts/config-firebase.ts +++ b/packages/scripts/config-firebase.ts @@ -3,22 +3,19 @@ import { cert, initializeApp } from 'firebase-admin/app' import { FieldValue, getFirestore } from 'firebase-admin/firestore' import { getStorage } from 'firebase-admin/storage' import { getAuth } from 'firebase-admin/auth' -// import serviceAccountDev from './service-account-dev.json'; -// import serviceAccountProd from './service-account-prod.json'; -import { serviceAccountDev, 
serviceAccountProd } from './service-accounts' +import { firebase_dev_service_account, firebase_prod_service_account } from './service-accounts' import './record-logs' program - // .version('0.0.1') - .option('-e, --environment [dev/prod]', 'Firebase Project', 'dev') + .option('-e, --environment [dev/prod]', 'Firebase/Supabase Project', 'dev') .allowUnknownOption() // because config is shared by multiple scripts .parse(process.argv) export const environment = program.opts().environment === 'prod' ? 'prod' : 'dev' -export const projectId - = environment === 'prod' ? 'talking-dictionaries-alpha' : 'talking-dictionaries-dev' +console.log(`Firebase running on ${environment}`) -const serviceAccount = environment === 'dev' ? serviceAccountDev : serviceAccountProd +const serviceAccount = environment === 'dev' ? firebase_dev_service_account : firebase_prod_service_account +export const projectId = serviceAccount.project_id initializeApp({ // @ts-expect-error @@ -27,10 +24,6 @@ initializeApp({ storageBucket: `${projectId}.appspot.com`, }) export const db = getFirestore() -// const settings = { timestampsInSnapshots: true }; -// db.settings(settings); export const timestamp = FieldValue.serverTimestamp() export const storage = getStorage() export const auth = getAuth() - -console.log(`Firebase running on ${environment}`) diff --git a/packages/scripts/config-supabase.ts b/packages/scripts/config-supabase.ts index 6c25946e6..685b15e1a 100644 --- a/packages/scripts/config-supabase.ts +++ b/packages/scripts/config-supabase.ts @@ -1,9 +1,9 @@ +import { program } from 'commander' import PG from 'pg' import { createClient } from '@supabase/supabase-js' import type { Database } from '@living-dictionaries/types' import * as dotenv from 'dotenv' import './record-logs' -import { program } from 'commander' program .option('-e, --environment [dev/prod]', 'Firebase/Supabase Project', 'dev') @@ -14,7 +14,7 @@ export const environment = program.opts().environment === 'prod' ? 
'prod' : 'dev console.log(`Supabase running on ${environment}`) if (environment === 'dev') { - dotenv.config({ path: '.env.supabase' }) // local project variables + dotenv.config({ path: '../site/.env.development' }) } else { dotenv.config({ path: '.env.production.supabase' }) } @@ -22,6 +22,8 @@ if (environment === 'dev') { export const admin_supabase = createClient(process.env.PUBLIC_SUPABASE_API_URL, process.env.SUPABASE_SERVICE_ROLE_KEY) export const anon_supabase = createClient(process.env.PUBLIC_SUPABASE_API_URL, process.env.PUBLIC_SUPABASE_ANON_KEY) export const jacob_ld_user_id = 'de2d3715-6337-45a3-a81a-d82c3210b2a7' +export const diego_ld_user_id = 'be43b1dd-6c64-494d-b5da-10d70c384433' +export const test_dictionary_id = 'test_dictionary_id' class DB { private pool: PG.Pool @@ -39,7 +41,7 @@ class DB { user: 'postgres.actkqboqpzniojhgtqzw', host: 'aws-0-us-west-1.pooler.supabase.com', database: 'postgres', - password: process.env.PUBLIC_SUPABASE_DB_PASSWORD, + password: process.env.SUPABASE_DB_PASSWORD, port: 6543, }), max: 10, @@ -64,6 +66,7 @@ class DB { await client.query(query) } catch (error) { console.error('Error executing query:', error) + // @ts-expect-error throw new Error(error) } finally { client.release() diff --git a/packages/scripts/countAllEntries.ts b/packages/scripts/countAllEntries.ts deleted file mode 100644 index 1da24edea..000000000 --- a/packages/scripts/countAllEntries.ts +++ /dev/null @@ -1,25 +0,0 @@ -import { db } from './config-firebase' - -export async function countAllEntries() { - let overallEntryCount = 0 - - const dictionarySnaps = await db.collection('dictionaries').get() - const dictionaryIds = dictionarySnaps.docs.map(doc => doc.id) - - for (const dictionaryId of dictionaryIds) { - if (dictionaryId.startsWith('tdv1-')) continue - - const countData = await db.collection(`dictionaries/${dictionaryId}/words`).count().get() - const { count: entryCount } = countData.data() - console.log({ dictionaryId, entryCount, overallEntryCount }) - overallEntryCount += entryCount - console.log({ dictionaryId, entryCount, overallEntryCount }) - await db.doc(`dictionaries/${dictionaryId}`).update({ entryCount }) - } - - await db.doc('stats/data').update({ overallEntryCount }) - - return true -} - -countAllEntries().then(() => console.log('done')).catch(console.error) diff --git a/packages/scripts/import/convert_row_to_objects_for_databases.test.ts b/packages/scripts/import/convert_row_to_objects_for_databases.test.ts deleted file mode 100644 index 0b2fb4577..000000000 --- a/packages/scripts/import/convert_row_to_objects_for_databases.test.ts +++ /dev/null @@ -1,1506 +0,0 @@ -// Add your tests here, borrowing from what you already have -import { readFileSync } from 'node:fs' -import path from 'node:path' -import type { Timestamp } from 'firebase-admin/firestore' -import { convert_row_to_objects_for_databases } from './convert_row_to_objects_for_databases.js' -import type { Row } from './row.type' -import { parseCSVFrom } from './parse-csv.js' - -const fakeTimeStamp = 10101010 as unknown as Timestamp -const import_id = `v4-1715819006966` - -describe('convertJsonRowToEntryFormat without senses', () => { - test('glosses', () => { - const csv_rows_without_header: Row[] = [ - { - 'lexeme': 'dolphin', - 'localOrthography.2': 'different script of dolphin', - 'es_gloss': 'delfín', - }, - ] - const entries = csv_rows_without_header.map(row => convert_row_to_objects_for_databases({ row, import_id, timestamp: fakeTimeStamp })) - - expect(entries).toMatchInlineSnapshot(` - [ 
- { - "firebase_entry": { - "ca": 10101010, - "gl": { - "es": "delfín", - }, - "ii": "v4-1715819006966", - "lo2": "different script of dolphin", - "lx": "dolphin", - "ua": 10101010, - }, - "supabase_senses": [], - "supabase_sentences": [], - }, - ] - `) - }) - - test('example sentences', () => { - const csv_rows_without_header: Row[] = [ - { - lexeme: 'dolphin', - es_exampleSentence: 'el delfín nada en el océano.', - }, - ] - const entries = csv_rows_without_header.map(row => convert_row_to_objects_for_databases({ row, import_id, timestamp: fakeTimeStamp })) - - expect(entries).toMatchInlineSnapshot(` - [ - { - "firebase_entry": { - "ca": 10101010, - "gl": {}, - "ii": "v4-1715819006966", - "lx": "dolphin", - "ua": 10101010, - "xs": { - "es": "el delfín nada en el océano.", - }, - }, - "supabase_senses": [], - "supabase_sentences": [], - }, - ] - `) - }) - - test('semantic domains', () => { - const csv_rows_without_header: Row[] = [ - { - 'lexeme': 'dolphins', - 'semanticDomain': '5.15', - 'semanticDomain.2': '1', - 'semanticDomain_custom': 'the sea!', - }, - ] - const entries = csv_rows_without_header.map(row => convert_row_to_objects_for_databases({ row, import_id, timestamp: fakeTimeStamp })) - - expect(entries).toMatchInlineSnapshot(` - [ - { - "firebase_entry": { - "ca": 10101010, - "gl": {}, - "ii": "v4-1715819006966", - "lx": "dolphins", - "sd": [ - "the sea!", - ], - "sdn": [ - "5.15", - "1", - ], - "ua": 10101010, - }, - "supabase_senses": [], - "supabase_sentences": [], - }, - ] - `) - }) - - test('high-level conversion from csv', () => { - const dictionaryId = 'example-v4' - const file = readFileSync(path.join(__dirname, `./data/${dictionaryId}/${dictionaryId}.csv`), 'utf8') - const rows = parseCSVFrom(file) - const rowsWithoutHeader = removeHeaderRow(rows) - const entries = rowsWithoutHeader.map(row => convert_row_to_objects_for_databases({ row, import_id, timestamp: fakeTimeStamp })) - - expect(entries).toEqual( - [ - { - firebase_entry: { - ca: 10101010, - di: [ - 'Modern Parisian French', - ], - gl: { - en: 'car', - es: 'auto', - }, - ii: 'v4-1715819006966', - lx: 'voiture', - nt: 'small automobile', - ph: 'vwatyʁ', - ps: [ - 'n', - 'v', - ], - sd: [ - 'vehicle|cars', - ], - sdn: [ - '5.15', - '5', - ], - ua: 10101010, - xs: { - en: 'I drive my car', - es: 'Conduzco mi auto', - vn: 'Je conduis ma voiture', - }, - }, - supabase_senses: [], - supabase_sentences: [], - }, - { - firebase_entry: { - ca: 10101010, - di: [ - 'Modern Parisian French', - 'Quebec French', - ], - gl: { - en: 'tree', - es: 'árbol', - }, - ii: 'v4-1715819006966', - lx: 'arbre', - nt: 'generic term for all kinds of trees', - ph: 'aʁbʁ', - ps: [ - 'n', - 'adj', - ], - scn: [ - 'Acer rubrum', - ], - sdn: [ - '1.4', - '1.2', - ], - ua: 10101010, - xs: { - en: 'The tree gives us shade', - es: 'El árbol nos da sombra', - vn: 'L\'arbre nous donne de l\'ombre', - }, - }, - supabase_senses: [], - supabase_sentences: [], - }, - { - firebase_entry: { - ca: 10101010, - di: [ - 'Modern Parisian French', - ], - gl: { - en: 'tube', - es: 'tubo', - }, - ii: 'v4-1715819006966', - lx: 'tube', - nt: 'a cylindrical device for liquids', - ph: 'tyb', - pl: 'tubes', - ps: [ - 'n', - ], - sd: [ - 'plumbing', - ], - sdn: [ - '5.9', - ], - ua: 10101010, - xs: { - en: 'The water goes through the tubes', - es: 'El agua pasa a través de los tubos', - vn: 'L\'eau passe à travers les tubes', - }, - }, - supabase_senses: [], - supabase_sentences: [], - }, - { - firebase_entry: { - ca: 10101010, - di: [ - 'Quebec French', - ], - gl: { - 
en: 'car', - es: 'auto', - }, - ii: 'v4-1715819006966', - lx: 'voiture', - nt: 'small automobile', - ph: 'vwɑtYʁ', - ps: [ - 'n', - ], - sd: [ - 'vehicle', - ], - sdn: [ - '5.15', - ], - sr: [ - 'testing sources', - ], - ua: 10101010, - xs: { - en: 'I drive my car', - es: 'Conduzco mi auto', - vn: 'Je conduis ma voiture', - }, - }, - supabase_senses: [], - supabase_sentences: [], - }, - { - firebase_entry: { - ca: 10101010, - di: [ - 'Quebec French', - ], - gl: { - en: 'neutral', - es: 'neutro', - }, - ii: 'v4-1715819006966', - lx: 'neutre', - ph: 'nøʏ̯tʁ̥', - ps: [ - 'adj', - ], - ua: 10101010, - xs: { - en: 'My room is painted with a neutral color.', - es: 'Mi habitación está pintada con un color neutro.', - vn: 'Ma chambre est peinte d\'une couleur neutre.', - }, - }, - supabase_senses: [], - supabase_sentences: [], - }, - { - firebase_entry: { - ca: 10101010, - di: [ - 'Quebec French', - ], - gl: { - en: 'to celebrate', - es: 'celebrar', - }, - ii: 'v4-1715819006966', - lx: 'fêter', - nt: 'to have a party', - ph: 'fɛɪ̯te', - ps: [ - 'v', - ], - sr: [ - 'test source', - 'with multiples sources, test', - 'https://example.com', - ], - ua: 10101010, - xs: { - en: 'We will really party tonight', - es: 'Vamos a celebrar esta noche', - vn: 'On va vraiment fêter à soir', - }, - }, - supabase_senses: [], - supabase_sentences: [], - }, - { - firebase_entry: { - ca: 10101010, - di: [ - 'Central Luganda', - ], - gl: { - en: 'I will see you', - es: 'Voy a verte', - }, - ii: 'v4-1715819006966', - in: '1SG-Fut-2SG-see-Fin.V', - lx: 'njakulaba', - mr: 'n-ja-ku-lab-a', - ps: [ - 'vp', - ], - ua: 10101010, - }, - supabase_senses: [], - supabase_sentences: [], - }, - { - firebase_entry: { - ca: 10101010, - gl: { - en: 'bye', - es: 'adiós', - }, - ii: 'v4-1715819006966', - lx: 'vale', - ua: 10101010, - }, - supabase_senses: [], - supabase_sentences: [], - }, - ], - ) - }) - - test('does not duplicate vernacular', () => { - const csv_rows_without_header: Row[] = [ - { - vernacular_exampleSentence: 'Hello world', - }, - ] - const entries = csv_rows_without_header.map(row => convert_row_to_objects_for_databases({ row, import_id, timestamp: fakeTimeStamp })) - - expect(entries).toMatchInlineSnapshot(` - [ - { - "firebase_entry": { - "ca": 10101010, - "gl": {}, - "ii": "v4-1715819006966", - "lx": undefined, - "ua": 10101010, - "xs": { - "vn": "Hello world", - }, - }, - "supabase_senses": [], - "supabase_sentences": [], - }, - ] - `) - }) -}) - -describe('convertJsonRowToEntryFormat with senses', () => { - const fakeTimeStamp = 10101010 as unknown as Timestamp - const fakeDateStamp = 1715819006966 - - test('multiple senses (glosses))', () => { - const csv_rows_with_senses: Row[] = [ - { - 'lexeme': '𒄧𒂸', - 'es_gloss': 'delfín', - 'en_gloss': 'dolphin', - 's2.es_gloss': 'pez', - 's2.en_gloss': 'fish', - 's3.en_gloss': 'marine mammal', - 's4.en_gloss': 'mythological creature', - 's4.es_gloss': 'creatura mitológica', - 's4.fr_gloss': 'créature mythologique', - }, - ] - const entries = csv_rows_with_senses.map(row => convert_row_to_objects_for_databases({ row, import_id, timestamp: fakeTimeStamp, test: true })) - - expect(entries).toMatchInlineSnapshot(` - [ - { - "firebase_entry": { - "ca": 10101010, - "gl": { - "en": "dolphin", - "es": "delfín", - }, - "ii": "v4-1715819006966", - "lx": "𒄧𒂸", - "ua": 10101010, - }, - "supabase_senses": [ - { - "sense": { - "glosses": { - "new": { - "en": "fish", - "es": "pez", - }, - }, - }, - "sense_id": "11111111-1111-1111-1111-111111111100", - }, - { - "sense": { - "glosses": { 
- "new": { - "en": "marine mammal", - }, - }, - }, - "sense_id": "11111111-1111-1111-1111-111111111102", - }, - { - "sense": { - "glosses": { - "new": { - "en": "mythological creature", - "es": "creatura mitológica", - "fr": "créature mythologique", - }, - }, - }, - "sense_id": "11111111-1111-1111-1111-111111111103", - }, - ], - "supabase_sentences": [], - }, - ] - `) - }) - - test('senses with sentences', () => { - const csv_rows_with_sentences: Row[] = [ - { - 'lexeme': '𒄧𒂸', - 'en_gloss': 'dolphin', - 's2.en_gloss': 'fish', - 's2.default_vernacular_exampleSentence': '𒄧𒂸 𒄧 𒄧𒂸 𒂸𒂸𒄧', - 's2.en_exampleSentence': 'The fish is swimmmimg', - 's2.es_exampleSentence': 'El pez está nadando', - }, - ] - const entries = csv_rows_with_sentences.map(row => convert_row_to_objects_for_databases({ row, import_id, timestamp: fakeTimeStamp, test: true })) - - expect(entries).toMatchInlineSnapshot(` - [ - { - "firebase_entry": { - "ca": 10101010, - "gl": { - "en": "dolphin", - }, - "ii": "v4-1715819006966", - "lx": "𒄧𒂸", - "ua": 10101010, - }, - "supabase_senses": [ - { - "sense": { - "glosses": { - "new": { - "en": "fish", - }, - }, - }, - "sense_id": "11111111-1111-1111-1111-111111111100", - }, - ], - "supabase_sentences": [ - { - "sense_id": "11111111-1111-1111-1111-111111111100", - "sentence": { - "text": { - "new": { - "default": "𒄧𒂸 𒄧 𒄧𒂸 𒂸𒂸𒄧", - }, - }, - "translation": { - "new": { - "en": "The fish is swimmmimg", - "es": "El pez está nadando", - }, - }, - }, - "sentence_id": "11111111-1111-1111-1111-111111111102", - }, - ], - }, - ] - `) - }) - - test('senses with the rest fields', () => { - const csv_rows_with_other_fields: Row[] = [ - { - 'lexeme': 'foo', - 'en_gloss': 'test', - 's2.en_gloss': 'example', - 's2.partOfSpeech': 'n', - 's2.semanticDomain': '1.1', - 's2.nounClass': 'S', - }, - ] - const entries = csv_rows_with_other_fields.map(row => convert_row_to_objects_for_databases({ row, import_id, timestamp: fakeTimeStamp, test: true })) - expect(entries).toMatchInlineSnapshot(` - [ - { - "firebase_entry": { - "ca": 10101010, - "gl": { - "en": "test", - }, - "ii": "v4-1715819006966", - "lx": "foo", - "ua": 10101010, - }, - "supabase_senses": [ - { - "sense": { - "glosses": { - "new": { - "en": "example", - }, - }, - "noun_class": { - "new": "S", - }, - "parts_of_speech": { - "new": [ - "n", - ], - }, - "semantic_domains": { - "new": [ - "1.1", - ], - }, - }, - "sense_id": "11111111-1111-1111-1111-111111111100", - }, - ], - "supabase_sentences": [], - }, - ] - `) - }) - - test('wrong order in senses', () => { - const csv_rows_with_senses: Row[] = [ - { - 'lexeme': '𒂸', - 'es_gloss': 'sopa', - 'en_gloss': 'soup', - 's2.es_gloss': 'agua', - 's3.es_gloss': 'líquido', - 's3.en_gloss': 'liquid', - 's2.en_gloss': 'water', - }, - ] - const entries = csv_rows_with_senses.map(row => convert_row_to_objects_for_databases({ row, import_id, timestamp: fakeTimeStamp, test: true })) - - expect(entries).not.toEqual( - [ - { - firebase_entry: { - ca: 10101010, - gl: { - en: 'soup', - es: 'sopa', - }, - ii: 'v4-1715819006966', - lx: '𒂸', - ua: 10101010, - }, - supabase_senses: [ - { - sense: { - glosses: { - new: { - es: 'agua', - en: 'water', - }, - }, - }, - sense_id: '11111111-1111-1111-1111-111111111100', - }, - { - sense: { - glosses: { - new: { - en: 'liquid', - es: 'líquido', - }, - }, - }, - sense_id: '11111111-1111-1111-1111-111111111102', - }, - ], - supabase_sentences: [], - }, - ], - ) - }) - - test('senses with multiple sentences and last vernacular sentence without its translations', () => { - const 
csv_rows_with_sentences: Row[] = [ - { - 'lexeme': '𒄧𒂸', - 'en_gloss': 'dolphin', - 's2.en_gloss': 'fish', - 's2.default_vernacular_exampleSentence': '𒄧𒂸 𒄧 𒄧𒂸 𒂸𒂸𒄧', - 's2.en_exampleSentence': 'The fish is swimmming', - 's2.es_exampleSentence': 'El pez está nadando', - 's3.en_gloss': 'swim', - 's3.default_vernacular_exampleSentence': '𒂸𒂸𒄧', - 's3.en_exampleSentence': 'I swim', - 's4.en_gloss': 'test', - 's4.default_vernacular_exampleSentence': '𒂸𒂸 𒂸𒂸 𒂸𒂸', - }, - ] - const entries = csv_rows_with_sentences.map(row => convert_row_to_objects_for_databases({ row, import_id, timestamp: fakeTimeStamp, test: true })) - - expect(entries).toMatchInlineSnapshot(` - [ - { - "firebase_entry": { - "ca": 10101010, - "gl": { - "en": "dolphin", - }, - "ii": "v4-1715819006966", - "lx": "𒄧𒂸", - "ua": 10101010, - }, - "supabase_senses": [ - { - "sense": { - "glosses": { - "new": { - "en": "fish", - }, - }, - }, - "sense_id": "11111111-1111-1111-1111-111111111100", - }, - { - "sense": { - "glosses": { - "new": { - "en": "swim", - }, - }, - }, - "sense_id": "11111111-1111-1111-1111-111111111103", - }, - { - "sense": { - "glosses": { - "new": { - "en": "test", - }, - }, - }, - "sense_id": "11111111-1111-1111-1111-111111111105", - }, - ], - "supabase_sentences": [ - { - "sense_id": "11111111-1111-1111-1111-111111111100", - "sentence": { - "text": { - "new": { - "default": "𒄧𒂸 𒄧 𒄧𒂸 𒂸𒂸𒄧", - }, - }, - "translation": { - "new": { - "en": "The fish is swimmming", - "es": "El pez está nadando", - }, - }, - }, - "sentence_id": "11111111-1111-1111-1111-111111111102", - }, - { - "sense_id": "11111111-1111-1111-1111-111111111103", - "sentence": { - "text": { - "new": { - "default": "𒂸𒂸𒄧", - }, - }, - "translation": { - "new": { - "en": "I swim", - }, - }, - }, - "sentence_id": "11111111-1111-1111-1111-111111111104", - }, - { - "sense_id": "11111111-1111-1111-1111-111111111105", - "sentence": { - "text": { - "new": { - "default": "𒂸𒂸 𒂸𒂸 𒂸𒂸", - }, - }, - }, - "sentence_id": "11111111-1111-1111-1111-111111111106", - }, - ], - }, - ] - `) - }) - test('multiple vernacular and translations sentences that belongs to a same sense', () => { - const csv_rows_with_sentences: Row[] = [ - { - 'lexeme': '𒄧𒂸', - 'en_gloss': 'dolphin', - 's2.en_gloss': 'fish', - 's2.default_vernacular_exampleSentence': '𒄧𒂸 𒄧 𒄧𒂸 𒂸𒂸𒄧', - 's2.en_exampleSentence': 'The fish is swimmming', - 's2.es_exampleSentence': 'El pez está nadando', - 's2.default_vernacular_exampleSentence.2': '𒂸 𒂸𒂸𒂸 𒄧𒄧𒄧 𒄧', - 's3.en_gloss': 'swim', - 's3.default_vernacular_exampleSentence': '𒂸𒂸𒄧', - 's3.en_exampleSentence': 'I swim', - 's3.default_vernacular_exampleSentence.2': '𒄧𒂸 𒂸𒄧', - 's3.en_exampleSentence.2': 'He swam', - 's3.es_exampleSentence.2': 'Él nadó', - 's3.it_exampleSentence.2': 'egli nuotava', - 's3.default_vernacular_exampleSentence.3': '𒂸 𒄧𒄧 𒂸', - 's3.es_exampleSentence.3': 'Él nadará', - 's3.en_exampleSentence.3': 'He will swim', - 's4.en_gloss': 'test', - 's4.default_vernacular_exampleSentence': '𒂸𒂸 𒂸𒂸 𒂸𒂸', - }, - ] - const entries = csv_rows_with_sentences.map(row => convert_row_to_objects_for_databases({ row, import_id, timestamp: fakeTimeStamp, test: true })) - - expect(entries).toMatchInlineSnapshot(` - [ - { - "firebase_entry": { - "ca": 10101010, - "gl": { - "en": "dolphin", - }, - "ii": "v4-1715819006966", - "lx": "𒄧𒂸", - "ua": 10101010, - }, - "supabase_senses": [ - { - "sense": { - "glosses": { - "new": { - "en": "fish", - }, - }, - }, - "sense_id": "11111111-1111-1111-1111-111111111100", - }, - { - "sense": { - "glosses": { - "new": { - "en": "swim", - }, - 
}, - }, - "sense_id": "11111111-1111-1111-1111-111111111104", - }, - { - "sense": { - "glosses": { - "new": { - "en": "test", - }, - }, - }, - "sense_id": "11111111-1111-1111-1111-111111111108", - }, - ], - "supabase_sentences": [ - { - "sense_id": "11111111-1111-1111-1111-111111111100", - "sentence": { - "text": { - "new": { - "default": "𒄧𒂸 𒄧 𒄧𒂸 𒂸𒂸𒄧", - }, - }, - "translation": { - "new": { - "en": "The fish is swimmming", - "es": "El pez está nadando", - }, - }, - }, - "sentence_id": "11111111-1111-1111-1111-111111111102", - }, - { - "sense_id": "11111111-1111-1111-1111-111111111100", - "sentence": { - "text": { - "new": { - "default": "𒂸 𒂸𒂸𒂸 𒄧𒄧𒄧 𒄧", - }, - }, - }, - "sentence_id": "11111111-1111-1111-1111-111111111103", - }, - { - "sense_id": "11111111-1111-1111-1111-111111111104", - "sentence": { - "text": { - "new": { - "default": "𒂸𒂸𒄧", - }, - }, - "translation": { - "new": { - "en": "I swim", - }, - }, - }, - "sentence_id": "11111111-1111-1111-1111-111111111105", - }, - { - "sense_id": "11111111-1111-1111-1111-111111111104", - "sentence": { - "text": { - "new": { - "default": "𒄧𒂸 𒂸𒄧", - }, - }, - "translation": { - "new": { - "en": "He swam", - "es": "Él nadó", - "it": "egli nuotava", - }, - }, - }, - "sentence_id": "11111111-1111-1111-1111-111111111106", - }, - { - "sense_id": "11111111-1111-1111-1111-111111111104", - "sentence": { - "text": { - "new": { - "default": "𒂸 𒄧𒄧 𒂸", - }, - }, - "translation": { - "new": { - "en": "He will swim", - "es": "Él nadará", - }, - }, - }, - "sentence_id": "11111111-1111-1111-1111-111111111107", - }, - { - "sense_id": "11111111-1111-1111-1111-111111111108", - "sentence": { - "text": { - "new": { - "default": "𒂸𒂸 𒂸𒂸 𒂸𒂸", - }, - }, - }, - "sentence_id": "11111111-1111-1111-1111-111111111109", - }, - ], - }, - ] - `) - }) - - test('high-level conversion from csv with senses', () => { - const dictionaryId = 'example-v4-senses' - const file = readFileSync(path.join(__dirname, `./data/${dictionaryId}/${dictionaryId}.csv`), 'utf8') - const rows = parseCSVFrom(file) - const entries = rows.map(row => convert_row_to_objects_for_databases({ row, import_id, timestamp: fakeTimeStamp, test: true })) - - expect(entries).toMatchInlineSnapshot(` - [ - { - "firebase_entry": { - "ca": 10101010, - "gl": { - "es": "sol", - }, - "ii": "v4-1715819006966", - "lx": "kꞌahkꞌal", - "nt": "16/jul./2019. 
Bachajon", - "ps": [ - "n", - ], - "ua": 10101010, - "va": "kꞌajkꞌal", - "xs": { - "es": "Ya salió el sol", - "vn": "Lokꞌix tal kꞌahkꞌal", - }, - }, - "supabase_senses": [ - { - "sense": { - "glosses": { - "new": { - "es": "fiebre", - }, - }, - "parts_of_speech": { - "new": [ - "n", - ], - }, - }, - "sense_id": "11111111-1111-1111-1111-111111111100", - }, - { - "sense": { - "glosses": { - "new": { - "es": "día", - }, - }, - "parts_of_speech": { - "new": [ - "n", - ], - }, - }, - "sense_id": "11111111-1111-1111-1111-111111111103", - }, - { - "sense": { - "glosses": { - "new": { - "es": "calor", - }, - }, - "parts_of_speech": { - "new": [ - "n", - ], - }, - }, - "sense_id": "11111111-1111-1111-1111-111111111106", - }, - ], - "supabase_sentences": [ - { - "sense_id": "11111111-1111-1111-1111-111111111100", - "sentence": { - "text": { - "new": { - "default": "Ay ta kꞌahkꞌal te chꞌin alale", - }, - }, - "translation": { - "new": { - "es": "El niño tiene fiebre", - }, - }, - }, - "sentence_id": "11111111-1111-1111-1111-111111111102", - }, - { - "sense_id": "11111111-1111-1111-1111-111111111103", - "sentence": { - "text": { - "new": { - "default": "Cheb kꞌahkꞌal ya x-aꞌtejotik", - }, - }, - "translation": { - "new": { - "es": "Trabajaremos dos días", - }, - }, - }, - "sentence_id": "11111111-1111-1111-1111-111111111105", - }, - { - "sense_id": "11111111-1111-1111-1111-111111111106", - "sentence": { - "text": { - "new": { - "default": "Toyol kꞌahkꞌal ya kaꞌiy", - }, - }, - "translation": { - "new": { - "es": "Siento mucho calor", - }, - }, - }, - "sentence_id": "11111111-1111-1111-1111-111111111108", - }, - ], - }, - { - "firebase_entry": { - "ca": 10101010, - "gl": { - "es": "sol", - }, - "ii": "v4-1715819006966", - "lx": "kꞌaal", - "nt": "26/dic./2020", - "ps": [ - "n", - ], - "ua": 10101010, - "va": "kꞌahkꞌal", - "xs": { - "es": "Que bueno, ya salió el sol", - "vn": "Jaꞌnix lek-a lokꞌix tel kꞌaal", - }, - }, - "supabase_senses": [ - { - "sense": { - "glosses": { - "new": { - "es": "fiebre", - }, - }, - "parts_of_speech": { - "new": [ - "n", - ], - }, - }, - "sense_id": "11111111-1111-1111-1111-111111111100", - }, - { - "sense": { - "glosses": { - "new": { - "es": "día", - }, - }, - "parts_of_speech": { - "new": [ - "n", - ], - }, - }, - "sense_id": "11111111-1111-1111-1111-111111111103", - }, - { - "sense": { - "glosses": { - "new": { - "es": "calor", - }, - }, - "parts_of_speech": { - "new": [ - "n", - ], - }, - }, - "sense_id": "11111111-1111-1111-1111-111111111106", - }, - ], - "supabase_sentences": [ - { - "sense_id": "11111111-1111-1111-1111-111111111100", - "sentence": { - "text": { - "new": { - "default": "Ay bayal skꞌaal te chꞌin x-Ixchele", - }, - }, - "translation": { - "new": { - "es": "Mi hijita Ixchel tiene mucha fiebre", - }, - }, - }, - "sentence_id": "11111111-1111-1111-1111-111111111102", - }, - { - "sense_id": "11111111-1111-1111-1111-111111111103", - "sentence": { - "text": { - "new": { - "default": ""Bajtꞌix kꞌaal mamtik, yorailix ichꞌ lewa"", - }, - }, - "translation": { - "new": { - "es": "Ya transcurrió el día mi estimado señor, es momento de tomar un descanso", - }, - }, - }, - "sentence_id": "11111111-1111-1111-1111-111111111105", - }, - { - "sense_id": "11111111-1111-1111-1111-111111111106", - "sentence": { - "text": { - "new": { - "default": "Toyol kꞌaal ya jkaꞌiy", - }, - }, - "translation": { - "new": { - "es": "Siento mucho calor", - }, - }, - }, - "sentence_id": "11111111-1111-1111-1111-111111111108", - }, - ], - }, - { - "firebase_entry": { - "ca": 10101010, - 
"gl": { - "es": "sol", - }, - "ii": "v4-1715819006966", - "lx": "kꞌajkꞌal", - "nt": "14/dic./2019", - "ps": [ - "n", - ], - "ua": 10101010, - "va": "kꞌahkꞌal", - }, - "supabase_senses": [ - { - "sense": { - "glosses": { - "new": { - "es": "día", - }, - }, - "parts_of_speech": { - "new": [ - "n", - ], - }, - }, - "sense_id": "11111111-1111-1111-1111-111111111100", - }, - { - "sense": { - "glosses": { - "new": { - "es": "calor", - }, - }, - "parts_of_speech": { - "new": [ - "n", - ], - }, - }, - "sense_id": "11111111-1111-1111-1111-111111111102", - }, - { - "sense": { - "glosses": { - "new": { - "es": "fiebre", - }, - }, - "parts_of_speech": { - "new": [ - "n", - ], - }, - }, - "sense_id": "11111111-1111-1111-1111-111111111103", - }, - ], - "supabase_sentences": [], - }, - { - "firebase_entry": { - "ca": 10101010, - "gl": { - "es": "fuego", - }, - "ii": "v4-1715819006966", - "lx": "kꞌajkꞌ", - "nt": "23/sep./2023", - "ps": [ - "n", - ], - "ua": 10101010, - "va": "kꞌahkꞌ", - "xs": { - "es": "Ya hice el fuego", - "vn": "Tilix kuꞌun-i kꞌajkꞌi", - }, - }, - "supabase_senses": [ - { - "sense": { - "glosses": { - "new": { - "es": "bravo", - }, - }, - "parts_of_speech": { - "new": [ - "adj", - ], - }, - }, - "sense_id": "11111111-1111-1111-1111-111111111100", - }, - { - "sense": { - "glosses": { - "new": { - "es": "fiebre", - }, - }, - "parts_of_speech": { - "new": [ - "n", - ], - }, - }, - "sense_id": "11111111-1111-1111-1111-111111111103", - }, - { - "sense": { - "glosses": { - "new": { - "es": "caliente", - }, - }, - "parts_of_speech": { - "new": [ - "adj", - ], - }, - }, - "sense_id": "11111111-1111-1111-1111-111111111106", - }, - ], - "supabase_sentences": [ - { - "sense_id": "11111111-1111-1111-1111-111111111100", - "sentence": { - "text": { - "new": { - "default": "Lom kꞌajkꞌ te mamal jkaxlane", - }, - }, - "translation": { - "new": { - "es": "El mestizo es muy bravo", - }, - }, - }, - "sentence_id": "11111111-1111-1111-1111-111111111102", - }, - { - "sense_id": "11111111-1111-1111-1111-111111111103", - "sentence": { - "text": { - "new": { - "default": "Tsakbil ta kꞌajkꞌ te alale", - }, - }, - "translation": { - "new": { - "es": "El bebé tiene mucha fiebre", - }, - }, - }, - "sentence_id": "11111111-1111-1111-1111-111111111105", - }, - { - "sense_id": "11111111-1111-1111-1111-111111111106", - "sentence": { - "text": { - "new": { - "default": "El café está caliente, tómalo despacio", - }, - }, - "translation": { - "new": { - "es": "Kꞌajkꞌ te kajpele, kꞌume xa awuchꞌ", - }, - }, - }, - "sentence_id": "11111111-1111-1111-1111-111111111108", - }, - ], - }, - { - "firebase_entry": { - "ca": 10101010, - "gl": { - "es": "libro", - }, - "ii": "v4-1715819006966", - "lx": "jun", - "nt": "26/sep./2023", - "ps": [ - "n", - ], - "ua": 10101010, - "xs": { - "es": "¿Qué haces? - Estoy leyendo un libro", - "vn": "¿Beluk apas? 
- Yakalon ta skꞌoponel jun", - }, - }, - "supabase_senses": [ - { - "sense": { - "glosses": { - "new": { - "es": "cuaderno", - }, - }, - "parts_of_speech": { - "new": [ - "n", - ], - }, - }, - "sense_id": "11111111-1111-1111-1111-111111111100", - }, - { - "sense": { - "glosses": { - "new": { - "es": "documento", - }, - }, - "parts_of_speech": { - "new": [ - "n", - ], - }, - }, - "sense_id": "11111111-1111-1111-1111-111111111103", - }, - { - "sense": { - "glosses": { - "new": { - "es": "papel", - }, - }, - "parts_of_speech": { - "new": [ - "n", - ], - }, - }, - "sense_id": "11111111-1111-1111-1111-111111111106", - }, - ], - "supabase_sentences": [ - { - "sense_id": "11111111-1111-1111-1111-111111111100", - "sentence": { - "text": { - "new": { - "default": "La jta ta kitsel te june", - }, - }, - "translation": { - "new": { - "es": "Alcancé a rayar mi cuaderno", - }, - }, - }, - "sentence_id": "11111111-1111-1111-1111-111111111102", - }, - { - "sense_id": "11111111-1111-1111-1111-111111111103", - "sentence": { - "text": { - "new": { - "default": "Maꞌme xa awochꞌ te ajune", - }, - }, - "translation": { - "new": { - "es": "No vayas a arrugar tu documento", - }, - }, - }, - "sentence_id": "11111111-1111-1111-1111-111111111105", - }, - { - "sense_id": "11111111-1111-1111-1111-111111111106", - "sentence": { - "text": { - "new": { - "default": "Zoe rompió el papel", - }, - }, - "translation": { - "new": { - "es": "La schꞌiꞌ jun te Zoe", - }, - }, - }, - "sentence_id": "11111111-1111-1111-1111-111111111108", - }, - ], - }, - { - "firebase_entry": { - "ca": 10101010, - "gl": { - "es": "abierto", - }, - "ii": "v4-1715819006966", - "lx": "jeꞌel", - "nt": "08/abr./2019", - "ps": [ - "adj", - ], - "ua": 10101010, - "va": "makal", - "xs": { - "es": "La puerta de mi casa quedó abierta", - "vn": "Jeꞌel jilel stiꞌ jna", - }, - }, - "supabase_senses": [ - { - "sense": { - "glosses": { - "new": { - "es": "abrir", - }, - }, - }, - "sense_id": "11111111-1111-1111-1111-111111111100", - }, - ], - "supabase_sentences": [ - { - "sense_id": "11111111-1111-1111-1111-111111111100", - "sentence": { - "text": { - "new": { - "default": "Jeꞌa tel tebuk i tiꞌnai ay bayal kꞌaal", - }, - }, - "translation": { - "new": { - "es": ""Abre un poco la puerta, hace mucho calor"", - }, - }, - }, - "sentence_id": "11111111-1111-1111-1111-111111111102", - }, - ], - }, - ] - `) - }) -}) - -function removeHeaderRow(rows: Row[]) { - return rows.splice(1) -} diff --git a/packages/scripts/import/convert_row_to_objects_for_databases.ts b/packages/scripts/import/convert_row_to_objects_for_databases.ts deleted file mode 100644 index decc062f3..000000000 --- a/packages/scripts/import/convert_row_to_objects_for_databases.ts +++ /dev/null @@ -1,195 +0,0 @@ -import { randomUUID } from 'node:crypto' -import type { ActualDatabaseEntry, ContentUpdateRequestBody } from '@living-dictionaries/types' -import type { Timestamp } from 'firebase/firestore' -import type { Row } from './row.type' - -export function convert_row_to_objects_for_databases({ row, import_id, timestamp, test = false }: { - row: Row - import_id: string - timestamp?: FirebaseFirestore.FieldValue - test?: boolean -}): { - firebase_entry: ActualDatabaseEntry - supabase_senses: { - sense_id: string - sense: ContentUpdateRequestBody['change']['sense'] - }[] - supabase_sentences: { - sentence_id: string - sense_id: string - sentence: ContentUpdateRequestBody['change']['sentence'] - }[] - } { - const sense_regex = /^s\d+\./ - const multiple_sentence_regex = /_exampleSentence\.\d+$/ - const 
has_multiple_sentence_regex_label = (key: string) => multiple_sentence_regex.test(key) - const firebase_entry: ActualDatabaseEntry = { lx: row.lexeme, gl: {}, xs: {} } - interface SupabaseSense { - sense_id: string - sense: ContentUpdateRequestBody['change']['sense'] - } - interface SupabaseSentence { - sentence_id: string - sense_id: string - sentence: ContentUpdateRequestBody['change']['sentence'] - } - const uuid_template = '11111111-1111-1111-1111-111111111111' - let current_uuid_index = 0 - function incremental_consistent_uuid() { - return test ? uuid_template.slice(0, -2) + (current_uuid_index++).toString().padStart(2, '0') : randomUUID() - } - const supabase_sense: SupabaseSense = { - sense_id: incremental_consistent_uuid(), - sense: {}, - } - const supabase_sentence: SupabaseSentence = { - sentence_id: incremental_consistent_uuid(), - sense_id: supabase_sense.sense_id, - sentence: {}, - } - const supabase_senses = [] - const supabase_sentences = [] - let old_key = 2 - let old_language_key - let new_language_key - - if (row.phonetic) firebase_entry.ph = row.phonetic - if (row.morphology) firebase_entry.mr = row.morphology - if (row.interlinearization) firebase_entry.in = row.interlinearization - if (row.partOfSpeech) firebase_entry.ps = returnArrayFromCommaSeparatedItems(row.partOfSpeech) - if (row.dialects) firebase_entry.di = row.dialects.split(',').map(dialect => dialect.trim()) - if (row.variant) firebase_entry.va = row.variant - if (row.nounClass) firebase_entry.nc = row.nounClass - if (row.source) firebase_entry.sr = row.source.split('|') - if (row.pluralForm) firebase_entry.pl = row.pluralForm - if (row.scientificName) firebase_entry.scn = [row.scientificName] - if (row.semanticDomain_custom) firebase_entry.sd = [row.semanticDomain_custom] - if (row.ID) firebase_entry.ei = row.ID - - if (row.localOrthography) firebase_entry.lo1 = row.localOrthography - if (row['localOrthography.2']) firebase_entry.lo2 = row['localOrthography.2'] - if (row['localOrthography.3']) firebase_entry.lo3 = row['localOrthography.3'] - if (row['localOrthography.4']) firebase_entry.lo4 = row['localOrthography.4'] - if (row['localOrthography.5']) firebase_entry.lo5 = row['localOrthography.5'] - - if (row.notes) firebase_entry.nt = row.notes - - for (const [k, v] of Object.entries(row)) { - const key: keyof Row = k as keyof Row - const value: string = v as string - if (!value) continue - - // gloss fields are labeled using bcp47 language codes followed by '_gloss' (e.g. 
es_gloss, tpi_gloss) - if (key.includes('_gloss') && !sense_regex.test(key)) { - const [language] = key.split('_gloss') - firebase_entry.gl[language] = value - } - - if (key.includes('vernacular_exampleSentence') && !sense_regex.test(key)) { - firebase_entry.xs.vn = value - continue // to keep next block from also adding - } - - // example sentence fields are codes followed by '_exampleSentence' - if (key.includes('_exampleSentence') && !sense_regex.test(key)) { - const [language] = key.split('_exampleSentence') - firebase_entry.xs[language] = value - } - - if (sense_regex.test(key)) { - if (key.includes('_gloss')) { - let language_key = key.replace(sense_regex, '') - language_key = language_key.replace('_gloss', '') - - if (key === `s${old_key}.${language_key}_gloss`) { - supabase_sense.sense = { glosses: { new: { ...supabase_sense.sense?.glosses?.new, [language_key]: row[key] } } } - } else { - old_key++ - supabase_sense.sense_id = incremental_consistent_uuid() - supabase_sense.sense = { glosses: { ...supabase_sense.sense.glosses, new: { [language_key]: row[key] } } } - } - } - if (key.includes('_vernacular_exampleSentence')) { - let writing_system = key.replace(sense_regex, '') - writing_system = writing_system.replace('_vernacular_exampleSentence', '') - if (has_multiple_sentence_regex_label(key)) writing_system = writing_system.slice(0, writing_system.lastIndexOf('.')) - - if (key === `s${old_key}.${writing_system}_vernacular_exampleSentence` || has_multiple_sentence_regex_label(key)) { - supabase_sentence.sense_id = supabase_sense.sense_id - supabase_sentence.sentence_id = incremental_consistent_uuid() - if (key === `s${old_key}.${writing_system}_vernacular_exampleSentence` && !has_multiple_sentence_regex_label(key)) { - supabase_sentence.sentence = { text: { new: { ...supabase_sentence?.sentence?.text?.new, [writing_system]: row[key] } } } - } else if (has_multiple_sentence_regex_label(key)) { - supabase_sentence.sentence = { text: { new: { [writing_system]: row[key] } } } - } - } - } - if (key.includes('_exampleSentence') && !key.includes('_vernacular')) { // when key is a translated example sentence - new_language_key = key.replace(sense_regex, '') - new_language_key = new_language_key.replace('_exampleSentence', '') - if (has_multiple_sentence_regex_label(key)) new_language_key = new_language_key.slice(0, new_language_key.lastIndexOf('.')) - if (old_language_key && old_language_key === new_language_key && !has_multiple_sentence_regex_label(key)) supabase_sentence.sentence_id = incremental_consistent_uuid() - if (!old_language_key) old_language_key = new_language_key - if (key === `s${old_key}.${new_language_key}_exampleSentence` || has_multiple_sentence_regex_label(key)) { - supabase_sentence.sentence = { ...supabase_sentence.sentence, translation: { new: { ...supabase_sentence?.sentence?.translation?.new, [new_language_key]: row[key] } } } - } - } - if (key.includes('_exampleSentence')) { // in this case this includes verncaular and traslated example sentences - const sentence_index: number = supabase_sentences.findIndex(sentence => sentence.sentence_id === supabase_sentence.sentence_id) - const sense_index: number = supabase_sentences.findIndex(sentence => sentence.sense_id === supabase_sentence.sense_id) - const sense_index_exists = sense_index !== -1 - const sentence_index_exists = sentence_index !== -1 - if (sense_index_exists && !has_multiple_sentence_regex_label(key)) { - supabase_sentences[sense_index] = { ...supabase_sentence } - } else if (sentence_index_exists) { - 
supabase_sentences[sentence_index] = { ...supabase_sentence } - } else { - supabase_sentences.push({ ...supabase_sentence }) - } - } - old_language_key = new_language_key - if (key.includes('.partOfSpeech')) - supabase_sense.sense = { ...supabase_sense.sense, parts_of_speech: { new: [row[key]] } } - - if (key.includes('.semanticDomain')) - supabase_sense.sense = { ...supabase_sense.sense, semantic_domains: { new: [row[key]] } } - - if (key.includes('.nounClass')) - supabase_sense.sense = { ...supabase_sense.sense, noun_class: { new: row[key] } } - } - - if (sense_regex.test(key)) { - const index: number = supabase_senses.findIndex(sense => sense.sense_id === supabase_sense.sense_id) - const sense_index_exists = index !== -1 - if (sense_index_exists) { - supabase_senses[index] = { ...supabase_sense } - } else { - supabase_senses.push({ ...supabase_sense }) - } - } - - const semanticDomain_FOLLOWED_BY_OPTIONAL_DIGIT = /^semanticDomain(?:\.\d)*$/ // semanticDomain, semanticDomain2, semanticDomain<#>, but not semanticDomain_custom - if (semanticDomain_FOLLOWED_BY_OPTIONAL_DIGIT.test(key)) { - if (!firebase_entry.sdn) firebase_entry.sdn = [] - - firebase_entry.sdn.push(value.toString()) - } - } - - if (Object.keys(firebase_entry.xs).length === 0) - delete firebase_entry.xs - - firebase_entry.ii = import_id - firebase_entry.ca = timestamp as Timestamp - firebase_entry.ua = timestamp as Timestamp - - return { - firebase_entry, - supabase_senses, - supabase_sentences, - } -} - -export function returnArrayFromCommaSeparatedItems(string: string): string[] { - return string?.split(',').map(item => item.trim()) || [] -} diff --git a/packages/scripts/import/data/example-v4-senses/example-v4-senses.csv b/packages/scripts/import/data/example-v4-senses/example-v4-senses.csv index 7e0a52d99..cb4f919e4 100644 --- a/packages/scripts/import/data/example-v4-senses/example-v4-senses.csv +++ b/packages/scripts/import/data/example-v4-senses/example-v4-senses.csv @@ -1,7 +1,7 @@ lexeme,variant,es_gloss,partOfSpeech ,vernacular_exampleSentence ,es_exampleSentence ,s2.es_gloss,s2.partOfSpeech,s2.default_vernacular_exampleSentence,s2.es_exampleSentence,s3.es_gloss,s3.partOfSpeech,s3.default_vernacular_exampleSentence,s3.es_exampleSentence,s4.es_gloss,s4.partOfSpeech,s4.default_vernacular_exampleSentence,s4.es_exampleSentence,notes kꞌahkꞌal,kꞌajkꞌal,sol,n,Lokꞌix tal kꞌahkꞌal,Ya salió el sol,fiebre,n,Ay ta kꞌahkꞌal te chꞌin alale,El niño tiene fiebre,día,n,Cheb kꞌahkꞌal ya x-aꞌtejotik,Trabajaremos dos días,calor,n,Toyol kꞌahkꞌal ya kaꞌiy,Siento mucho calor,16/jul./2019. 
Bachajon -kꞌaal,kꞌahkꞌal,sol,n,Jaꞌnix lek-a lokꞌix tel kꞌaal,"Que bueno, ya salió el sol",fiebre,n,Ay bayal skꞌaal te chꞌin x-Ixchele,Mi hijita Ixchel tiene mucha fiebre,día,n,"""Bajtꞌix kꞌaal mamtik, yorailix ichꞌ lewa""","Ya transcurrió el día mi estimado señor, es momento de tomar un descanso",calor,n,Toyol kꞌaal ya jkaꞌiy,Siento mucho calor,26/dic./2020 +kꞌaal,kꞌahkꞌal,sol,n,Jaꞌnix lek-a lokꞌix tel kꞌaal,"Que bueno, ya salió el sol",fiebre,n,Ay bayal skꞌaal te chꞌin x-Ixchele,Mi hijita Ixchel tiene mucha fiebre,día,n,"Bajtix kaal mamtik, yorailix ich lewa","Ya transcurrió el día mi estimado señor, es momento de tomar un descanso",calor,n,Toyol kꞌaal ya jkaꞌiy,Siento mucho calor,26/dic./2020 kꞌajkꞌal,kꞌahkꞌal,sol,n,,,día,n,,,calor,n,,,fiebre,n,,,14/dic./2019 kꞌajkꞌ,kꞌahkꞌ,fuego,n,Tilix kuꞌun-i kꞌajkꞌi,Ya hice el fuego,bravo,adj,Lom kꞌajkꞌ te mamal jkaxlane,El mestizo es muy bravo,fiebre,n,Tsakbil ta kꞌajkꞌ te alale,El bebé tiene mucha fiebre,caliente,adj,"El café está caliente, tómalo despacio","Kꞌajkꞌ te kajpele, kꞌume xa awuchꞌ",23/sep./2023 jun,,libro,n,¿Beluk apas? - Yakalon ta skꞌoponel jun,¿Qué haces? - Estoy leyendo un libro,cuaderno,n,La jta ta kitsel te june,Alcancé a rayar mi cuaderno,documento,n,Maꞌme xa awochꞌ te ajune,No vayas a arrugar tu documento,papel,n,Zoe rompió el papel,La schꞌiꞌ jun te Zoe,26/sep./2023 -jeꞌel,makal,abierto,adj,Jeꞌel jilel stiꞌ jna,La puerta de mi casa quedó abierta,abrir,,Jeꞌa tel tebuk i tiꞌnai ay bayal kꞌaal,"""Abre un poco la puerta, hace mucho calor""",,,,,,,,,08/abr./2019 \ No newline at end of file +jeꞌel,makal,abierto,adj,Jeꞌel jilel stiꞌ jna,La puerta de mi casa quedó abierta,abrir,,Jeꞌa tel tebuk i tiꞌnai ay bayal kꞌaal,"Abre un poco la puerta, hace mucho calor",,,,,,,,,08/abr./2019 \ No newline at end of file diff --git a/packages/scripts/import/generate-sql-statements.ts b/packages/scripts/import/generate-sql-statements.ts new file mode 100644 index 000000000..e5edf127a --- /dev/null +++ b/packages/scripts/import/generate-sql-statements.ts @@ -0,0 +1,362 @@ +import { randomUUID } from 'node:crypto' +import type { MultiString, TablesInsert } from '@living-dictionaries/types' +import type { ImportContentUpdate } from '@living-dictionaries/types/supabase/content-import.interface' +import { diego_ld_user_id } from '../config-supabase' +import type { Number_Suffix, Row, Sense_Prefix } from './row.type' +import { sql_file_string } from './to-sql-string' +import { millisecond_incrementing_timestamp } from './incrementing-timestamp' + +export interface Upload_Operations { + upload_photo: (filepath: string, entry_id: string) => Promise<{ storage_path: string, serving_url: string }> + upload_audio: (filepath: string, entry_id: string) => Promise<{ storage_path: string }> + // upload_video: (filepath: string) => Promise<{ storage_path: string }> +} + +export async function generate_sql_statements({ + row, + dictionary_id, + import_id, + speakers, + dialects, + tags, + upload_operations: { + upload_photo, + upload_audio, + // upload_video, + }, +}: { + row: Row + dictionary_id: string + import_id: string + speakers: { id: string, name: string }[] + dialects: { id: string, name: MultiString }[] + tags: { id: string, name: string }[] + upload_operations: Upload_Operations +}) { + try { + let sql_statements = '' + + const entry_id = randomUUID() + + const c_meta = { + created_by: diego_ld_user_id, + created_at: millisecond_incrementing_timestamp(), + } + const c_u_meta = { + ...c_meta, + updated_by: c_meta.created_by, + updated_at: c_meta.created_at, + 
} + const assemble_content_update = ({ data, ...rest }: ImportContentUpdate) => { + const data_without_meta = { ...data } + // @ts-expect-error + delete data_without_meta.id + // @ts-expect-error + delete data_without_meta.dictionary_id + delete data_without_meta.created_at + // @ts-expect-error + delete data_without_meta.created_by + // @ts-expect-error + delete data_without_meta.updated_at + // @ts-expect-error + delete data_without_meta.updated_by + + const content_update: TablesInsert<'content_updates'> = { + ...rest, + id: randomUUID(), + import_id, + dictionary_id, + user_id: c_meta.created_by, + timestamp: c_meta.created_at, + data: data_without_meta, + } + return content_update + } + + const entry: TablesInsert<'entries'> = { + id: entry_id, + dictionary_id, + ...c_u_meta, + lexeme: { + default: row.lexeme, + ...(row.localOrthography && { lo1: row.localOrthography }), + ...(row['localOrthography.2'] && { lo2: row['localOrthography.2'] }), + ...(row['localOrthography.3'] && { lo3: row['localOrthography.3'] }), + ...(row['localOrthography.4'] && { lo4: row['localOrthography.4'] }), + ...(row['localOrthography.5'] && { lo5: row['localOrthography.5'] }), + }, + } + if (row.phonetic) entry.phonetic = row.phonetic + if (row.morphology) entry.morphology = row.morphology + if (row.source) entry.sources = row.source.split('|').map(source => source.trim()).filter(Boolean) + if (row.scientificName) entry.scientific_names = [row.scientificName] + if (row.ID) entry.elicitation_id = row.ID + if (row.notes) entry.notes = { default: row.notes } + + sql_statements += sql_file_string('entries', entry) + sql_statements += sql_file_string('content_updates', assemble_content_update({ type: 'insert_entry', entry_id, data: entry })) + + if (row.dialects) { + const dialect_strings = row.dialects.split('|').map(dialect => dialect.trim()).filter(Boolean) + for (const dialect_to_assign of dialect_strings) { + let dialect_id = dialects.find(({ name }) => name.default === dialect_to_assign)?.id + if (!dialect_id) { + dialect_id = randomUUID() + const dialect: TablesInsert<'dialects'> = { + id: dialect_id, + ...c_u_meta, + dictionary_id, + name: { default: dialect_to_assign }, + } + sql_statements += sql_file_string('dialects', dialect) + dialects.push({ id: dialect.id, name: dialect.name }) + } + + sql_statements += sql_file_string('entry_dialects', { + ...c_meta, + dialect_id, + entry_id, + }) + } + } + + if (row.tags) { + const tag_strings = row.tags.split('|').map(tag => tag.trim()).filter(Boolean) + for (const tag_to_assign of tag_strings) { + let tag_id = tags.find(({ name }) => name === tag_to_assign)?.id + if (!tag_id) { + tag_id = randomUUID() + const tag: TablesInsert<'tags'> = { + id: tag_id, + ...c_u_meta, + dictionary_id, + name: tag_to_assign, + } + sql_statements += sql_file_string('tags', tag) + tags.push({ id: tag.id, name: tag.name }) + } + + sql_statements += sql_file_string('entry_tags', { + ...c_meta, + tag_id, + entry_id, + }) + } + } + + const senses: TablesInsert<'senses'>[] = [] + const sentences: TablesInsert<'sentences'>[] = [] + const senses_in_sentences: TablesInsert<'senses_in_sentences'>[] = [] + + const row_entries = (Object.entries(row) as [keyof Row, string][]) + .sort(([keyA], [keyB]) => keyA.localeCompare(keyB)) + + const first_sense_label = 's1' + const sense_labels = new Set([first_sense_label]) // always have at least one sense + const sense_regex = /^(?<sense_index>s\d+)\./ + for (const key of Object.keys(row)) { + const match = key.match(sense_regex) + if (match)
sense_labels.add(match.groups.sense_index) + } + + for (const sense_label of sense_labels) { + const sense_id = randomUUID() + + const sense: TablesInsert<'senses'> = { + entry_id, + ...c_u_meta, + id: sense_id, + glosses: {}, + } + + const currently_on_first_sense = sense_label === first_sense_label + const sense_prefix = currently_on_first_sense ? '' : `${sense_label}.` as Sense_Prefix + + for (const [key, value] of row_entries) { + if (!value) continue + + if (currently_on_first_sense) { + const key_has_secondary_sense_label = !!key.match(sense_regex) + if (key_has_secondary_sense_label) continue + } else if (!key.startsWith(sense_prefix)) { + continue + } + + if (key.endsWith('nounClass')) sense.noun_class = value + if (key.endsWith('variant')) sense.variant = { default: value } + if (key.endsWith('pluralForm')) sense.plural_form = { default: value } + + if (key.includes('partOfSpeech')) { + if (!sense.parts_of_speech) sense.parts_of_speech = [] + sense.parts_of_speech.push(value) + } + if (key.includes('semanticDomain')) { + if (!sense.semantic_domains) sense.semantic_domains = [] + sense.semantic_domains.push(value) + } + + const key_without_prefix = key.replace(sense_prefix, '') + if (key.endsWith('_gloss')) { + const language = key_without_prefix.replace('_gloss', '') + sense.glosses[language] = value + } + } + + senses.push(sense) + + const sense_sentence_number_suffix = new Set() + + for (const [key, value] of row_entries) { + if (!key.includes('_exampleSentence')) continue + if (!value) continue + + if (currently_on_first_sense) { + const key_has_secondary_sense_label = !!key.match(sense_regex) + if (key_has_secondary_sense_label) continue + } else if (!key.startsWith(sense_prefix)) { + continue + } + + const number_suffix_with_period = key.replace(/.*_exampleSentence/, '') as Number_Suffix + sense_sentence_number_suffix.add(number_suffix_with_period) + } + + for (const sentence_suffix of sense_sentence_number_suffix) { + const sentence_id = randomUUID() + const sentence: TablesInsert<'sentences'> = { + dictionary_id, + ...c_u_meta, + id: sentence_id, + text: {}, + } + + for (const [key, value] of row_entries) { + if (!key.includes('_exampleSentence')) continue + if (!value) continue + + // ensure key has sense_prefix + if (currently_on_first_sense) { + const key_has_secondary_sense_label = !!key.match(sense_regex) + if (key_has_secondary_sense_label) continue + } else if (!key.startsWith(sense_prefix)) { + continue + } + + // ensure key has sentence_suffix + if (sentence_suffix === '') { + if (!key.endsWith('_exampleSentence')) continue + } else if (!key.endsWith(sentence_suffix)) { + continue + } + + const key_without_prefix = key.replace(sense_prefix, '') + const key_without_prefix_nor_suffix = key_without_prefix.replace(sentence_suffix, '') + if (key.includes('_vernacular_exampleSentence')) { + const writing_system = key_without_prefix_nor_suffix.replace('_vernacular_exampleSentence', '') + sentence.text[writing_system] = value + } else if (key.endsWith('_exampleSentence')) { + if (!sentence.translation) sentence.translation = {} + const language = key_without_prefix_nor_suffix.replace('_exampleSentence', '') + sentence.translation[language] = value + } + } + + sentences.push(sentence) + senses_in_sentences.push({ + ...c_meta, + sentence_id, + sense_id, + }) + } + } + + for (const sense of senses) { + sql_statements += sql_file_string('senses', sense) + } + + for (const sentence of sentences) { + sql_statements += sql_file_string('sentences', sentence) + } + + for (const 
connection of senses_in_sentences) { + sql_statements += sql_file_string('senses_in_sentences', connection) + } + + for (const [key, value] of row_entries) { + if (!key.includes('soundFile')) continue + if (!value) continue + + const { storage_path } = await upload_audio(value, entry_id) + const audio_id = randomUUID() + const audio: TablesInsert<'audio'> = { + ...c_u_meta, + id: audio_id, + dictionary_id, + entry_id, + storage_path, + } + sql_statements += sql_file_string('audio', audio) + + // TODO: the code above will properly import multiple audio files to the same entry but the code below will only import the metadata from the first audio file. Late on when adding multiple audio import ability, use the next line to get the number suffix from the key + // const number_suffix_with_period = key.replace('soundFile', '') as Number_Suffix + if (row.speakerName) { + let speaker_id = speakers.find(({ name }) => name === row.speakerName)?.id + if (!speaker_id) { + speaker_id = randomUUID() + + const speaker: TablesInsert<'speakers'> = { + ...c_u_meta, + id: speaker_id, + dictionary_id, + name: row.speakerName, + birthplace: row.speakerHometown || '', + decade: Number.parseInt(row.speakerAge) || null, + gender: row.speakerGender as 'm' | 'f' | 'o' || null, + } + + sql_statements += sql_file_string('speakers', speaker) + speakers.push({ id: speaker_id, name: row.speakerName }) + } + + sql_statements += sql_file_string('audio_speakers', { + ...c_meta, + audio_id, + speaker_id, + }) + } + } + + if (row.photoFile) { + const { storage_path, serving_url } = await upload_photo(row.photoFile, entry_id) + const photo_id = randomUUID() + const photo: TablesInsert<'photos'> = { + ...c_u_meta, + id: photo_id, + dictionary_id, + storage_path, + serving_url, + } + sql_statements += sql_file_string('photos', photo) + const sense_id = senses[0].id + const sense_photo: TablesInsert<'sense_photos'> = { + ...c_meta, + photo_id, + sense_id, + } + sql_statements += sql_file_string('sense_photos', sense_photo) + } + + // TablesInsert<'videos'> + // TablesInsert<'video_speakers'> + // TablesInsert<'sense_videos'> + + return `${sql_statements}\n` + } catch (err) { + console.log(`error with: ${row}: ${err}`) + console.error(err) + } +} + +function returnArrayFromCommaSeparatedItems(string: string): string[] { + return string?.split(',').map(item => item.trim()) || null +} diff --git a/packages/scripts/import/getImageServingUrl.ts b/packages/scripts/import/getImageServingUrl.ts index d8a65ccc8..52e87ea46 100644 --- a/packages/scripts/import/getImageServingUrl.ts +++ b/packages/scripts/import/getImageServingUrl.ts @@ -1,21 +1,20 @@ -import fetch from 'node-fetch'; +import fetch from 'node-fetch' +import { projectId } from '../config-firebase' -import 'dotenv/config'; // see https://github.com/motdotla/dotenv#how-do-i-use-dotenv-with-import +import 'dotenv/config' // see https://github.com/motdotla/dotenv#how-do-i-use-dotenv-with-import -export const getImageServingUrl = async (imageStoragePath: string, environment: string) => { +export async function getImageServingUrl(imageStoragePath: string, environment: string) { if (!process.env.ProcessImageUrl) - throw new Error('Missing ProcessImageUrl, is it in your uncommitted .env file?'); + throw new Error('Missing ProcessImageUrl, is it in your uncommitted .env file?') try { - const imageServingUrlEndpoint = `${process.env.ProcessImageUrl}/talking-dictionaries-${ - environment == 'prod' ? 
'alpha' : 'dev' - }.appspot.com/${imageStoragePath}`; - const res = await fetch(imageServingUrlEndpoint); - const imageServingUrl = await res.text(); - return imageServingUrl.replace('http://lh3.googleusercontent.com/', ''); + const imageServingUrlEndpoint = `${process.env.ProcessImageUrl}/${projectId}.appspot.com/${imageStoragePath}` + const res = await fetch(imageServingUrlEndpoint) + const imageServingUrl = await res.text() + return imageServingUrl.replace('http://lh3.googleusercontent.com/', '') } catch (error) { - console.log(`Error getting serving url for ${imageStoragePath} on ${environment}`); + console.log(`Error getting serving url for ${imageStoragePath} on ${environment}`) // @ts-ignore - throw new Error(error); + throw new Error(error) } -}; +} diff --git a/packages/scripts/import/import-data.snap.json b/packages/scripts/import/import-data.snap.json new file mode 100644 index 000000000..203236adc --- /dev/null +++ b/packages/scripts/import/import-data.snap.json @@ -0,0 +1,547 @@ +{ + "entries": [ + { + "audios": null, + "created_at": "2024-03-08T00:44:04.6+00:00", + "deleted": null, + "dialect_ids": null, + "dictionary_id": "example-v4-senses", + "id": "11111111-1111-1111-1111-111111100042", + "main": { + "lexeme": { + "default": "kꞌaal", + }, + "notes": { + "default": "26/dic./2020", + }, + }, + "senses": [ + { + "glosses": { + "es": "calor", + }, + "id": "11111111-1111-1111-1111-111111100050", + "parts_of_speech": [ + "n", + ], + "sentence_ids": [ + "11111111-1111-1111-1111-111111100051", + ], + }, + { + "glosses": { + "es": "sol", + }, + "id": "11111111-1111-1111-1111-111111100044", + "parts_of_speech": [ + "n", + ], + "sentence_ids": [ + "11111111-1111-1111-1111-111111100045", + ], + "variant": { + "default": "kꞌahkꞌal", + }, + }, + { + "glosses": { + "es": "fiebre", + }, + "id": "11111111-1111-1111-1111-111111100046", + "parts_of_speech": [ + "n", + ], + "sentence_ids": [ + "11111111-1111-1111-1111-111111100047", + ], + }, + { + "glosses": { + "es": "día", + }, + "id": "11111111-1111-1111-1111-111111100048", + "parts_of_speech": [ + "n", + ], + "sentence_ids": [ + "11111111-1111-1111-1111-111111100049", + ], + }, + ], + "tag_ids": null, + "updated_at": "2024-03-08T00:44:04.6+00:00", + }, + { + "audios": null, + "created_at": "2024-03-08T00:44:04.6+00:00", + "deleted": null, + "dialect_ids": null, + "dictionary_id": "example-v4-senses", + "id": "11111111-1111-1111-1111-111111100052", + "main": { + "lexeme": { + "default": "kꞌajkꞌal", + }, + "notes": { + "default": "14/dic./2019", + }, + }, + "senses": [ + { + "glosses": { + "es": "fiebre", + }, + "id": "11111111-1111-1111-1111-111111100057", + "parts_of_speech": [ + "n", + ], + }, + { + "glosses": { + "es": "sol", + }, + "id": "11111111-1111-1111-1111-111111100054", + "parts_of_speech": [ + "n", + ], + "variant": { + "default": "kꞌahkꞌal", + }, + }, + { + "glosses": { + "es": "calor", + }, + "id": "11111111-1111-1111-1111-111111100056", + "parts_of_speech": [ + "n", + ], + }, + { + "glosses": { + "es": "día", + }, + "id": "11111111-1111-1111-1111-111111100055", + "parts_of_speech": [ + "n", + ], + }, + ], + "tag_ids": null, + "updated_at": "2024-03-08T00:44:04.6+00:00", + }, + { + "audios": null, + "created_at": "2024-03-08T00:44:04.6+00:00", + "deleted": null, + "dialect_ids": null, + "dictionary_id": "example-v4-senses", + "id": "11111111-1111-1111-1111-111111100058", + "main": { + "lexeme": { + "default": "kꞌajkꞌ", + }, + "notes": { + "default": "23/sep./2023", + }, + }, + "senses": [ + { + "glosses": { + "es": "bravo", 
+ }, + "id": "11111111-1111-1111-1111-111111100062", + "parts_of_speech": [ + "adj", + ], + "sentence_ids": [ + "11111111-1111-1111-1111-111111100063", + ], + }, + { + "glosses": { + "es": "caliente", + }, + "id": "11111111-1111-1111-1111-111111100066", + "parts_of_speech": [ + "adj", + ], + "sentence_ids": [ + "11111111-1111-1111-1111-111111100067", + ], + }, + { + "glosses": { + "es": "fiebre", + }, + "id": "11111111-1111-1111-1111-111111100064", + "parts_of_speech": [ + "n", + ], + "sentence_ids": [ + "11111111-1111-1111-1111-111111100065", + ], + }, + { + "glosses": { + "es": "fuego", + }, + "id": "11111111-1111-1111-1111-111111100060", + "parts_of_speech": [ + "n", + ], + "sentence_ids": [ + "11111111-1111-1111-1111-111111100061", + ], + "variant": { + "default": "kꞌahkꞌ", + }, + }, + ], + "tag_ids": null, + "updated_at": "2024-03-08T00:44:04.6+00:00", + }, + { + "audios": null, + "created_at": "2024-03-08T00:44:04.6+00:00", + "deleted": null, + "dialect_ids": null, + "dictionary_id": "example-v4-senses", + "id": "11111111-1111-1111-1111-111111100068", + "main": { + "lexeme": { + "default": "jun", + }, + "notes": { + "default": "26/sep./2023", + }, + }, + "senses": [ + { + "glosses": { + "es": "libro", + }, + "id": "11111111-1111-1111-1111-111111100070", + "parts_of_speech": [ + "n", + ], + "sentence_ids": [ + "11111111-1111-1111-1111-111111100071", + ], + }, + { + "glosses": { + "es": "cuaderno", + }, + "id": "11111111-1111-1111-1111-111111100072", + "parts_of_speech": [ + "n", + ], + "sentence_ids": [ + "11111111-1111-1111-1111-111111100073", + ], + }, + { + "glosses": { + "es": "documento", + }, + "id": "11111111-1111-1111-1111-111111100074", + "parts_of_speech": [ + "n", + ], + "sentence_ids": [ + "11111111-1111-1111-1111-111111100075", + ], + }, + { + "glosses": { + "es": "papel", + }, + "id": "11111111-1111-1111-1111-111111100076", + "parts_of_speech": [ + "n", + ], + "sentence_ids": [ + "11111111-1111-1111-1111-111111100077", + ], + }, + ], + "tag_ids": null, + "updated_at": "2024-03-08T00:44:04.6+00:00", + }, + { + "audios": null, + "created_at": "2024-03-08T00:44:04.6+00:00", + "deleted": null, + "dialect_ids": null, + "dictionary_id": "example-v4-senses", + "id": "11111111-1111-1111-1111-111111100078", + "main": { + "lexeme": { + "default": "jeꞌel", + }, + "notes": { + "default": "08/abr./2019", + }, + }, + "senses": [ + { + "glosses": { + "es": "abrir", + }, + "id": "11111111-1111-1111-1111-111111100082", + "sentence_ids": [ + "11111111-1111-1111-1111-111111100083", + ], + }, + { + "glosses": { + "es": "abierto", + }, + "id": "11111111-1111-1111-1111-111111100080", + "parts_of_speech": [ + "adj", + ], + "sentence_ids": [ + "11111111-1111-1111-1111-111111100081", + ], + "variant": { + "default": "makal", + }, + }, + { + "glosses": {}, + "id": "11111111-1111-1111-1111-111111100085", + }, + { + "glosses": {}, + "id": "11111111-1111-1111-1111-111111100084", + }, + ], + "tag_ids": null, + "updated_at": "2024-03-08T00:44:04.6+00:00", + }, + ], + "sentences": [ + { + "created_at": "2024-03-08T00:44:04.6+00:00", + "created_by": "be43b1dd-6c64-494d-b5da-10d70c384433", + "deleted": null, + "dictionary_id": "example-v4-senses", + "id": "11111111-1111-1111-1111-111111100045", + "text": {}, + "text_id": null, + "translation": { + "es": "Que bueno, ya salió el sol", + "vernacular": "Jaꞌnix lek-a lokꞌix tel kꞌaal", + }, + "updated_at": "2024-03-08T00:44:04.6+00:00", + "updated_by": "be43b1dd-6c64-494d-b5da-10d70c384433", + }, + { + "created_at": "2024-03-08T00:44:04.6+00:00", + 
"created_by": "be43b1dd-6c64-494d-b5da-10d70c384433", + "deleted": null, + "dictionary_id": "example-v4-senses", + "id": "11111111-1111-1111-1111-111111100047", + "text": { + "default": "Ay bayal skꞌaal te chꞌin x-Ixchele", + }, + "text_id": null, + "translation": { + "es": "Mi hijita Ixchel tiene mucha fiebre", + }, + "updated_at": "2024-03-08T00:44:04.6+00:00", + "updated_by": "be43b1dd-6c64-494d-b5da-10d70c384433", + }, + { + "created_at": "2024-03-08T00:44:04.6+00:00", + "created_by": "be43b1dd-6c64-494d-b5da-10d70c384433", + "deleted": null, + "dictionary_id": "example-v4-senses", + "id": "11111111-1111-1111-1111-111111100049", + "text": { + "default": "Bajtix kaal mamtik, yorailix ich lewa", + }, + "text_id": null, + "translation": { + "es": "Ya transcurrió el día mi estimado señor, es momento de tomar un descanso", + }, + "updated_at": "2024-03-08T00:44:04.6+00:00", + "updated_by": "be43b1dd-6c64-494d-b5da-10d70c384433", + }, + { + "created_at": "2024-03-08T00:44:04.6+00:00", + "created_by": "be43b1dd-6c64-494d-b5da-10d70c384433", + "deleted": null, + "dictionary_id": "example-v4-senses", + "id": "11111111-1111-1111-1111-111111100051", + "text": { + "default": "Toyol kꞌaal ya jkaꞌiy", + }, + "text_id": null, + "translation": { + "es": "Siento mucho calor", + }, + "updated_at": "2024-03-08T00:44:04.6+00:00", + "updated_by": "be43b1dd-6c64-494d-b5da-10d70c384433", + }, + { + "created_at": "2024-03-08T00:44:04.6+00:00", + "created_by": "be43b1dd-6c64-494d-b5da-10d70c384433", + "deleted": null, + "dictionary_id": "example-v4-senses", + "id": "11111111-1111-1111-1111-111111100061", + "text": {}, + "text_id": null, + "translation": { + "es": "Ya hice el fuego", + "vernacular": "Tilix kuꞌun-i kꞌajkꞌi", + }, + "updated_at": "2024-03-08T00:44:04.6+00:00", + "updated_by": "be43b1dd-6c64-494d-b5da-10d70c384433", + }, + { + "created_at": "2024-03-08T00:44:04.6+00:00", + "created_by": "be43b1dd-6c64-494d-b5da-10d70c384433", + "deleted": null, + "dictionary_id": "example-v4-senses", + "id": "11111111-1111-1111-1111-111111100063", + "text": { + "default": "Lom kꞌajkꞌ te mamal jkaxlane", + }, + "text_id": null, + "translation": { + "es": "El mestizo es muy bravo", + }, + "updated_at": "2024-03-08T00:44:04.6+00:00", + "updated_by": "be43b1dd-6c64-494d-b5da-10d70c384433", + }, + { + "created_at": "2024-03-08T00:44:04.6+00:00", + "created_by": "be43b1dd-6c64-494d-b5da-10d70c384433", + "deleted": null, + "dictionary_id": "example-v4-senses", + "id": "11111111-1111-1111-1111-111111100065", + "text": { + "default": "Tsakbil ta kꞌajkꞌ te alale", + }, + "text_id": null, + "translation": { + "es": "El bebé tiene mucha fiebre", + }, + "updated_at": "2024-03-08T00:44:04.6+00:00", + "updated_by": "be43b1dd-6c64-494d-b5da-10d70c384433", + }, + { + "created_at": "2024-03-08T00:44:04.6+00:00", + "created_by": "be43b1dd-6c64-494d-b5da-10d70c384433", + "deleted": null, + "dictionary_id": "example-v4-senses", + "id": "11111111-1111-1111-1111-111111100067", + "text": { + "default": "El café está caliente, tómalo despacio", + }, + "text_id": null, + "translation": { + "es": "Kꞌajkꞌ te kajpele, kꞌume xa awuchꞌ", + }, + "updated_at": "2024-03-08T00:44:04.6+00:00", + "updated_by": "be43b1dd-6c64-494d-b5da-10d70c384433", + }, + { + "created_at": "2024-03-08T00:44:04.6+00:00", + "created_by": "be43b1dd-6c64-494d-b5da-10d70c384433", + "deleted": null, + "dictionary_id": "example-v4-senses", + "id": "11111111-1111-1111-1111-111111100071", + "text": {}, + "text_id": null, + "translation": { + "es": "¿Qué haces? 
- Estoy leyendo un libro", + "vernacular": "¿Beluk apas? - Yakalon ta skꞌoponel jun", + }, + "updated_at": "2024-03-08T00:44:04.6+00:00", + "updated_by": "be43b1dd-6c64-494d-b5da-10d70c384433", + }, + { + "created_at": "2024-03-08T00:44:04.6+00:00", + "created_by": "be43b1dd-6c64-494d-b5da-10d70c384433", + "deleted": null, + "dictionary_id": "example-v4-senses", + "id": "11111111-1111-1111-1111-111111100073", + "text": { + "default": "La jta ta kitsel te june", + }, + "text_id": null, + "translation": { + "es": "Alcancé a rayar mi cuaderno", + }, + "updated_at": "2024-03-08T00:44:04.6+00:00", + "updated_by": "be43b1dd-6c64-494d-b5da-10d70c384433", + }, + { + "created_at": "2024-03-08T00:44:04.6+00:00", + "created_by": "be43b1dd-6c64-494d-b5da-10d70c384433", + "deleted": null, + "dictionary_id": "example-v4-senses", + "id": "11111111-1111-1111-1111-111111100075", + "text": { + "default": "Maꞌme xa awochꞌ te ajune", + }, + "text_id": null, + "translation": { + "es": "No vayas a arrugar tu documento", + }, + "updated_at": "2024-03-08T00:44:04.6+00:00", + "updated_by": "be43b1dd-6c64-494d-b5da-10d70c384433", + }, + { + "created_at": "2024-03-08T00:44:04.6+00:00", + "created_by": "be43b1dd-6c64-494d-b5da-10d70c384433", + "deleted": null, + "dictionary_id": "example-v4-senses", + "id": "11111111-1111-1111-1111-111111100077", + "text": { + "default": "Zoe rompió el papel", + }, + "text_id": null, + "translation": { + "es": "La schꞌiꞌ jun te Zoe", + }, + "updated_at": "2024-03-08T00:44:04.6+00:00", + "updated_by": "be43b1dd-6c64-494d-b5da-10d70c384433", + }, + { + "created_at": "2024-03-08T00:44:04.6+00:00", + "created_by": "be43b1dd-6c64-494d-b5da-10d70c384433", + "deleted": null, + "dictionary_id": "example-v4-senses", + "id": "11111111-1111-1111-1111-111111100081", + "text": {}, + "text_id": null, + "translation": { + "es": "La puerta de mi casa quedó abierta", + "vernacular": "Jeꞌel jilel stiꞌ jna", + }, + "updated_at": "2024-03-08T00:44:04.6+00:00", + "updated_by": "be43b1dd-6c64-494d-b5da-10d70c384433", + }, + { + "created_at": "2024-03-08T00:44:04.6+00:00", + "created_by": "be43b1dd-6c64-494d-b5da-10d70c384433", + "deleted": null, + "dictionary_id": "example-v4-senses", + "id": "11111111-1111-1111-1111-111111100083", + "text": { + "default": "Jeꞌa tel tebuk i tiꞌnai ay bayal kꞌaal", + }, + "text_id": null, + "translation": { + "es": "Abre un poco la puerta, hace mucho calor", + }, + "updated_at": "2024-03-08T00:44:04.6+00:00", + "updated_by": "be43b1dd-6c64-494d-b5da-10d70c384433", + }, + ], +} \ No newline at end of file diff --git a/packages/scripts/import/import-data.test.ts b/packages/scripts/import/import-data.test.ts new file mode 100644 index 000000000..4be8ef2e5 --- /dev/null +++ b/packages/scripts/import/import-data.test.ts @@ -0,0 +1,378 @@ +/* eslint-disable require-await */ +import { readFileSync } from 'node:fs' +import { admin_supabase, anon_supabase, diego_ld_user_id, postgres, test_dictionary_id } from '../config-supabase' +import { reset_local_db } from '../reset-local-db' +import { import_data as _import_data } from './import-data' +import { parseCSVFrom } from './parse-csv' +import type { Row } from './row.type' + +const import_id = `v4-test` +const timestamp_from_which_to_fetch_data = '1971-01-01T00:00:00Z' + +vi.mock('node:crypto', () => { + const uuid_template = '11111111-1111-1111-1111-111111111111' + let current_uuid_index = 0 + + function incremental_consistent_uuid() { + return uuid_template.slice(0, -5) + (current_uuid_index++).toString().padStart(5, '0') + } + + 
return { + randomUUID: incremental_consistent_uuid, + } +}) + +vi.mock('./incrementing-timestamp', () => { + return { + millisecond_incrementing_timestamp: () => new Date('2024-03-08T00:44:04.600392+00:00').toISOString(), + } +}) + +async function import_data(rows: Row[], dictionary_id = test_dictionary_id) { + await _import_data({ + dictionary_id, + rows, + import_id, + upload_operations: { + upload_photo: async (filepath: string) => ({ storage_path: filepath, serving_url: filepath }), + upload_audio: async (filepath: string) => ({ storage_path: filepath }), + // upload_video: async (filepath: string) => ({ storage_path: filepath }), + }, + live: true, + }) + const { data } = await anon_supabase.rpc('entries_from_timestamp', { + get_newer_than: timestamp_from_which_to_fetch_data, + dict_id: dictionary_id, + }) + return data +} + +describe(import_data, () => { + beforeEach(reset_local_db) + + test('two audio files does not duplicate senses', async () => { + const entries = await import_data([{ + 'lexeme': 'hi', + 'en_gloss': 'hi', + 'soundFile': '1.mp3', + // @ts-expect-error + 'soundFile.2': '2.mp3', // this is what it will look like in the future but we are not yet supporting + 'photoFile': '1.jpg', + }]) + const { data: entry } = await anon_supabase.rpc('entry_by_id', { + passed_entry_id: entries[0].id, + }) + expect(entry).toEqual(entries) + expect(entries).toMatchInlineSnapshot(` + [ + { + "audios": [ + { + "id": "11111111-1111-1111-1111-111111100003", + "storage_path": "1.mp3", + }, + { + "id": "11111111-1111-1111-1111-111111100004", + "storage_path": "2.mp3", + }, + ], + "created_at": "2024-03-08T00:44:04.6+00:00", + "deleted": null, + "dialect_ids": null, + "dictionary_id": "test_dictionary_id", + "id": "11111111-1111-1111-1111-111111100000", + "main": { + "lexeme": { + "default": "hi", + }, + }, + "senses": [ + { + "glosses": { + "en": "hi", + }, + "id": "11111111-1111-1111-1111-111111100002", + "photo_ids": [ + "11111111-1111-1111-1111-111111100005", + ], + }, + ], + "tag_ids": null, + "updated_at": "2024-03-08T00:44:04.6+00:00", + }, + ] + `) + }) + + test('imports simple entry', async () => { + const entries = await import_data([{ lexeme: 'hi', en_gloss: 'hi', pluralForm: '', nounClass: '' }]) + expect(entries).toMatchInlineSnapshot(` + [ + { + "audios": null, + "created_at": "2024-03-08T00:44:04.6+00:00", + "deleted": null, + "dialect_ids": null, + "dictionary_id": "test_dictionary_id", + "id": "11111111-1111-1111-1111-111111100006", + "main": { + "lexeme": { + "default": "hi", + }, + }, + "senses": [ + { + "glosses": { + "en": "hi", + }, + "id": "11111111-1111-1111-1111-111111100008", + }, + ], + "tag_ids": null, + "updated_at": "2024-03-08T00:44:04.6+00:00", + }, + ] + `) + const { data: content_updates } = await admin_supabase.from('content_updates').select() + expect(content_updates).toMatchInlineSnapshot(` + [ + { + "audio_id": null, + "change": null, + "data": { + "lexeme": { + "default": "hi", + }, + }, + "dialect_id": null, + "dictionary_id": "test_dictionary_id", + "entry_id": "11111111-1111-1111-1111-111111100006", + "id": "11111111-1111-1111-1111-111111100007", + "import_id": "v4-test", + "photo_id": null, + "sense_id": null, + "sentence_id": null, + "speaker_id": null, + "table": null, + "tag_id": null, + "text_id": null, + "timestamp": "2024-03-08T00:44:04.6+00:00", + "type": "insert_entry", + "user_id": "be43b1dd-6c64-494d-b5da-10d70c384433", + "video_id": null, + }, + ] + `) + }) + + test('imports two entries with same dialect and tag', async () => { + const 
entries = await import_data([ + { lexeme: 'hi', dialects: 'dialect 1', tags: 'archaic' }, + { lexeme: 'world', dialects: 'dialect 1', tags: 'archaic' }, + ]) + expect(entries[0].dialect_ids).toHaveLength(1) + expect(entries[0].tag_ids).toHaveLength(1) + expect(entries[0].dialect_ids).toEqual(entries[1].dialect_ids) + expect(entries[0].tag_ids).toEqual(entries[1].tag_ids) + }) + + test('imports audio for two entries with same speaker', async () => { + const entries = await import_data([ + { lexeme: 'hi', soundFile: '1.mp3', speakerName: 'speaker 1', speakerHometown: 'Whoville', speakerAge: '12', speakerGender: 'm' }, + { lexeme: 'world', soundFile: '2.mp3', speakerName: 'speaker 1' }, + ]) + const { data: speakers } = await anon_supabase.from('speakers_view').select() + expect(speakers[0]).toMatchInlineSnapshot(` + { + "birthplace": "Whoville", + "created_at": "2024-03-08T00:44:04.6+00:00", + "decade": 12, + "deleted": null, + "dictionary_id": "test_dictionary_id", + "gender": "m", + "id": "11111111-1111-1111-1111-111111100021", + "name": "speaker 1", + "updated_at": "2024-03-08T00:44:04.6+00:00", + } + `) + expect(entries[0].audios[0].speaker_ids[0]).toEqual(speakers[0].id) + expect(entries[0].audios[0].speaker_ids).toEqual(entries[1].audios[0].speaker_ids) + }) + + test('imports photos', async () => { + const entries = await import_data([ + { lexeme: 'hi', photoFile: 'hello.jpg' }, + ]) + expect(entries[0].senses[0].photo_ids).toMatchInlineSnapshot(` + [ + "11111111-1111-1111-1111-111111100029", + ] + `) + }) + + test('imports complex entry', async () => { + const entries = await import_data([{ + 'lexeme': 'hi', + 'localOrthography': 'lo1', + 'localOrthography.2': 'lo2', + 'localOrthography.5': 'lo5', + 'phonetic': 'hɪ', + 'morphology': 'noun', + 'source': 'a fun, cool source | source 2 |', + 'scientificName': 'scientific name', + 'ID': 'A1', + 'notes': 'notes', + 'dialects': 'dialect 1| dialect 2', + 'tags': 'clean up| sea-diving, scuba', + + // first sense + 'es_gloss': 'hola', + 'partOfSpeech': 'n', + 'partOfSpeech.2': 'v', + 'variant': 'variant', + 'pluralForm': 'his', + 'nounClass': '12', + 'default_vernacular_exampleSentence': 'we say hi like this', + 'en_exampleSentence': 'this is the english hi translation', + + // second sense + 's2.en_gloss': 'bye', + 's2.semanticDomain': '2', + 's2.semanticDomain.2': '2.3', + + // third sense + 's3.fr_gloss': 'auch', + 's3.default_vernacular_exampleSentence': 'hi doc', + 's3.fr_exampleSentence': 'Bonjour docteur', + 's3.default_vernacular_exampleSentence.2': 'bye doc', + 's3.fr_exampleSentence.2': 'Au revoir docteur', + }]) + expect(entries).toMatchInlineSnapshot(` + [ + { + "audios": null, + "created_at": "2024-03-08T00:44:04.6+00:00", + "deleted": null, + "dialect_ids": [ + "11111111-1111-1111-1111-111111100032", + "11111111-1111-1111-1111-111111100033", + ], + "dictionary_id": "test_dictionary_id", + "id": "11111111-1111-1111-1111-111111100030", + "main": { + "elicitation_id": "A1", + "lexeme": { + "default": "hi", + "lo1": "lo1", + "lo2": "lo2", + "lo5": "lo5", + }, + "morphology": "noun", + "notes": { + "default": "notes", + }, + "phonetic": "hɪ", + "scientific_names": [ + "scientific name", + ], + "sources": [ + "a fun, cool source", + "source 2", + ], + }, + "senses": [ + { + "glosses": { + "es": "hola", + }, + "id": "11111111-1111-1111-1111-111111100036", + "noun_class": "12", + "parts_of_speech": [ + "n", + "v", + ], + "plural_form": { + "default": "his", + }, + "sentence_ids": [ + "11111111-1111-1111-1111-111111100037", + ], + 
"variant": { + "default": "variant", + }, + }, + { + "glosses": { + "fr": "auch", + }, + "id": "11111111-1111-1111-1111-111111100039", + "sentence_ids": [ + "11111111-1111-1111-1111-111111100040", + "11111111-1111-1111-1111-111111100041", + ], + }, + { + "glosses": { + "en": "bye", + }, + "id": "11111111-1111-1111-1111-111111100038", + "semantic_domains": [ + "2", + "2.3", + ], + }, + ], + "tag_ids": [ + "11111111-1111-1111-1111-111111100034", + "11111111-1111-1111-1111-111111100035", + ], + "updated_at": "2024-03-08T00:44:04.6+00:00", + }, + ] + `) + const { data: sentences } = await anon_supabase.from('sentences').select('id, text, translation') + expect(sentences).toMatchInlineSnapshot(` + [ + { + "id": "11111111-1111-1111-1111-111111100037", + "text": { + "default": "we say hi like this", + }, + "translation": { + "en": "this is the english hi translation", + }, + }, + { + "id": "11111111-1111-1111-1111-111111100040", + "text": { + "default": "hi doc", + }, + "translation": { + "fr": "Bonjour docteur", + }, + }, + { + "id": "11111111-1111-1111-1111-111111100041", + "text": { + "default": "bye doc", + }, + "translation": null, + }, + ] + `) + }) + + test('imports from CSV', async () => { + const dictionary_id = 'example-v4-senses' + const add_dictionary_sql = `INSERT INTO "public"."dictionaries" ("id", "name", "created_at", "created_by", "updated_at", "updated_by") VALUES + ('${dictionary_id}', 'Test Dictionary', '2024-03-18 14:16:22.367188+00', '${diego_ld_user_id}', '2024-03-18 14:16:22.367188+00', '${diego_ld_user_id}');` + await postgres.execute_query(add_dictionary_sql) + + const file = readFileSync(`./import/data/${dictionary_id}/${dictionary_id}.csv`, 'utf8') + const rows = parseCSVFrom(file) + rows.shift() // remove header row + const entries = await import_data(rows, dictionary_id) + const { data: sentences } = await anon_supabase.from('sentences').select() + expect({ entries, sentences }).toMatchFileSnapshot('import-data.snap.json') + }) +}) diff --git a/packages/scripts/import/import-data.ts b/packages/scripts/import/import-data.ts new file mode 100644 index 000000000..9cd1a5eab --- /dev/null +++ b/packages/scripts/import/import-data.ts @@ -0,0 +1,62 @@ +import { writeFileSync } from 'node:fs' +import { admin_supabase, anon_supabase, postgres } from '../config-supabase' +import type { Upload_Operations } from './generate-sql-statements' +import { generate_sql_statements } from './generate-sql-statements' +import type { Row } from './row.type' + +export async function import_data({ + dictionary_id, + rows, + import_id, + upload_operations, + live = false, +}: { + dictionary_id: string + rows: Row[] + import_id: string + upload_operations: Upload_Operations + live: boolean +}) { + const { data: dialects } = await anon_supabase.from('dialects').select('id, name').eq('dictionary_id', dictionary_id) + const { data: speakers } = await anon_supabase.from('speakers_view').select('id, name').eq('dictionary_id', dictionary_id) + const { data: tags } = await anon_supabase.from('tags').select('id, name').eq('dictionary_id', dictionary_id) + + const start_index = 0 + const batch_size = 30000 + const end_index = start_index + batch_size + let sql_query = 'BEGIN;' // Start a transaction + + for await (const [index, row] of rows.entries()) { + if (!row.lexeme) + continue + + if (index >= start_index && index < end_index) { + console.info(index) + const sql_statements = await generate_sql_statements({ row, dictionary_id, import_id, speakers, dialects, tags, upload_operations }) + sql_query += 
`${sql_statements}\n` + + if (index % 500 === 0) + console.log(`import reached ${index}`) + } + } + + sql_query += '\nCOMMIT;' // End the transaction + + try { + if (!process.env.CI) { + writeFileSync(`./logs/${Date.now()}_${dictionary_id}-${start_index}-query.sql`, sql_query) + } + if (live) { + console.log('executing sql query') + await postgres.execute_query(sql_query) + console.log('finished') + } + } catch (err) { + console.error(err) + if (live) { + await postgres.execute_query('ROLLBACK;') // Rollback the transaction in case of error + } + } + + return sql_query +} diff --git a/packages/scripts/import/import-media.ts b/packages/scripts/import/import-media.ts index 655777a40..bd5fccc6b 100644 --- a/packages/scripts/import/import-media.ts +++ b/packages/scripts/import/import-media.ts @@ -2,7 +2,6 @@ import { dirname, join } from 'node:path' import { fileURLToPath } from 'node:url' import * as fs from 'node:fs' -import type { GoalDatabasePhoto } from '@living-dictionaries/types' import { environment, storage } from '../config-firebase.js' import { getImageServingUrl } from './getImageServingUrl.js' @@ -10,79 +9,87 @@ const __dirname = dirname(fileURLToPath(import.meta.url)) const fileBucket = `talking-dictionaries-${environment === 'prod' ? 'alpha' : 'dev'}.appspot.com` -export async function uploadAudioFile( - audioFileName: string, - entryId: string, - dictionaryId: string, +export async function upload_audio_to_gcs({ + filepath, + entry_id, + dictionary_id, live = false, -): Promise<string> { - const audioDir = join(__dirname, `data/${dictionaryId}/audio`) - const audioFilePath = join(audioDir, audioFileName) +}: { + filepath: string + entry_id: string + dictionary_id: string + live?: boolean +}): Promise<string> { + const audioDir = join(__dirname, `data/${dictionary_id}/audio`) + const audioFilePath = join(audioDir, filepath) if (!fs.existsSync(audioFilePath)) { - console.log(`>> Missing audio file: ${audioFileName}`) + console.log(`>> Missing audio file: ${filepath}`) return null } try { - const [fileTypeSuffix] = audioFileName.match(/\.[0-9a-z]+$/i) - const uploadedAudioPath = `${dictionaryId}/audio/${entryId}_${new Date().getTime()}${fileTypeSuffix}` + const [fileTypeSuffix] = filepath.match(/\.[0-9a-z]+$/i) + const uploadedAudioPath = `${dictionary_id}/audio/${entry_id}_${new Date().getTime()}${fileTypeSuffix}` if (live) { await storage.bucket(fileBucket).upload(audioFilePath, { destination: uploadedAudioPath, metadata: { - originalFileName: audioFileName, + originalFileName: filepath, }, }) } return uploadedAudioPath } catch (err) { - throw new Error(`Not adding audio ${audioFileName} as the server had trouble uploading it. Double-check the file to see if there is a problem with it or perhaps there is code/server/network-connection problem. Error: ${err}`) + throw new Error(`Not adding audio ${filepath} as the server had trouble uploading it. Double-check the file to see if there is a problem with it or perhaps there is code/server/network-connection problem.
Error: ${err}`) } } -export async function uploadImageFile( - imageFileName: string, - entryId: string, - dictionaryId: string, +export async function upload_photo_to_gcs({ + filepath, + entry_id, + dictionary_id, live = false, -): Promise { - const imageDir = join(__dirname, `data/${dictionaryId}/images`) - const imageFilePath = join(imageDir, imageFileName) +}: { + filepath: string + entry_id: string + dictionary_id: string + live?: boolean +}) { + const imageDir = join(__dirname, `data/${dictionary_id}/images`) + const imageFilePath = join(imageDir, filepath) if (!fs.existsSync(imageFilePath)) { - console.log(`>> Missing image file: ${imageFileName}`) + console.log(`>> Missing image file: ${filepath}`) return null } try { - const [fileTypeSuffix] = imageFileName.match(/\.[0-9a-z]+$/i) - const storagePath = `${dictionaryId}/images/${entryId}_${new Date().getTime()}${fileTypeSuffix}` + const [fileTypeSuffix] = filepath.match(/\.[0-9a-z]+$/i) + const storage_path = `${dictionary_id}/images/${entry_id}_${new Date().getTime()}${fileTypeSuffix}` if (!live) - return { path: storagePath, gcs: 'no-path-bc-dry-run' } + return { storage_path, serving_url: 'no-serving_url-bc-dry-run' } await storage.bucket(fileBucket).upload(imageFilePath, { - destination: storagePath, + destination: storage_path, metadata: { - originalFileName: imageFileName, + originalFileName: filepath, }, }) - let gcsPath + let serving_url try { - gcsPath = await getImageServingUrl(storagePath, environment) + serving_url = await getImageServingUrl(storage_path, environment) } catch (err) { throw new Error(`!!! Error getting image serving URL: ${err}`) } return { - path: storagePath, - gcs: gcsPath, - ts: new Date().getTime(), - // cr: // not yet included in import template + storage_path, + serving_url, } } catch (err) { - throw new Error(`!!! Not adding image ${imageFileName} as the server had trouble digesting it. Double-check the file to see if it is just a corrupted jpg (as some are) or if the file is good and perhaps there is code/server/network-connection problem. Error: ${err}`) + throw new Error(`!!! Not adding image ${filepath} as the server had trouble digesting it. Double-check the file to see if it is just a corrupted jpg (as some are) or if the file is good and perhaps there is code/server/network-connection problem. 
Error: ${err}`) } } diff --git a/packages/scripts/import/import-to-firebase-supabase.ts b/packages/scripts/import/import-to-firebase-supabase.ts deleted file mode 100644 index d6bc6e1e7..000000000 --- a/packages/scripts/import/import-to-firebase-supabase.ts +++ /dev/null @@ -1,220 +0,0 @@ -import { readFileSync } from 'node:fs' -import { randomUUID } from 'node:crypto' -import type { ActualDatabaseEntry, ContentUpdateRequestBody, ISpeaker } from '@living-dictionaries/types' -import type { Timestamp } from 'firebase/firestore' -import { db, environment, timestamp } from '../config-firebase.js' -import type { ContentUpdateResponseBody } from '../../site/src/routes/api/db/content-update/+server' -import { uploadAudioFile, uploadImageFile } from './import-media.js' -import { parseCSVFrom } from './parse-csv.js' -import { post_request } from './post-request.js' -import { convert_row_to_objects_for_databases } from './convert_row_to_objects_for_databases.js' -import type { Row } from './row.type' - -const supabase_content_update_endpoint = 'http://localhost:3041/api/db/content-update' -const dev_developer_in_charge_supabase_uid = '12345678-abcd-efab-cdef-123456789013' // in Supabase diego@livingtongues.org -> Diego Córdova Nieto; -const prod_developer_in_charge_supabase_uid = 'be43b1dd-6c64-494d-b5da-10d70c384433' // in Supabase diego@livingtongues.org -> Diego Córdova Nieto; -const user_id_from_local = environment === 'dev' ? dev_developer_in_charge_supabase_uid : prod_developer_in_charge_supabase_uid - -const developer_in_charge_firebase_uid = 'qkTzJXH24Xfc57cZJRityS6OTn52' // diego@livingtongues.org -> Diego Córdova Nieto; - -export async function importFromSpreadsheet({ dictionaryId, live }: { dictionaryId: string, live: boolean }) { - const dateStamp = Date.now() - const import_id = `v4-${dateStamp}` - - const file = readFileSync(`./import/data/${dictionaryId}/${dictionaryId}.csv`, 'utf8') - const rows = parseCSVFrom(file) - const entries = await importEntries(dictionaryId, rows, import_id, live) - - console.log( - `Finished ${live ? 'importing' : 'emulating'} ${entries.length} entries to ${environment === 'dev' ? 
'http://localhost:3041/' : 'livingdictionaries.app/' - }${dictionaryId} in ${(Date.now() - dateStamp) / 1000} seconds`, - ) - console.log('') // line break - return entries -} - -export async function importEntries( - dictionary_id: string, - rows: Row[], - import_id: string, - live = false, -): Promise { - const firebase_entries: ActualDatabaseEntry[] = [] - let entryCount = 0 - let batchCount = 0 - let batch = db.batch() - const colRef = db.collection(`dictionaries/${dictionary_id}/words`) - - const speaker_snapshots = (await db.collection('speakers').where('contributingTo', 'array-contains', dictionary_id).get()).docs - const speakers = speaker_snapshots.map((snap) => { - return { id: snap.id, ...(snap.data() as ISpeaker) } - }) - - for (const row of rows) { - if (!row.lexeme || row.lexeme === '(word/phrase)') - continue - - if (live && batchCount === 200) { - console.log('Committing batch of entries ending with: ', entryCount) - await batch.commit() - batch = db.batch() - batchCount = 0 - } - - const universal_entry_id = colRef.doc().id - - const { firebase_entry, supabase_senses, supabase_sentences } = convert_row_to_objects_for_databases({ row, import_id, timestamp }) - - for (const { sense, sense_id } of supabase_senses) { - await update_sense({ entry_id: universal_entry_id, dictionary_id, sense, sense_id, live, import_id }) - } - for (const { sentence, sentence_id, sense_id } of supabase_sentences) { - await update_sentence({ entry_id: universal_entry_id, dictionary_id, sentence, sense_id, sentence_id, live, import_id }) - } - - if (row.photoFile) { - const pf = await uploadImageFile(row.photoFile, universal_entry_id, dictionary_id, live) - if (pf) firebase_entry.pf = pf - } - - if (row.soundFile) { - const audioFilePath = await uploadAudioFile(row.soundFile, universal_entry_id, dictionary_id, live) - firebase_entry.sf = { - path: audioFilePath, - ts: Date.now(), - } - - if (row.speakerName) { - const speaker: ISpeaker = speakers.find(speaker => speaker.displayName === row.speakerName) - if (speaker) { - firebase_entry.sf.sp = speaker.id - } else { - const new_speaker: ISpeaker = { - displayName: row.speakerName, - birthplace: row.speakerHometown || '', - decade: Number.parseInt(row.speakerAge) || null, - gender: row.speakerGender as 'm' | 'f' | 'o' || null, - contributingTo: [dictionary_id], - createdAt: timestamp as Timestamp, - createdBy: developer_in_charge_firebase_uid, - updatedAt: timestamp as Timestamp, - updatedBy: developer_in_charge_firebase_uid, - } - if (live) { - const new_speaker_id = await db.collection('speakers').add(new_speaker).then(ref => ref.id) - firebase_entry.sf.sp = new_speaker_id - speakers.push({ id: new_speaker_id, ...new_speaker }) - } - } - } - } - - firebase_entries.push(firebase_entry) - batch.create(colRef.doc(universal_entry_id), firebase_entry) - batchCount++ - entryCount++ - } - - console.log(`Committing final batch of entries ending with: ${entryCount}`) - if (live) await batch.commit() - return firebase_entries -} - -export async function update_sense({ - entry_id, - dictionary_id, - sense, - sense_id, - live, - import_id, -}: { - entry_id: string - dictionary_id: string - sense: ContentUpdateRequestBody['change']['sense'] - sense_id: string - live: boolean - import_id: string -}) { - if (!live) return console.log({ dry_sense: sense }) - - const { data, error } = await post_request(supabase_content_update_endpoint, { - id: randomUUID(), - auth_token: null, - user_id_from_local, - dictionary_id, - entry_id, - timestamp: new 
Date().toISOString(), - sense_id, - table: 'senses', - change: { - sense, - }, - import_id, - }) - - if (error) { - console.error('Error inserting into Supabase: ', error) - throw new Error(error.message) - } - - console.log({ data }) - - return true -} - -export async function update_sentence({ - entry_id, - dictionary_id, - sentence, - sense_id, - sentence_id, - live, - import_id, -}: { - entry_id: string - dictionary_id: string - sentence: ContentUpdateRequestBody['change']['sentence'] - sense_id: string - sentence_id: string - live: boolean - import_id: string -}) { - if (!live) return console.log({ dry_sense: sentence }) - - const { data, error } = await post_request(supabase_content_update_endpoint, { - id: randomUUID(), - auth_token: null, - user_id_from_local, - dictionary_id, - entry_id, - timestamp: new Date().toISOString(), - sense_id, - sentence_id, - table: 'sentences', - change: { - sentence, - }, - import_id, - }) - - if (error) { - console.error('Error inserting into Supabase: ', error) - throw new Error(error.message) - } - - console.log({ data }) - - return true -} - -// Current flow: (out of date - needs updated) -// Use Firebase to import entry as is already written (import-spreadsheet-v4.ts) including 1st sense, but check the import data for additional senses. If so then do the below flow at that point using a simple function call. -// use that entry id to add additional senses to Supabase via entry_updates (seen in routes\api\db\change\entry\+server.ts and lib\supabase\change\sense.ts) - one update for ps, one for gloss -// add example sentence to new table (Jacob will create, so it doesn't exist yet) -// add another entry_update to connect that example sentence id to the sense - -// Future Supabase-only flow - ignore for now -// Import entry into imports table, after which a trigger edge function will create the entry, get the entry id -// use that entry id to add senses via entry_updates -// add example sentence to new table (doesn't exist yet) -// add entry_update to connect that example sentence to the sense diff --git a/packages/scripts/import/import.ts b/packages/scripts/import/import.ts index 383328324..79a136010 100644 --- a/packages/scripts/import/import.ts +++ b/packages/scripts/import/import.ts @@ -1,16 +1,9 @@ +import { readFileSync } from 'node:fs' import { program } from 'commander' -// @ts-expect-error -import detect from 'detect-port' -import { importFromSpreadsheet } from './import-to-firebase-supabase' - -await checkForDevServer() - -async function checkForDevServer() { - const port = await detect(3041) // will return 3041 if available, next available if it's not (so if 3041 is taken, it will return 3042, etc.) 
- const devServerRunning = port > 3041 - if (devServerRunning) return - throw new Error('SvelteKit dev server not detected - run `pnpm dev` (or `pnpm -F site prod` if deploying to production) before running this import script to ensure the endpoint functions that save to Supabase are available.') -} +import { parseCSVFrom } from './parse-csv.js' +import type { Row } from './row.type' +import { import_data } from './import-data.js' +import { upload_audio_to_gcs, upload_photo_to_gcs } from './import-media.js' program .option('-e, --environment [dev/prod]', 'Database Project', 'dev') @@ -18,12 +11,45 @@ program .option('--live', 'By default only values are logged, run with live flag to upload data and media') .parse(process.argv) -const dictionaryId = program.opts().id -const { live } = program.opts() -if (live) - console.log('Live run, everything is happening!') -else - console.log('Dry run, no data will be uploaded') +const { live, id: dictionary_id, environment } = program.opts() + +await import_from_spreadsheet({ dictionary_id, live }) + +async function import_from_spreadsheet({ dictionary_id, live }: { dictionary_id: string, live: boolean }) { + if (live) + console.log('Live run, everything is happening!') + else + console.log('Dry run, no data will be uploaded') + + console.log(`Importing ${dictionary_id} to ${environment}.`) + + const dateStamp = Date.now() + const import_id = `v4-${dateStamp}` + + const file = readFileSync(`./import/data/${dictionary_id}/${dictionary_id}.csv`, 'utf8') + const rows = parseCSVFrom(file) + rows.shift() // remove header row + await import_data({ dictionary_id, rows, import_id, live, upload_operations: { upload_photo, upload_audio } }) + + console.log( + `Finished ${live ? 'importing' : 'emulating'} ${rows.length} entries to ${environment === 'dev' ? 
'http://localhost:3041/' : 'livingdictionaries.app/' + }${dictionary_id} in ${(Date.now() - dateStamp) / 1000} seconds`, + ) + console.log('') // line break +} + +async function upload_photo(filepath: string, entry_id: string) { + return await upload_photo_to_gcs({ dictionary_id, filepath, entry_id, live }) +} + +async function upload_audio(filepath: string, entry_id: string) { + const storage_path = await upload_audio_to_gcs({ dictionary_id, filepath, entry_id, live }) + return { storage_path } +} -console.log(`Importing ${dictionaryId} to ${program.opts().environment}.`) -importFromSpreadsheet({ dictionaryId, live }).then(entries => console.log(entries)) +// async function upload_video(filepath: string) { +// // TODO +// console.log({ dictionary_id }) +// await new Promise(resolve => setTimeout(resolve, 0)) +// return { storage_path: filepath } +// } diff --git a/packages/scripts/import/incrementing-timestamp.ts b/packages/scripts/import/incrementing-timestamp.ts new file mode 100644 index 000000000..90edf4ddd --- /dev/null +++ b/packages/scripts/import/incrementing-timestamp.ts @@ -0,0 +1,7 @@ +const yesterday = new Date(Date.now() - 24 * 60 * 60 * 1000) +let milliseconds_to_add = 0 + +export function millisecond_incrementing_timestamp(): string { + milliseconds_to_add += 1 + return new Date(yesterday.getTime() + milliseconds_to_add).toISOString() +} diff --git a/packages/scripts/import/old-firebase-function/helpers/abbreviate-td-pos.ts b/packages/scripts/import/old-firebase-function/helpers/abbreviate-td-pos.ts deleted file mode 100644 index 0d26a1f72..000000000 --- a/packages/scripts/import/old-firebase-function/helpers/abbreviate-td-pos.ts +++ /dev/null @@ -1,42 +0,0 @@ -// import { partsOfSpeech } from '@living-dictionaries/parts'; - -export interface IMatchResult { - matchedPOS?: string; - unMatchedPOS?: string; - notes?: string; -} - -/** - * Convert old Talking Dictionary parts of speech (both English and Spanish) to English abbreviations - */ -export const abbreviateTDPartOfSpeech = (input: string): IMatchResult => { - // save any notes in parentheses - const parentheticalNote = input.match(/\(.+\)/); - - const sanitizedInput = input - .replace(/\(.+\)/, '') // remove notes in parentheses - .trim() - .toLowerCase() - .replace(/[.]$/, '') // removes word-final periods - .replace(/:/g, ''); // removes random colons in old TD data - const matchingPOS = partsOfSpeech.find((part) => { - //TODO, possibly more efficient just to return enAbbrev and not whole part object - return ( - part.enName === sanitizedInput || - part.esName === sanitizedInput || - part.enAbbrev === sanitizedInput || - part.esAbbrev === sanitizedInput || - (part.tdAlternates && part.tdAlternates.includes(sanitizedInput)) - ); - }); - const result: IMatchResult = {}; - if (matchingPOS) { - result.matchedPOS = matchingPOS.enAbbrev; - } else { - result.unMatchedPOS = sanitizedInput; - } - if (parentheticalNote) { - result.notes = '' + parentheticalNote; - } - return result; -}; diff --git a/packages/scripts/import/old-firebase-function/helpers/clean-up-data.ts b/packages/scripts/import/old-firebase-function/helpers/clean-up-data.ts deleted file mode 100644 index b32d8c36b..000000000 --- a/packages/scripts/import/old-firebase-function/helpers/clean-up-data.ts +++ /dev/null @@ -1,13 +0,0 @@ -/** - * Fix abnormalities and odd characters in old Talking Dictionaries data by converting JSON to a string, find-replacing, and converting back to JSON. 
- */ -export const cleanUpData = (data: any[]) => { - const cleanedData = JSON.parse( - JSON.stringify(data) - .replace(/’/g, '\'') // handle apostrophes - .replace(/"/g, '\'') // handle quote marks in ho, '\"' and \u0022 threw errors from closing value - .replace(/\\u0000/g, '') // handle odd null values in first 4 entries of ho - .replace(/ib_gloss/g, 'ig_gloss') // convert "ib" to "ig" for "Igbo" in Olukumi - ); - return cleanedData; -} \ No newline at end of file diff --git a/packages/scripts/import/old-firebase-function/helpers/delete-duplicate-entries.ts b/packages/scripts/import/old-firebase-function/helpers/delete-duplicate-entries.ts deleted file mode 100644 index 5e78788c3..000000000 --- a/packages/scripts/import/old-firebase-function/helpers/delete-duplicate-entries.ts +++ /dev/null @@ -1,150 +0,0 @@ -/** - * Delete completely duplicated entries and log stats for partial duplicates as well as number of duplicate image and audio references. - */ -export const deleteDuplicateEntries = (data: any[]) => { - const uniqueLexemes: string[] = []; - const uniqueAudioReferences: string[] = []; - const uniqueImageReferences: string[] = []; - let duplicateAudioReferences = 0; - let duplicateImageReferences = 0; - let duplicateEntries = 0; - - for (const entry of data) { - if (entry.audio) { - if (uniqueAudioReferences.indexOf(entry.audio) === -1) { - uniqueAudioReferences.push(entry.audio); - } else { - duplicateAudioReferences++; - } - } - if (entry.image) { - if (uniqueImageReferences.indexOf(entry.image) === -1) { - uniqueImageReferences.push(entry.image); - } else { - duplicateImageReferences++; - } - } - } - - for (let i = data.length - 1; i >= 0; i--) { - const entry = data[i]; - const nextEntry = data[i + 1]; - if (entry.lang) { - if (uniqueLexemes.indexOf(entry.lang) === -1) { - uniqueLexemes.push(entry.lang); - } else { - let uniqueEntry = false; - Object.keys(entry).forEach(key => { - if ((key != 'oid') && (entry[key] != nextEntry[key])) { - // console.log(entry[key], ' >> ', nextEntry[key]); - uniqueEntry = true; - } - }) - if (!uniqueEntry) { - duplicateEntries++; - console.log('\nRemoved', entry, '\nas it is a complete duplicate with: ', nextEntry); - data.splice(i, 1); - } - } - } else { - console.log('\nNo lang field found for: ', entry.oid); - } - } - - console.log(`\nLexeme duplicates: ${data.length - uniqueLexemes.length} duplicates out of ${data.length} entries`); - console.log(` Removed ${duplicateEntries} completely duplicate entries. The other ${data.length - uniqueLexemes.length - duplicateEntries} lexeme duplicates had at least 1 difference in the entry data and should be manually consolidated later on the site.`); // math will be wrong for dictionaries who have entries with no lexeme (lang field) - - console.log(` Unique audio references: ${uniqueAudioReferences.length} < would be great if this matched the audioFileCount above`); - if (duplicateAudioReferences) { - console.log(` Duplicate audio references: ${duplicateAudioReferences} < each entry that has a duplicate audio reference (meaning another entry also points to the same audio file in the old Talking Dictionaries) will upload its own unique renamed audio file so that none of the entries have intertwined media that other entries depend on. 
This will allow us to clean up duplicate entries and their associated media with ease without worrying about deleting media that other entries depend on.`); - } - - console.log(` Unique image references: ${uniqueImageReferences.length} < would be great if this matched the imageFileCount above`); - if (duplicateImageReferences) { - console.log(` Duplicate image references: ${duplicateImageReferences} < same story here as with duplicate audio references`); - } - return data; -} - - - -// Scratch notes - -// export const deleteDuplicateEntries = (data: any[]) => { -// const start = Date.now(); -// const uniqueLexemes: string[] = []; -// const uniqueAudioReferences: string[] = []; -// const uniqueImageReferences: string[] = []; - - -// let entries = new Set(); -// for (const entry of data) { -// delete entry.oid; -// entries.add(entry); -// } -// console.log(data.length, entries.size); - -// return data; -// const uniqueKeys: string[] = []; -// for (const entry of data) { -// Object.keys(entry).forEach(key => { -// if (uniqueKeys.indexOf(key) === -1) uniqueKeys.push(key); -// }) -// } -// const oidIndex = uniqueKeys.indexOf('oid'); -// uniqueKeys.splice(oidIndex, 1); -// console.log(uniqueKeys); - -// console.log('\nListing Entries with Duplicates audio files:') - -// for (var i = data.length - 1; i >= 0; i--) { -// if (data[i].lang) { -// if (uniqueAudioReferences.indexOf(data[i].audio) === -1) { -// uniqueAudioReferences.push(data[i].audio); -// } -// } -// } - -// for (var i = data.length - 1; i >= 0; i--) { -// if (data[i].image) { -// if (uniqueImageReferences.indexOf(data[i].image) === -1) { -// uniqueImageReferences.push(data[i].image); -// } else { -// console.log('Duplicate image reference, ', data[i].lang, data[i].image); -// } -// } -// } - -// for (var i = data.length - 1; i >= 0; i--) { -// const entry = data[i]; -// const nextEntry = data[i + 1]; -// if (data[i].lang) { -// if (uniqueLexemes.indexOf(data[i].lang) === -1) { -// uniqueLexemes.push(data[i].lang); -// } else { -// console.log(entry.oid); -// Object.keys(entry).forEach(key => { -// Boolean(entry[key]) && Boolean(nextEntry[key]) && Boolean(entry[key] != nextEntry[key]) && console.log(entry[key], ' >> ', nextEntry[key]); -// }) -// console.log(''); -// console.log('\n>>> Duplicate: ', data[i].lang); -// if (data[i].audio == data[i + 1].audio) { -// console.log('Removing (same audio file)'); -// console.log(data[i].gloss) -// console.log(data[i + 1].gloss) -// data.splice(i, 1); -// } else { -// console.log('Skipping (different audio files'); -// console.log(data[i].audio); -// console.log(data[i + 1].audio); -// console.log(data[i].gloss) -// console.log(data[i + 1].gloss) -// } -// } -// } -// } -// console.log(uniqueLexemes.length, uniqueAudioReferences.length); - -// console.log(`Found ${data.length - uniqueLexemes.length} duplicates out of ${data.length} entries in ${Date.now() - start}ms.\n`) -// return data; -// } \ No newline at end of file diff --git a/packages/scripts/import/old-firebase-function/helpers/find-unmatched-pos.ts b/packages/scripts/import/old-firebase-function/helpers/find-unmatched-pos.ts deleted file mode 100644 index 544ef45b6..000000000 --- a/packages/scripts/import/old-firebase-function/helpers/find-unmatched-pos.ts +++ /dev/null @@ -1,46 +0,0 @@ -import { abbreviateTDPartOfSpeech } from "./abbreviate-td-pos"; - -/** - * Logs unique parts and throw an error if any unmatched parts found so we know what to fix. 
Fix as many as we want to, then comment Error throwing to allow unmatched POS to be simple write-ins. - */ -export const findUnmatchedPOS = (data: any[]) => { - console.log('\n---------------------\nMatching Parts of Speech for ', data.length, ' entries'); - const uniquePOS: string[] = []; - const repeatedPOS: string[] = []; - const unmatchedPOS: string[] = []; - - for (const entry of data) { - if (entry.pos) { - const pos = entry.pos; - if (uniquePOS.indexOf(pos) === -1) uniquePOS.push(pos); - repeatedPOS.push(pos); - } - } - - console.log('\nUnmatched POS: ') - uniquePOS.forEach((pos: string) => { - const { matchedPOS } = abbreviateTDPartOfSpeech(pos); - if (matchedPOS) { - // console.log(`Matched Unique POS|${pos}|`); - } else { - // console.log(`>> Unmatched Unique POS\n|${pos}|`); - console.log(`${pos}`); - unmatchedPOS.push(pos); - } - }) - - console.log('\nRepeat unmatched POS to get a feel for the quantity of unmatched POS'); - repeatedPOS.forEach((pos: string) => { - const { matchedPOS } = abbreviateTDPartOfSpeech(pos); - if (!matchedPOS) { - // console.log(`>> Unmatched Unique POS\n|${pos}|`); - console.log(`|${pos}|`); - } - }) - - if (unmatchedPOS.length) { - console.log('Not all POS found matches so they will be saved as is (simple strings of text w/o abbreviations or translations).') - // throw new Error(`No abbreviation found for some POS. See log.`); - } - return unmatchedPOS; -} diff --git a/packages/scripts/import/old-firebase-function/helpers/unzip.ts b/packages/scripts/import/old-firebase-function/helpers/unzip.ts deleted file mode 100644 index c97b2c202..000000000 --- a/packages/scripts/import/old-firebase-function/helpers/unzip.ts +++ /dev/null @@ -1,72 +0,0 @@ -import * as fs from 'fs-extra'; -const unzipper = require('unzipper'); - -const dataFileFormats = ['csv', 'json', 'xlsx']; -const imageFileFormats = ['jpg', 'jpeg', 'gif', 'png']; -const audioFileFormats = ['mp3', 'wav']; - -export const unzipArchive = async ( - language: string, - dictionaryId: string, - type: 'old-td' | 'spreadsheet' -) => { - let dataFileName = ''; - let audioFileCount = 0; - let imageFileCount = 0; - fs.mkdirSync(`dictionary/${dictionaryId}/data/`, { recursive: true }); - fs.mkdirSync(`dictionary/${dictionaryId}/audio/`, { recursive: true }); - fs.mkdirSync(`dictionary/${dictionaryId}/images/`, { recursive: true }); - - return await new Promise((resolve, reject): any => { - const filepath = `ready-data/${language}.zip`; - fs.createReadStream(filepath) - .pipe(unzipper.Parse()) - .on('entry', (entry: any) => { - if (entry.path && entry.type === 'File') { - const fileName = entry.path.split('/').pop(); - const fileExt = entry.path.split('.').pop(); - - if (fileName.match(/\?/)) { - // skip over kera_mundari missing file with ? in name - console.log(`Skipping ${fileName} because of ? 
which caused it to be missing`); - entry.autodrain(); - return; - } - - if (fileName.match(/^\._/)) { - // skip Mac metadata files - entry.autodrain(); - return; - } - - if (dataFileFormats.includes(fileExt.toLowerCase())) { - dataFileName = fileName; - entry.pipe(fs.createWriteStream(`dictionary/${dictionaryId}/data/${fileName}`)); - } else if (audioFileFormats.includes(fileExt.toLowerCase())) { - ++audioFileCount; - entry.pipe(fs.createWriteStream(`dictionary/${dictionaryId}/audio/${fileName}`)); - } else if (imageFileFormats.includes(fileExt.toLowerCase())) { - ++imageFileCount; - entry.pipe(fs.createWriteStream(`dictionary/${dictionaryId}/images/${fileName}`)); - } else { - console.log('No proper file type found for: ', fileName, ' - autodraining'); - entry.autodrain(); - } - } else { - entry.autodrain(); - } - }) - .promise() - .then( - () => { - console.log( - { audioFileCount }, - { imageFileCount }, - `<< number of files found in zip archive` - ); - resolve(dataFileName); - }, - (e: Error) => reject(e) - ); - }); -}; diff --git a/packages/scripts/import/old-firebase-function/importing2.ts b/packages/scripts/import/old-firebase-function/importing2.ts deleted file mode 100644 index 8e0475dea..000000000 --- a/packages/scripts/import/old-firebase-function/importing2.ts +++ /dev/null @@ -1,228 +0,0 @@ -import * as functions from 'firebase-functions'; -import * as admin from 'firebase-admin'; -import * as path from 'path'; -import * as os from 'os'; -import * as fs from 'fs'; - -if (!admin.apps.length) { - admin.initializeApp(); -} - -const db = admin.firestore(); - -import { - FileFORMATS, - transformJsonRow, - transformTDJsonRow, - unzipFirebaseArchive, - validateCSV, - processCSV, - importXLS, -} from './libs/'; - -export const processImport = functions - .runWith({ timeoutSeconds: 540, memory: '2GB' }) - .firestore.document('dictionaries/{dictionaryId}/imports/{importId}') - .onCreate(async (snapshot, context) => { - const functionStart = Date.now(); - const importRef = snapshot.ref; - let processStatus = `processing`; - await importRef.update({ - status: processStatus, - }); - const dictionaryId = context.params.dictionaryId; - const colRef = db.collection(`dictionaries/${dictionaryId}/words`); - let batch = db.batch(); - let stopcount = -1; - let importCount = 0; - const batchSize = 299; - let validations: any = []; - let processResults: any = {}; - //const processENV = context.resource.name.split('/')[1].split('-').pop(); - // gather elements from file upload to prepare for parsing - const importId = context.params.importId; - const snapData = await snapshot.data(); - const importFilePath = snapData.path; - const timestamp = snapData.createdAt; - const uid = snapData.createdBy; - const fileOriginalName = importFilePath.split('/').pop(); - const fileOriginalExt = fileOriginalName.split('.').pop(); - let dictionaryPath = ''; - let isArchived = false; - let isJSON = false; - let isXLS = false; - let isCSV = false; - const bucketInstance = admin.storage().bucket(); - // let prepareDictionary = false; - - if (fileOriginalExt === 'zip') { - // unzip archive first - isArchived = true; - const archiveDir = importFilePath.replace('.zip', ''); - const formatIdx = await unzipFirebaseArchive( - bucketInstance, - importFilePath, - uid, - timestamp, - importId, - dictionaryId - ); - if (formatIdx < 0) { - processStatus = `error`; - await importRef.update({ - entryCount: 0, - error: 'A dictionary manifest does not exist in this archive', - memoryUsage: process.memoryUsage(), - elapsedTime: 
`${Date.now() - functionStart}`, - status: processStatus, - }); - return 0; - } else { - const archiveManifest = `${archiveDir}/dictionary.${FileFORMATS[formatIdx]}`; - dictionaryPath = path.join(os.tmpdir(), `dictionary.${FileFORMATS[formatIdx]}`); - await bucketInstance.file(archiveManifest).download({ destination: dictionaryPath }); - // mark the format flag with a terse statement - [ - () => { - isCSV = true; - }, - () => { - isJSON = true; - }, - () => { - isXLS = true; - }, - ][formatIdx](); - } - } else if (fileOriginalExt === 'csv') { - // handle simple 'CSV' file upload - isCSV = true; - dictionaryPath = path.join(os.tmpdir(), fileOriginalName); - await bucketInstance.file(importFilePath).download({ destination: dictionaryPath }); - } else if (fileOriginalExt === 'xlsx') { - // handle simple 'XLSX' file upload - isXLS = true; - dictionaryPath = path.join(os.tmpdir(), fileOriginalName); - await bucketInstance.file(importFilePath).download({ destination: dictionaryPath }); - } else if (fileOriginalExt === 'json') { - // handle simple 'JSON' file upload - isJSON = true; - dictionaryPath = path.join(os.tmpdir(), fileOriginalName); - await bucketInstance.file(importFilePath).download({ destination: dictionaryPath }); - } else { - throw new Error( - `UID: ${uid} -- no dictionary.csv or dictionary.json file found in archive or standalone, please resubmit with one included` - ); - } - - if (isCSV) { - // make a first pass through the csv and store errors in the validations collection variable - validations = await validateCSV(uid, dictionaryPath, timestamp, importId, dictionaryId); - if (validations.length > 0) { - await importRef.update({ - error: JSON.stringify(validations), - }); - processStatus = `error`; - } else { - processResults = await processCSV( - db, - uid, - dictionaryPath, - timestamp, - importId, - dictionaryId, - isArchived - ); - if (processResults.ERRORS && processResults.ERRORS.length > 0) { - await importRef.update({ - error: JSON.stringify(processResults.ERRORS), - }); - processStatus = `error`; - } else { - try { - await importRef.update({ - entryCount: processResults.importCount, - memoryUsage: process.memoryUsage(), - elapsedTime: `${Date.now() - functionStart}`, - }); - processStatus = `success`; - } catch (err) { - await importRef.update({ - error: 'errors while updating import status', - }); - processStatus = `error`; - } - } - } // if(validations.length>0) - await importRef.update({ - entryCount: processResults.importCount, - memoryUsage: process.memoryUsage(), - elapsedTime: `${Date.now() - functionStart}`, - status: processStatus, - }); - importCount = processResults.importCount; - return importCount; - } - - let jsonData: any = []; - - if (isXLS) { - // XLSX file parser - jsonData = await importXLS(dictionaryPath); - } - if (isJSON) { - // JSON file parser - jsonData = JSON.parse(fs.readFileSync(dictionaryPath)); - } - - for (const entry of jsonData) { - try { - const entryId = colRef.doc().id; - const docRef = colRef.doc(entryId); - let row = {}; - if (isXLS) { - row = await transformJsonRow( - entry, - uid, - timestamp, - importId, - dictionaryId, - importCount, - isArchived - ); - } - if (isJSON) { - // For importing old Talking Dictionaries - row = await transformTDJsonRow( - entry, - uid, - timestamp, - importId, - dictionaryId, - importCount, - isArchived - ); - } - batch.set(docRef, row); - } catch (err) { - validations.push(err); - } - if (stopcount === batchSize) { - await batch.commit(); - batch = db.batch(); - stopcount = -1; - } - ++stopcount; - 
++importCount; - } - - processStatus = `success`; - await batch.commit(); - await importRef.update({ - entryCount: importCount, - memoryUsage: process.memoryUsage(), - elapsedTime: `${Date.now() - functionStart}`, - status: processStatus, - }); - return importCount; - }); diff --git a/packages/scripts/import/old-firebase-function/mock-dictionary.ts b/packages/scripts/import/old-firebase-function/mock-dictionary.ts deleted file mode 100644 index 58e84a32d..000000000 --- a/packages/scripts/import/old-firebase-function/mock-dictionary.ts +++ /dev/null @@ -1,16 +0,0 @@ -import { db } from '../config'; -import { IDictionary } from '@living-dictionaries/types'; -/** - * Create new empty dictionary in Firestore - */ -export const mockDictionary = async (dictionaryId: string, glossLanguages: string[]) => { - const dictionaryDoc: IDictionary = { - id: `${dictionaryId}`, - name: `${dictionaryId}`, - public: true, - entryCount: 0, - glossLanguages, //: ['en', 'es', 'hi', 'or'], - }; - await db.doc(`dictionaries/${dictionaryId}`).set(dictionaryDoc); - return dictionaryDoc; -}; diff --git a/packages/scripts/import/old/convertJsonRowToEntryFormat.test.ts b/packages/scripts/import/old/convertJsonRowToEntryFormat.test.ts deleted file mode 100644 index e1fea8b5f..000000000 --- a/packages/scripts/import/old/convertJsonRowToEntryFormat.test.ts +++ /dev/null @@ -1,282 +0,0 @@ -import { readFileSync } from 'node:fs' -import path from 'node:path' -import { convertJsonRowToEntryFormat, returnArrayFromCommaSeparatedItems } from './convertJsonRowToEntryFormat.js' -import { parseCSVFrom } from './../parse-csv.js' - -describe.skip('convertJsonRowToEntryFormat', () => { - const fakeTimeStamp = 10101010 - - test('glosses', () => { - const csv_rows_without_header: Record[] = [ - { - lexeme: 'dolphin', - es_gloss: 'delfín', - }, - ] - const entries = csv_rows_without_header.map(row => convertJsonRowToEntryFormat(row)) - - expect(entries).toMatchInlineSnapshot(` - [ - { - "gl": { - "es": "delfín", - }, - "lx": "dolphin", - }, - ] - `) - }) - - test('example sentences', () => { - const csv_rows_without_header: Record[] = [ - { - lexeme: 'dolphin', - es_exampleSentence: 'el delfín nada en el océano.', - }, - ] - const entries = csv_rows_without_header.map(row => convertJsonRowToEntryFormat(row)) - - expect(entries).toMatchInlineSnapshot(` - [ - { - "gl": {}, - "lx": "dolphin", - "xs": { - "es": "el delfín nada en el océano.", - }, - }, - ] - `) - }) - - test('semantic domains', () => { - const csv_rows_without_header: Record[] = [ - { - lexeme: 'dolphins', - semanticDomain: '5.15', - semanticDomain2: '1', - semanticDomain_custom: 'the sea!', - }, - ] - const entries = csv_rows_without_header.map(row => convertJsonRowToEntryFormat(row)) - - expect(entries).toMatchInlineSnapshot(` - [ - { - "gl": {}, - "lx": "dolphins", - "sd": [ - "the sea!", - ], - "sdn": [ - "5.15", - "1", - ], - }, - ] - `) - }) - - test('high-level conversion from csv', async () => { - const dictionaryId = 'example-v4' - const file = readFileSync(path.join(__dirname, `./data/${dictionaryId}/${dictionaryId}.csv`), 'utf8') - const rows = parseCSVFrom(file) - const rowsWithoutHeader = removeHeaderRow(rows) - const entries = rowsWithoutHeader.map(row => - convertJsonRowToEntryFormat( - row, - fakeTimeStamp, - - fakeTimeStamp as unknown as FirebaseFirestore.FieldValue, - ), - ) - - expect(entries).toEqual([ - { - ca: 10101010, - di: ['Modern Parisian French'], - gl: { - en: 'car', - es: 'auto', - }, - ii: 'v4-10101010', - lx: 'voiture', - nt: 'small 
automobile', - ph: 'vwatyʁ', - ps: ['n', 'v'], - sd: ['vehicle|cars'], - sdn: ['5.15', '5'], - ua: 10101010, - xs: { - en: 'I drive my car', - es: 'Conduzco mi auto', - vn: 'Je conduis ma voiture', - }, - }, - { - ca: 10101010, - di: ['Modern Parisian French', 'Quebec French'], - gl: { - en: 'tree', - es: 'árbol', - }, - ii: 'v4-10101010', - lx: 'arbre', - nt: 'generic term for all kinds of trees', - ph: 'aʁbʁ', - ps: ['n', 'adj'], - scn: ['Acer rubrum'], - sdn: ['1.4', '1.2'], - ua: 10101010, - xs: { - en: 'The tree gives us shade', - es: 'El árbol nos da sombra', - vn: 'L\'arbre nous donne de l\'ombre', - }, - }, - { - ca: 10101010, - di: ['Modern Parisian French'], - gl: { - en: 'tube', - es: 'tubo', - }, - ii: 'v4-10101010', - lx: 'tube', - nt: 'a cylindrical device for liquids', - ph: 'tyb', - pl: 'tubes', - ps: ['n'], - sd: ['plumbing'], - sdn: ['5.9'], - ua: 10101010, - xs: { - en: 'The water goes through the tubes', - es: 'El agua pasa a través de los tubos', - vn: 'L\'eau passe à travers les tubes', - }, - }, - { - ca: 10101010, - di: ['Quebec French'], - gl: { - en: 'car', - es: 'auto', - }, - ii: 'v4-10101010', - lx: 'voiture', - nt: 'small automobile', - ph: 'vwɑtYʁ', - ps: ['n'], - sd: ['vehicle'], - sdn: ['5.15'], - sr: ['testing sources'], - ua: 10101010, - xs: { - en: 'I drive my car', - es: 'Conduzco mi auto', - vn: 'Je conduis ma voiture', - }, - }, - { - ca: 10101010, - di: ['Quebec French'], - gl: { - en: 'neutral', - es: 'neutro', - }, - ii: 'v4-10101010', - lx: 'neutre', - ph: 'nøʏ̯tʁ̥', - ps: ['adj'], - ua: 10101010, - xs: { - en: 'My room is painted with a neutral color.', - es: 'Mi habitación está pintada con un color neutro.', - vn: 'Ma chambre est peinte d\'une couleur neutre.', - }, - }, - { - ca: 10101010, - di: ['Quebec French'], - gl: { - en: 'to celebrate', - es: 'celebrar', - }, - ii: 'v4-10101010', - lx: 'fêter', - nt: 'to have a party', - ph: 'fɛɪ̯te', - ps: ['v'], - sr: ['test source', 'with multiples sources, test', 'https://example.com'], - ua: 10101010, - xs: { - en: 'We will really party tonight', - es: 'Vamos a celebrar esta noche', - vn: 'On va vraiment fêter à soir', - }, - }, - { - ca: 10101010, - di: ['Central Luganda'], - gl: { - en: 'I will see you', - es: 'Voy a verte', - }, - ii: 'v4-10101010', - in: '1SG-Fut-2SG-see-Fin.V', - lx: 'njakulaba', - mr: 'n-ja-ku-lab-a', - ps: ['vp'], - ua: 10101010, - }, - { - ca: 10101010, - gl: { - en: 'bye', - es: 'adiós', - }, - ii: 'v4-10101010', - lx: 'vale', - ua: 10101010, - }, - ]) - }) - - test('does not duplicate vernacular', () => { - const csv_rows_without_header: Record[] = [ - { - vernacular_exampleSentence: 'Hello world', - }, - ] - const entries = csv_rows_without_header.map(row => convertJsonRowToEntryFormat(row)) - - expect(entries).toMatchInlineSnapshot(` - [ - { - "gl": {}, - "lx": undefined, - "xs": { - "vn": "Hello world", - }, - }, - ] - `) - }) -}) - -function removeHeaderRow(rows: any[]) { - return rows.splice(1) -} - -describe('returnArrayFromCommaSeparatedItems', () => { - test('splits two comma separated items into an array', () => { - expect(returnArrayFromCommaSeparatedItems('n,v')).toStrictEqual(['n', 'v']) - }) - test('handles unusual comma spacing', () => { - expect(returnArrayFromCommaSeparatedItems('n, v ,adj')).toStrictEqual(['n', 'v', 'adj']) - }) - test('returns empty array from undefined', () => { - expect(returnArrayFromCommaSeparatedItems(undefined)).toStrictEqual([]) - }) -}) diff --git a/packages/scripts/import/old/convertJsonRowToEntryFormat.ts 
b/packages/scripts/import/old/convertJsonRowToEntryFormat.ts deleted file mode 100644 index 114eb06a9..000000000 --- a/packages/scripts/import/old/convertJsonRowToEntryFormat.ts +++ /dev/null @@ -1,153 +0,0 @@ -import { randomUUID } from 'node:crypto' -import type { ActualDatabaseEntry } from '@living-dictionaries/types' -import type { Timestamp } from 'firebase/firestore' - -interface StandardData { - row: Record - dateStamp?: number - timestamp?: FirebaseFirestore.FieldValue -} - -interface SenseData { - entry_id: string - dictionary_id: string -} - -export function convertJsonRowToEntryFormat( - standard: StandardData, - senseData?: SenseData, -): ActualDatabaseEntry { - const { row, dateStamp, timestamp } = standard - const entry: ActualDatabaseEntry = { lx: row.lexeme, gl: {}, xs: {} } - const sense_regex = /^s\d+_/ - let glossObject: Record = {} - const exampleSentenceObject: Record = {} - const exampleSentenceTranslationObject: Record = {} - let sense_id = randomUUID() - let sentence_id = randomUUID() - let old_key = 2 - - if (row.phonetic) entry.ph = row.phonetic - if (row.morphology) entry.mr = row.morphology - if (row.interlinearization) entry.in = row.interlinearization - if (row.partOfSpeech) entry.ps = returnArrayFromCommaSeparatedItems(row.partOfSpeech) - if (row.dialects) entry.di = row.dialects.split(',').map(dialect => dialect.trim()) - if (row.variant) entry.va = row.variant - if (row.nounClass) entry.nc = row.nounClass - if (row.source) entry.sr = row.source.split('|') - if (row.pluralForm) entry.pl = row.pluralForm - if (row.scientificName) entry.scn = [row.scientificName] - if (row.semanticDomain_custom) entry.sd = [row.semanticDomain_custom] - if (row.ID) entry.ei = row.ID - - if (row.localOrthography) entry.lo1 = row.localOrthography - if (row.localOrthography2) entry.lo2 = row.localOrthography2 - if (row.localOrthography3) entry.lo3 = row.localOrthography3 - if (row.localOrthography4) entry.lo4 = row.localOrthography4 - if (row.localOrthography5) entry.lo5 = row.localOrthography5 - - if (row.notes) entry.nt = row.notes - - for (const [key, value] of Object.entries(row)) { - if (!value) continue - - // gloss fields are labeled using bcp47 language codes followed by '_gloss' (e.g. 
es_gloss, tpi_gloss) - if (key.includes('_gloss') && !sense_regex.test(key)) { - const [language] = key.split('_gloss') - entry.gl[language] = value - } - - if (key.includes('vernacular_exampleSentence')) { - entry.xs.vn = value - continue // to keep next block from also adding - } - - // example sentence fields are codes followed by '_exampleSentence' - if (key.includes('_exampleSentence')) { - const [language] = key.split('_exampleSentence') - entry.xs[language] = value - } - - if (senseData) { - console.log(`key: ${key}`) - if (key === 'lexeme') - console.log(`lexeme: ${value}`) - const { entry_id, dictionary_id } = senseData - if (sense_regex.test(key)) { - if (key.includes('_gloss')) { - let language_key = key.replace(sense_regex, '') - language_key = language_key.replace('_gloss', '') - console.log(`language key: ${language_key}`) - - if (key === `s${old_key}_${language_key}_gloss`) { - glossObject[language_key] = row[key] - } else { - old_key++ - sense_id = randomUUID() - glossObject = {} - glossObject[language_key] = row[key] - } - console.log(`old key: ${old_key}`) - console.log(`sense id: ${sense_id}`) - update_sense(entry_id, dictionary_id, { glosses: { new: glossObject } }, sense_id) - console.log(`gloss object: ${JSON.stringify(glossObject)}`) - } - - console.log(`sentence id before vernacular example sentence: ${sentence_id}`) - if (key.includes('_vn_ES')) { - let writing_system = key.replace(sense_regex, '') - writing_system = writing_system.replace('_vn_ES', '') - - if (key === `s${old_key}_${writing_system}_vn_ES`) { - sentence_id = randomUUID() - exampleSentenceObject[writing_system] = row[key] - update_sentence(entry_id, dictionary_id, { text: { new: exampleSentenceObject } }, sense_id, sentence_id) - } - } - console.log(`sentence id before translation example sentence: ${sentence_id}`) - if (key.includes('_GES')) { - let language_key = key.replace(sense_regex, '') - language_key = language_key.replace('_GES', '') - - exampleSentenceTranslationObject[language_key] = row[key] - // if (key === `s${old_key}_${language_key}_GES`) { - // console.log('Is it getting here at all??') - // } - update_sentence(entry_id, dictionary_id, { translation: { new: exampleSentenceTranslationObject } }, sense_id, sentence_id) - } - - console.log(`sense id before pos: ${sense_id}`) - if (key.includes('_partOfSpeech')) - update_sense(entry_id, dictionary_id, { parts_of_speech: { new: [row[key]] } }, sense_id) - - if (key.includes('_semanticDomains')) - update_sense(entry_id, dictionary_id, { semantic_domains: { new: [row[key]] } }, sense_id) - - if (key.includes('_nounClass')) - update_sense(entry_id, dictionary_id, { noun_class: { new: [row[key]] } }, sense_id) - } - } - - const semanticDomain_FOLLOWED_BY_OPTIONAL_DIGIT = /^semanticDomain\d*$/ // semanticDomain, semanticDomain2, semanticDomain<#>, but not semanticDomain_custom - if (semanticDomain_FOLLOWED_BY_OPTIONAL_DIGIT.test(key)) { - if (!entry.sdn) entry.sdn = [] - - entry.sdn.push(value.toString()) - } - } - - if (Object.keys(entry.xs).length === 0) - delete entry.xs - - if (!dateStamp) return entry - - entry.ii = `v4-${dateStamp}` - entry.ca = timestamp as Timestamp - entry.ua = timestamp as Timestamp - - return entry -} - -export function returnArrayFromCommaSeparatedItems(string: string): string[] { - return string?.split(',').map(item => item.trim()) || [] -} diff --git a/packages/scripts/import/old/filterArray.ts b/packages/scripts/import/old/filterArray.ts deleted file mode 100644 index 70bb3133f..000000000 --- 
a/packages/scripts/import/old/filterArray.ts +++ /dev/null @@ -1,25 +0,0 @@ -// #!/usr/bin/env node - -import * as fs from 'fs-extra'; -import * as args from 'commander'; - -args - .version('0.0.1') - .option('-d, --data ', 'Source file path') - .option('-f --filter ', 'Search term to filter by') - .parse(process.argv); - -async function filterArray() { - const file = args.data; - const data = await fs.readJSON(file); - - console.log(filterByValue(data, args.filter)); -} - -function filterByValue(array, string) { - return array.filter((o) => - Object.keys(o).some((k) => o[k].toLowerCase().includes(string.toLowerCase())) - ); -} - -filterArray(); diff --git a/packages/scripts/import/old/gatherPOS.ts b/packages/scripts/import/old/gatherPOS.ts deleted file mode 100644 index 3165ed40e..000000000 --- a/packages/scripts/import/old/gatherPOS.ts +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env node - -import * as fs from 'fs-extra'; -import * as args from 'commander'; - -args.version('0.0.1').option('-s, --src ', 'Source file path').parse(process.argv); - -async function gatherPartsOfSpeech() { - const file = args.src; - let data; - if (file.includes('.json')) { - data = await fs.readJSON(file); - } else { - return console.log('JSON file not found'); - } - - const partsOfSpeech = []; - for (let i = 0; i < data.length; i++) { - const pos = data[i].pos; - - if (partsOfSpeech.indexOf(pos) === -1) partsOfSpeech.push(pos); - } - - fs.writeFile('GatheredPOS.json', JSON.stringify(partsOfSpeech), function (err) { - if (err) { - return console.log(err); - } - console.log('Parts of Speech saved to GatheredPOS.json'); - }); - console.log(partsOfSpeech); -} - -gatherPartsOfSpeech(); diff --git a/packages/scripts/import/old/import-spreadsheet-v4.ts b/packages/scripts/import/old/import-spreadsheet-v4.ts deleted file mode 100644 index ff36b30c4..000000000 --- a/packages/scripts/import/old/import-spreadsheet-v4.ts +++ /dev/null @@ -1,101 +0,0 @@ -import { readFileSync } from 'node:fs' -import type { ActualDatabaseEntry } from '@living-dictionaries/types' -import { db, environment, timestamp } from '../../config-firebase.js' -import { uploadAudioFile, uploadImageFile } from './../import-media.js' -import { parseCSVFrom } from './../parse-csv.js' -import { convertJsonRowToEntryFormat } from './../convertJsonRowToEntryFormat.js' - -const developer_in_charge = 'qkTzJXH24Xfc57cZJRityS6OTn52' // diego@livingtongues.org -> Diego Córdova Nieto; -type unique_speakers = Record -const different_speakers: unique_speakers[] = [] - -export async function importFromSpreadsheet(dictionaryId: string, dry = false) { - const dateStamp = Date.now() - - const file = readFileSync(`./import/data/${dictionaryId}/${dictionaryId}.csv`, 'utf8') - const rows = parseCSVFrom(file) - const entries = await importEntriesToFirebase(dictionaryId, rows, dateStamp, dry) - - console.log( - `Finished ${dry ? 'emulating' : 'importing'} ${entries.length} entries to ${ - environment === 'dev' ? 
'http://localhost:3041/' : 'livingdictionaries.app/' - }${dictionaryId} in ${(Date.now() - dateStamp) / 1000} seconds`, - ) - console.log('') - return entries -} - -export async function importEntriesToFirebase( - dictionaryId: string, - rows: any[], - dateStamp: number, - dry = false, -) { - const entries: ActualDatabaseEntry[] = [] - let entryCount = 0 - let batchCount = 0 - let batch = db.batch() - const colRef = db.collection(`dictionaries/${dictionaryId}/words`) - const speakerRef = db.collection('speakers') - const dictionarySpeakerSnapshot = await speakerRef.where('contributingTo', 'array-contains', dictionaryId).get() - dictionarySpeakerSnapshot.docs.forEach(snap => different_speakers.push({ [snap.data().displayName]: snap.id })) - let speakerId - - for (const row of rows) { - if (!row.lexeme || row.lexeme === '(word/phrase)') - continue - - if (!dry && batchCount === 200) { - console.log('Committing batch of entries ending with: ', entryCount) - await batch.commit() - batch = db.batch() - batchCount = 0 - } - - const entryId = colRef.doc().id - const entry = convertJsonRowToEntryFormat(row, dateStamp, timestamp) - - if (row.photoFile) { - const pf = await uploadImageFile(row.photoFile, entryId, dictionaryId, dry) - if (pf) entry.pf = pf - } - - if (row.soundFile) { - speakerId = different_speakers.find(speaker => Object.keys(speaker).includes(row.speakerName))?.[row.speakerName] - if (row.speakerName && !speakerId) { - speakerId = speakerRef.doc().id - different_speakers.push({ [row.speakerName]: speakerId }) - batch.create(speakerRef.doc(speakerId), { - displayName: row.speakerName, - birthplace: row.speakerHometown || '', - decade: Number.parseInt(row.speakerAge) || '', - gender: row.speakerGender || '', - contributingTo: [dictionaryId], - createdAt: timestamp, - createdBy: developer_in_charge, - updatedAt: timestamp, - updatedBy: developer_in_charge, - }) - } - const audioFilePath = await uploadAudioFile(row.soundFile, entryId, dictionaryId, dry) - if (audioFilePath) { - entry.sfs = [{ - path: audioFilePath, - ts: new Date().getTime(), - }] - if (speakerId) - entry.sfs[0].sp = [speakerId] - else - entry.sf.speakerName = row.speakerName // Keep that if for some reason we need the speakername as text only again. 
- } - } - - entries.push(entry) - batch.create(colRef.doc(entryId), entry) - batchCount++ - entryCount++ - } - console.log(`Committing final batch of entries ending with: ${entryCount}`) - if (!dry) await batch.commit() - return entries -} diff --git a/packages/scripts/import/old/import-spreadsheet.ts b/packages/scripts/import/old/import-spreadsheet.ts deleted file mode 100644 index 61e26ecfe..000000000 --- a/packages/scripts/import/old/import-spreadsheet.ts +++ /dev/null @@ -1,75 +0,0 @@ -import * as fs from 'fs-extra'; -import { environment } from './config-firebase'; -import * as xlsx from 'xlsx'; -import * as csv from 'csvtojson'; -import { importSpreadsheetToFirebase } from './import-spreadsheet-to-firebase'; -import { mockDictionary } from './dev/mock-dictionary'; - -const language = 'kumyk'; -let dictionaryId = language; -const dateStamp = Date.now(); -// @ts-ignore -if (environment === 'dev') { - dictionaryId = dictionaryId + '-' + dateStamp; -} -const dryRun = false; - -async function importFromSpreadsheet() { - const util = require('util'); - const logFile = fs.createWriteStream(`logs/import-${dictionaryId}-${environment}.txt`, { - flags: 'w', - }); // 'a' to append, 'w' to write over file contents - const logStdout = process.stdout; - console.log = function () { - // eslint-disable-next-line prefer-rest-params - logFile.write(util.format.apply(null, arguments) + '\n'); - // eslint-disable-next-line prefer-rest-params - logStdout.write(util.format.apply(null, arguments) + '\n'); - }; - - try { - console.log('importing: ', dictionaryId); - // const dataFileName = await unzipArchive(language, dictionaryId, 'spreadsheet'); - // console.log('returned: ', dataFileName); - // let jsonData = await convertXlsxToJson(dataFileName); - const jsonData = await csv().fromFile(`ready-data/${language}.csv`); - - // @ts-ignore - if (environment === 'dev') { - // const glossLanguages: string[] = findLanguages(data); - const glossLanguages = ['en', 'hi', 'as', 'or']; - if (!dryRun) { - await mockDictionary(dictionaryId, glossLanguages); - } - } - const importedCount = await importSpreadsheetToFirebase( - jsonData, - dictionaryId, - environment, - dateStamp, - dryRun - ); - console.log( - `Finished importing ${importedCount} entries to https://td-${environment}-svelte.web.app/${dictionaryId} in ${ - (Date.now() - dateStamp) / 1000 - } seconds` - ); - return true; - } catch (err) { - console.error(err); - throw new Error(err); - } -} - -importFromSpreadsheet(); - -// MOVE INTO HELPERS FOLDER - -/** - * Takes an Excel file and return the first sheet as an array of JSON objects for each row - */ -export const convertXlsxToJson = (dataFileName: string | any) => { - const workbook = xlsx.readFile(`dictionary/${dictionaryId}/data/${dataFileName}`); - const sheet_name_list = workbook.SheetNames; - return xlsx.utils.sheet_to_json(workbook.Sheets[sheet_name_list[0]]); -}; diff --git a/packages/scripts/import/old/importToFirestore.ts b/packages/scripts/import/old/importToFirestore.ts deleted file mode 100644 index a7c06c6b4..000000000 --- a/packages/scripts/import/old/importToFirestore.ts +++ /dev/null @@ -1,199 +0,0 @@ -#!/usr/bin/env node - -import * as args from 'commander'; -import * as admin from 'firebase-admin'; -import * as fs from 'fs-extra'; -import { join } from 'path'; -import { getImageServingUrl } from './getImageServingUrl'; -import { abbreviatePOS } from './abbreviatePOS'; -import { storage } from './config'; - -args - .version('0.0.1') - .option('-d, --data ', 'Data file path') - .option('-a, 
--audio ', 'Audio folder path') - .option('-p, --photos [folder path]', 'Photos folder path') //optional argument indicated by square brackets, skip image import if not specified - // .option("-c, --collection ", "Collection path in firestore") - .option('-i, --dictionaryId ', 'Dictionary Id in firestore') - .option('-n, --dictionaryName ', 'Dictionary name, used in saving media files') - .option('-e, --environment [dev/prod]', 'Firebase Project') //optional argument, script uses dev if not specified - .parse(process.argv); - -const devServiceAccount = require('../service-accounts/talking-dictionaries-dev.json'); -const prodServiceAccount = require('../service-accounts/talking-dictionaries-alpha.json'); - -admin.initializeApp({ - credential: admin.credential.cert( - args.environment == 'prod' ? prodServiceAccount : devServiceAccount - ), - databaseURL: `https://talking-dictionaries-${ - args.environment == 'prod' ? 'alpha' : 'dev' - }.firebaseio.com`, -}); -const db = admin.firestore(); - -const fileBucket = `talking-dictionaries-${ - args.environment == 'prod' ? 'alpha' : 'dev' -}.appspot.com`; - -async function importToFirestore() { - try { - const colPath = `dictionaries/${args.dictionaryId}/words`; - const file = args.data; - - const colRef = db.collection(colPath); - const batch = db.batch(); - - let data; - if (file.includes('.json')) { - data = await fs.readJSON(file); - } - - // TODO get script to loop through sets of 500 automatically once it matures - // Firestore 'cannot write more than 500 entities in a single call' so we have to upload in chunks - // See https://github.com/firebase/firebase-admin-java/issues/106 for a possible automated chunking solution - const commitRound = 0; // start at 0 - const batchStart = 0 + 500 * commitRound; - const batchEnd = 499 + 500 * commitRound; - - for (let i = 0; i < data.length; i++) { - if (i < batchStart || i > batchEnd) { - continue; - } - - const entry = data[i]; - entry.lx = entry.lang || ''; - delete entry.lang; - entry.ph = entry.ipa || ''; - delete entry.ipa; - - entry.ps = abbreviatePOS(entry.pos || ''); - delete entry.pos; - - entry.di = entry.dialect || ''; - delete entry.dialect; - - entry.xv = entry.usage_example || ''; - delete entry.usage_example; - - entry.lc = entry.metadata || ''; // location - delete entry.metadata; - - // learn about try/catch so I can convert this to const uploadedAudioPath = await upload...() - // maybe the outer parent catch will even catch this? Test it out. - - const entryId = colRef.doc().id; - - await uploadAudioFile(entry.audio, entry.lx, entryId) - .then((response: any) => { - const dateArray = entry.audio.match(/([0-9]*)_([0-9]*)_([0-9]*)/); - entry.sf = { - cr: entry.authority || '', // speaker - ts: dateArray ? 
new Date(`${dateArray[1]}, ${dateArray[2]}, ${dateArray[3]}`) : null, - path: response.uploadedAudioPath, - }; - delete entry.audio; - }) - .catch((err) => console.log(err)); - - if (args.photos) { - await uploadImageFile(entry, entryId) - .then((response) => { - entry.pf = response; - delete entry.image; - }) - .catch((err) => console.log(err)); - } else { - entry.pf = null; - delete entry.image; - } - - delete entry.authority; - - entry.sd = entry.semantic_ids || ''; - delete entry.semantic_ids; - - entry.gl = { - English: entry.gloss || '', - Español: entry.es_gloss || '', - }; - delete entry.gloss; - delete entry.es_gloss; - - const docRef = colRef.doc(entryId); - batch.set(docRef, entry); - console.log(`Added ${i} to batch: ${entry.lx}`); - } - - await batch.commit(); - console.log('Firestore import completed successfully.'); - } catch (error) { - console.log('Migration failed!', error); - } -} - -const uploadAudioFile = (audioFileName, lexeme, entryId) => { - return new Promise((resolve, reject) => { - if (!audioFileName) { - reject(`No audio found for ${lexeme}`); - } - - const audioDir = join(__dirname, `../${args.audio}`); - const audioFilePath = join(audioDir, audioFileName); - - const uploadedAudioName = lexeme.replace(/ /g, '_').replace(/\./g, ''); - const audioType = audioFileName.match(/\.[0-9a-z]+$/i); - - const uploadedAudioPath = `audio/${args.dictionaryName}_${args.dictionaryId}/${uploadedAudioName}_${entryId}${audioType}`; - - storage - .bucket(fileBucket) - .upload(audioFilePath, { - destination: uploadedAudioPath, - }) - .then(() => { - resolve({ uploadedAudioPath }); - }) - .catch((err) => { - reject(err); - }); - }); -}; - -const uploadImageFile = async (entry, entryId) => { - // eslint-disable-next-line no-useless-catch - try { - const pictureFileName = entry.image; - const lexeme = entry.lx; - if (!pictureFileName) { - throw `No image found for ${lexeme}`; - } - - const imageDir = join(__dirname, `../${args.photos}`); - const imageFilePath = join(imageDir, pictureFileName); - - const uploadedImageName = lexeme.replace(/ /g, '_').replace(/\./g, ''); - const imageType = pictureFileName.match(/\.[0-9a-z]+$/i); - - const uploadedImagePath = `images/${args.dictionaryName}_${args.dictionaryId}/${uploadedImageName}_${entryId}${imageType}`; - - await storage.bucket(fileBucket).upload(imageFilePath, { - destination: uploadedImagePath, - }); - - const gcsPath = await getImageServingUrl(uploadedImagePath, args.environment); - const dateArray = pictureFileName.match(/([0-9]*)_([0-9]*)_([0-9]*)/); - const pf = { - cr: entry.authority || '', // speaker - ts: dateArray ? 
new Date(`${dateArray[1]}, ${dateArray[2]}, ${dateArray[3]}`) : null, - path: uploadedImagePath, - gcs: gcsPath, // Google Cloud Storage Link - }; - - return pf; - } catch (err) { - throw err; - } -}; - -importToFirestore(); diff --git a/packages/scripts/import/parseSourceFromNotes.ts b/packages/scripts/import/old/parseSourceFromNotes.ts similarity index 100% rename from packages/scripts/import/parseSourceFromNotes.ts rename to packages/scripts/import/old/parseSourceFromNotes.ts diff --git a/packages/scripts/import/old/tdv1-import/find-languages.ts b/packages/scripts/import/old/tdv1-import/find-languages.ts deleted file mode 100644 index 99eae0be5..000000000 --- a/packages/scripts/import/old/tdv1-import/find-languages.ts +++ /dev/null @@ -1,39 +0,0 @@ -/** - * Logs unique glossing languages and throw an error if any are found that aren't supported in our current set of glossing languages - */ -export const findLanguages = (data: any[]) => { - const uniqueLanguages: string[] = []; - const unmatchedLanguages: string[] = []; - console.log(''); // spacer - - for (const row of data) { - if (row.gloss) { - const language = 'en'; - if (uniqueLanguages.indexOf(language) === -1) uniqueLanguages.push(language); - } - Object.keys(row).forEach((key) => { - // Except for English, gloss fields are labeled using bcp47 language codes followed by '_gloss' (e.g. es_gloss, tpi_gloss) - if (key.includes('_gloss') && row[key]) { - const language = key.split('_gloss')[0]; - if (uniqueLanguages.indexOf(language) === -1) uniqueLanguages.push(language); - } - }); - } - uniqueLanguages.forEach((lang: string) => { - const matched = glossingLanguages.some((glossingLanguage) => { - return glossingLanguage.bcp47 === lang; - }); - if (matched) { - console.log('Matched glossing language: ', lang); - } else { - console.log('>> Unmatched glossing language: ', lang); - unmatchedLanguages.push(lang); - } - }); - console.log(''); // spacer - - if (unmatchedLanguages.length) { - throw new Error(`Found unsupported glossing language(s). See log`); - } - return uniqueLanguages; -}; diff --git a/packages/scripts/import/old/tdv1-import/import-old-td.ts b/packages/scripts/import/old/tdv1-import/import-old-td.ts deleted file mode 100644 index 00061977d..000000000 --- a/packages/scripts/import/old/tdv1-import/import-old-td.ts +++ /dev/null @@ -1,108 +0,0 @@ -import * as fs from 'fs-extra'; -import { environment } from './config'; -import { unzipArchive } from './helpers/unzip'; -import { findUnmatchedPOS } from './helpers/find-unmatched-pos'; -import { importToFirebase } from './helpers/import-to-firebase'; -import { findLanguages } from './helpers/find-languages'; -import { mockDictionary } from './dev/mock-dictionary'; -import { deleteDuplicateEntries } from './helpers/delete-duplicate-entries'; -import { cleanUpData } from './helpers/clean-up-data'; - -// const language = process.argv[2]; -const dryRun = Boolean(process.argv[2] === 'dryRun'); - -if (dryRun) { - console.log('Doing a dry run'); -} - -const iterateThroughDictionaries = async () => { - const languages = [ - // 'ho', // - POS? 
// check lang: '\nriping', and lang: '\ngur', http://ho.swarthmore.edu/?fields=all&semantic_ids=&q=gur - // 'kera-mundari', // (changed zip from kera_mundari to match kera-mundari url) and missing audio file because of question mark - // 'olukumi', // convert "ib" to "ig" for "Igbo" - // 'jakalteko', // don't have data - // 'wayuunaiki', // changed zip from wayuu to wayuunaiki (old url was wayuu) - // 'herero', // no data - // 'gana', // no data - // 'kgalagadi', // no data - // 'yeyi', // no data - // No geo data below here... - // 'aren-aiome', // changed zip to aren-aiome to match url - // 'kuman', - // 'idio-titan', // changed zip from idio_titan to idio-titan to match site url - // 'kewapi', // no data - // 'muyuw-woodlark', // changed zip to muyuw-woodlark to match url - // 'naasioi', - // 'nalik', - // 'waskia', // no data - // 'weri', - // 'wahgi-waghi', // changed zip to wahgi-waghi to match url - // 'monkox-besiro-chiquitano', // changed zip from chiquitano - // 'ishir-chamacoco', // changed zip from chamacoco - // 'siletz-dee-ni' // changed from siletz - ]; - - // let allUnmatchedPOS = new Set(); - - for (const language of languages) { - const dictionaryId = language; - const dateStamp = Date.now(); - // if (environment === 'dev') { - // dictionaryId = dictionaryId + '-' + dateStamp; - // } - - const util = require('util'); - const logFile = fs.createWriteStream(`logs/import-${dictionaryId}-${environment}.txt`, { - flags: 'w', - }); // 'a' to append, 'w' to write over file contents - const logStdout = process.stdout; - console.log = function () { - // eslint-disable-next-line prefer-rest-params - logFile.write(util.format.apply(null, arguments) + '\n'); - // eslint-disable-next-line prefer-rest-params - logStdout.write(util.format.apply(null, arguments) + '\n'); - }; - await importOldTalkingDictionary(dictionaryId, language, dateStamp, dryRun); - - // For POS dry runs - // const unmatchedPOS = await importOldTalkingDictionary(dictionaryId, language, dateStamp, dryRun); - // if (unmatchedPOS) { - // unmatchedPOS.forEach(pos => allUnmatchedPOS.add(pos)); - // } - } - // allUnmatchedPOS.forEach(pos => console.log(pos)); -}; - -const importOldTalkingDictionary = async ( - dictionaryId: string, - language: string, - dateStamp: number, - dryRun: boolean -) => { - try { - console.log(`Importing ${dictionaryId}`); - const dataFileName = await unzipArchive(language, dictionaryId, 'old-td'); - let data = await fs.readJSON(`dictionary/${dictionaryId}/data/${dataFileName}`); - data = cleanUpData(data); - findUnmatchedPOS(data); // return here for POS dry runs - data = deleteDuplicateEntries(data); - // if (environment === 'dev') { - // const glossLanguages: string[] = findLanguages(data); - // if (!dryRun) { - // await mockDictionary(dictionaryId, glossLanguages) - // } - // } - const importedCount = await importToFirebase(data, dictionaryId, environment, dryRun); - console.log( - `Finished importing ${importedCount} entries to ${environment}/${language} in ${ - (Date.now() - dateStamp) / 1000 - } seconds` - ); - return true; - } catch (err) { - console.error(err); - throw new Error(err); - } -}; - -iterateThroughDictionaries(); diff --git a/packages/scripts/import/post-request.ts b/packages/scripts/import/post-request.ts deleted file mode 100644 index 35e9ddf1b..000000000 --- a/packages/scripts/import/post-request.ts +++ /dev/null @@ -1,55 +0,0 @@ -import { ResponseCodes } from '@living-dictionaries/site/src/lib/constants' - -const default_headers: RequestInit['headers'] = { - 
'content-type': 'application/json', -} - -type Return = { - data: ExpectedResponse - error: null -} | { - data: null - error: { status: number, message: string } -} - -export async function post_request, ExpectedResponse extends Record = any>(route: string, data: T, options?: { - fetch?: typeof fetch - headers?: RequestInit['headers'] -}): Promise> { - console.info(data) - // for running through data without db involved - // return { data: { speaker_id: data?.speaker_id, dialect_id: data?.dialect_id }, error: null } - - const fetch_to_use = options?.fetch || fetch - - const response = await fetch_to_use(route, { - method: 'POST', - body: JSON.stringify(data), - headers: options?.headers || default_headers, - }) - - return handleResponse(response) -} - -async function handleResponse>(response: Response): Promise> { - const { status } = response - if (status !== ResponseCodes.OK) { - const responseClone = response.clone() - try { - try { - const body = await response.json() - const error = { status, message: body.message || JSON.stringify(body) } - return { data: null, error } - } catch { - const textBody = await responseClone.text() - return { data: null, error: { status, message: textBody } } - } - } catch (err) { - // @ts-expect-error - return { data: null, error: { status, message: err.message } } - } - } - - const body = await response.json() as ExpectedResponse - return { data: body, error: null } -} diff --git a/packages/scripts/import/row.type.ts b/packages/scripts/import/row.type.ts index 936efa5cd..7ca62ed62 100644 --- a/packages/scripts/import/row.type.ts +++ b/packages/scripts/import/row.type.ts @@ -1,24 +1,27 @@ import type { Glossing_Languages } from '@living-dictionaries/site/src/lib/glosses/glossing-languages' export type Row = { - [key in (Entry | Sense | Sentence | Media)]?: string; + [key in (Entry_Fields | Prefixed_Sense_Fields | Prefixed_Sentence_Fields | Media)]?: string; } // 's3.es_gloss': 'hi', // 'semanticDomain4': '2.3', -// 's2.fr_exampleSentence.3': 'Bonjour docteur', +// 's4.fr_exampleSentence': 'Bonjour docteur', // 's4.default_vernacular_exampleSentence': 'foo bar', +// 's4.logogram_vernacular_exampleSentence': '富吧', +// 's4.fr_exampleSentence.2': 'Bonjour docteur 2', +// 's4.default_vernacular_exampleSentence.2': 'foo bar 2', -type Sense_Prefix = '' | 's2.' | 's3.' | 's4.' | 's5.' | 's6.' | 's7.' | 's8.' | 's9.' -type Number_Suffix = '' | '.2' | '.3' | '.4' | '.5' | '.6' | '.7' | '.8' | '.9' +export type Sense_Prefix = '' | 's2.' | 's3.' | 's4.' | 's5.' | 's6.' | 's7.' | 's8.' | 's9.' 
+export type Number_Suffix = '' | '.2' | '.3' | '.4' | '.5' | '.6' | '.7' | '.8' | '.9' -type Entry = 'lexeme' | `localOrthography${Number_Suffix}` | 'phonetic' | 'dialects' | 'ID' | 'notes' | 'source' | 'morphology' | 'interlinearization' | 'scientificName' | 'pluralForm' | 'variant' +type Entry_Fields = 'lexeme' | `localOrthography${Number_Suffix}` | 'phonetic' | 'dialects' | 'tags' | 'ID' | 'notes' | 'source' | 'morphology' | 'interlinearization' | 'scientificName' -type Sense = `${Sense_Prefix}${Sense_Fields}` -type Sense_Fields = `${Glossing_Languages}_gloss` | `partOfSpeech${Number_Suffix}` | `semanticDomain${Number_Suffix}` | 'semanticDomain_custom' | 'nounClass' // en_gloss, s2.en_gloss, nounClass, s2.nounClass +type Prefixed_Sense_Fields = `${Sense_Prefix}${Sense_Fields}` +type Sense_Fields = `${Glossing_Languages}_gloss` | `partOfSpeech${Number_Suffix}` | `semanticDomain${Number_Suffix}` | 'semanticDomain_custom' | 'nounClass' | 'pluralForm' | 'variant' // en_gloss, s2.en_gloss, nounClass, s2.nounClass -type Sentence = `${Sense_Prefix}${Sentence_Fields}` | `${Sense_Prefix}${Sentence_Fields}${Number_Suffix}` -type Sentence_Fields = `${Glossing_Languages}_exampleSentence` | `default_vernacular_exampleSentence` | 'vernacular_exampleSentence' +type Writing_Systems = 'default' // TODO improve Writing Systems field +type Prefixed_Sentence_Fields = `${Sense_Prefix}${Sentence_Fields}${Number_Suffix}` +type Sentence_Fields = `${Writing_Systems}_vernacular_exampleSentence` | `${Glossing_Languages}_exampleSentence` -type Media = 'photoFile' | 'soundFile' | 'speakerName' | 'speakerHometown' | 'speakerAge' | 'speakerGender' - -// type Writing_Systems = 'default' // TODO improve Writing Systems field +type Media = 'photoFile' | 'soundFile' | 'speakerName' | 'speakerHometown' | 'speakerAge' | 'speakerGender' // in the future photo and video will also need sense prefixes once multiple media is allowed in the UI +// all media will need number suffixes once multiple media is allowed in the UI diff --git a/packages/scripts/migrate-to-supabase/to-sql-string.ts b/packages/scripts/import/to-sql-string.ts similarity index 93% rename from packages/scripts/migrate-to-supabase/to-sql-string.ts rename to packages/scripts/import/to-sql-string.ts index bb88fc72c..c636ea917 100644 --- a/packages/scripts/migrate-to-supabase/to-sql-string.ts +++ b/packages/scripts/import/to-sql-string.ts @@ -5,11 +5,12 @@ export function sql_file_string convert_to_sql_string(row[column])) const values_string = `(${values.join(', ')})` if (operation === 'INSERT') { - return `INSERT INTO ${table_name} (${column_names_string}) VALUES\n${values_string};` + return `INSERT INTO ${table_name} (${column_names_string}) VALUES\n${values_string};\n` } else if (operation === 'UPSERT') { - return `INSERT INTO ${table_name} (${column_names_string}) VALUES\n${values_string}\nON CONFLICT (id) DO UPDATE SET ${column_names.map(column => `"${column}" = EXCLUDED."${column}"`).join(', ')};` + return `INSERT INTO ${table_name} (${column_names_string}) VALUES\n${values_string}\nON CONFLICT (id) DO UPDATE SET ${column_names.map(column => `"${column}" = EXCLUDED."${column}"`).join(', ')};\n` } } diff --git a/packages/scripts/migrate-to-supabase/notes.md b/packages/scripts/migrate-to-supabase/notes.md index 486825144..28a5baf50 100644 --- a/packages/scripts/migrate-to-supabase/notes.md +++ b/packages/scripts/migrate-to-supabase/notes.md @@ -1,9 +1,9 @@ # Migrate Entries and Speakers from Firestore to Supabase -- email letting everyone know editing is 
available again +- deploy tags and update indexes +- deal with content-update and content-import interface differences - build new Orama indexes every hour after materialized view is updated -- migrate dictionaries and setup materialized view with entry counts -- get exports working again +- migrate dictionaries and setup materialized view with entry counts (looking for type: "tdv1" and other fields beside the known ones) - If an audio file does not have a speaker still let it play even though speaker needs chosen - ensure all auth users are brought over - Orama: replaceState in createQueryParamStore? look into improving the history to change for view and page changes but not for the others @@ -20,9 +20,8 @@ - drop content_updates' table column - drop entry_updates - make alternate writing systems of the sentence translations as different bcp keys (same as for glosses) -- Remove algolia keys from vercel - change old senses created_by/updated_by from firebase ids to user_ids and then connect relationships and change type to uuid -- add 331 megabytes of content_updates to db, saved a sql queries to avoid upgrading to the $25/month +- add 331 megabytes of content_updates to db, saved sql queries to avoid upgrading to the $25/month - think about find-replacing the "pn/v": "prenoun / preverb", and one other pos with dash when filtering by pos ## Notes diff --git a/packages/scripts/migrate-to-supabase/reset-db.ts b/packages/scripts/migrate-to-supabase/reset-db.ts deleted file mode 100644 index 7b7bf484a..000000000 --- a/packages/scripts/migrate-to-supabase/reset-db.ts +++ /dev/null @@ -1,17 +0,0 @@ -// import { readFileSync } from 'node:fs' -import { jacob_ld_user_id, postgres } from '../config-supabase' - -export async function reset_db() { - console.info('reseting db from seed sql') - - await postgres.execute_query(`truncate table auth.users cascade;`) - await postgres.execute_query('truncate table senses cascade;') - - // const seedFilePath = '../../supabase/seed.sql' - // const seed_sql = readFileSync(seedFilePath, 'utf8') - // await postgres.execute_query(seed_sql) - - const add_user_sql = `INSERT INTO auth.users ("aud", "email", "id", "instance_id", "role") VALUES -('authenticated', 'jacob@livingtongues.org', '${jacob_ld_user_id}', '00000000-0000-0000-0000-000000000000', 'authenticated');` - await postgres.execute_query(add_user_sql) -} diff --git a/packages/scripts/migrate-to-supabase/save-content-update.ts b/packages/scripts/migrate-to-supabase/save-content-update.ts index e470f4e10..8d47be673 100644 --- a/packages/scripts/migrate-to-supabase/save-content-update.ts +++ b/packages/scripts/migrate-to-supabase/save-content-update.ts @@ -1,5 +1,5 @@ import fs from 'node:fs' -import type { ContentImportBody } from '@living-dictionaries/types/supabase/content-import.interface' +import type { ImportContentUpdate } from '@living-dictionaries/types/supabase/content-import.interface' import { jacob_ld_user_id } from '../config-supabase' import { sql_file_string } from './to-sql-string' @@ -14,7 +14,7 @@ function millisecond_incrementing_timestamp(): string { return new Date(yesterday.getTime() + milliseconds_to_add).toISOString() } -export function prepare_sql(body: ContentImportBody) { +export function prepare_sql(body: ImportContentUpdate) { console.info(body) let sql_statements = '' diff --git a/packages/scripts/package.json b/packages/scripts/package.json index 650cbf083..8e4ada7c3 100644 --- a/packages/scripts/package.json +++ b/packages/scripts/package.json @@ -14,19 +14,15 @@ }, "main": 
"index.ts", "scripts": { - "create-indexes": "tsx create-indexes/add-to-cloudflare.ts -e prod", - "run-migration": "tsx migrate-to-supabase/run-migration.ts -e prod", + "import-dictionary:dev:dry": "tsx import/import.ts --id tseltal", + "import-dictionary:dev:live": "tsx import/import.ts --id tseltal --live", + "import-dictionary:prod:live": "tsx import/import.ts --id tseltal -e prod --live", + "update-locales": "tsx locales/update-locales.ts", + "create-indexes": "tsx create-indexes/add-to-cloudflare.ts", "migrate-users": "tsx migrate-to-supabase/auth.ts", - "countAllEntries": "tsx countAllEntries.ts", - "getEmails": "tsx refactor/get-email.ts -e prod", - "entryRefactor": "tsx refactor/entry-refactor.ts", - "speakerRefactor": "tsx refactor/speaker-refactor.ts", - "updateLocales": "tsx locales/update-locales.ts", - "importDictionary:dev:dry": "tsx import/import.ts --id tseltal", - "importDictionary:dev:live": "tsx import/import.ts --id tseltal --live", - "importDictionary:prod:live": "tsx import/import.ts --id tseltal -e prod --live", + "get-emails": "tsx refactor/get-email.ts -e prod", "test": "vitest", - "test:migration": "vitest --config ./vitest.config.migration.ts" + "test:import": "vitest --config ./vitest.config.import.ts" }, "devDependencies": { "@aws-sdk/client-s3": "^3.679.0", @@ -38,11 +34,9 @@ "@types/pg": "^8.11.8", "@types/stream-chain": "^2.1.0", "@types/stream-json": "^1.7.7", - "algoliasearch": "^4.11.0", "commander": "^9.4.1", "csv-parse": "^5.3.0", "csvtojson": "^2.0.10", - "detect-port": "^1.6.1", "dotenv": "^16.0.2", "firebase": "^10.9.0", "firebase-admin": "^12.7.0", diff --git a/packages/scripts/record-logs.ts b/packages/scripts/record-logs.ts index 42ca148f1..dbf977b1f 100644 --- a/packages/scripts/record-logs.ts +++ b/packages/scripts/record-logs.ts @@ -1,11 +1,14 @@ import fs from 'node:fs' -const logFile = fs.createWriteStream(`./logs/${Date.now()}.txt`, { flags: 'w' }) // 'a' to append, 'w' to truncate the file every time the process starts. -console.log = function (data: any) { - logFile.write(`${JSON.stringify(data)}\n`) - process.stdout.write(`${JSON.stringify(data)}\n`) -} -const postFile = fs.createWriteStream(`./logs/${Date.now()}_post_requests.txt`, { flags: 'w' }) // 'a' to append, 'w' to truncate the file every time the process starts. -console.info = function (data: any) { - postFile.write(`${JSON.stringify(data)}\n`) +if (!process.env.CI) { + const logFile = fs.createWriteStream(`./logs/${Date.now()}.txt`, { flags: 'w' }) // 'a' to append, 'w' to truncate the file every time the process starts. + console.log = function (data: any) { + logFile.write(`${JSON.stringify(data)}\n`) + process.stdout.write(`${JSON.stringify(data)}\n`) + } + + const postFile = fs.createWriteStream(`./logs/${Date.now()}_post_requests.txt`, { flags: 'w' }) // 'a' to append, 'w' to truncate the file every time the process starts. 
+ console.info = function (data: any) { + postFile.write(`${JSON.stringify(data)}\n`) + } } diff --git a/packages/scripts/refactor/entry-refactor.ts b/packages/scripts/refactor/entry-refactor.ts deleted file mode 100644 index a96ca9826..000000000 --- a/packages/scripts/refactor/entry-refactor.ts +++ /dev/null @@ -1,181 +0,0 @@ -import type { ActualDatabaseEntry } from '@living-dictionaries/types' -import { program } from 'commander' -import { db } from '../config-firebase' -import { reverse_semantic_domains_mapping } from './reverse-semantic-domains-mapping' -import { turn_dialect_strings_to_arrays } from './turn-dialects-to-arrays' - -program - // .version('0.0.1') - .option('--id ', 'Dictionary Id') - .option('--live', 'If not included, only log values') - .parse(process.argv) - -const dictionaryId = program.opts().id -const { live } = program.opts() - -async function entryRefactor() { - try { - if (dictionaryId) { - console.log(`---Refactoring: ${dictionaryId}`) - await fetchEntries(dictionaryId) - } else { - const snapshot = await db.collection('dictionaries').get() - for (const dictionarySnap of snapshot.docs) { - // If setting limits on refactoring, you can skip dictionaries beginning with letters that have already been processed: - const done = /^[abcdefghijklmn].*/ - if (!done.test(dictionarySnap.id.toLowerCase())) { - console.log(`---Refactoring: ${dictionarySnap.id}`) - await fetchEntries(dictionarySnap.id) - } - } - } - } catch (error) { - console.error('Refactor failed!') - console.error(error) - } -} - -async function fetchEntries(dictionaryId: string) { - const snapshot = await db.collection(`dictionaries/${dictionaryId}/words`).get() - for (const snap of snapshot.docs) { - const entry: ActualDatabaseEntry = { id: snap.id, ...(snap.data() as ActualDatabaseEntry) } - // await turnSDintoArray(dictionaryId, entry); - // await refactorGloss(dictionaryId, entry); - // await notesToPluralForm(dictionaryId, entry); - // turnPOSintoArray(dictionaryId, entry); // not awaiting so operations can run in parallel otherwise the function errors after about 1400 iterations - // reverese_semantic_domains_in_db(dictionaryId, entry); - // turnDialectsIntoArray(dictionaryId, entry); - turnSoundFileToArray(dictionaryId, entry) - } -} - -async function turnDialectsIntoArray(dictionaryId: string, entry: ActualDatabaseEntry) { - if (entry.di) { - console.log('entry dialect before:') - console.log(entry.di) - if (Array.isArray(entry.di)) - return true - - entry.di = turn_dialect_strings_to_arrays(entry.di) - console.log('entry dialect after:') - console.log(entry.di) - if (!live) return - await db.collection(`dictionaries/${dictionaryId}/words`).doc(entry.id).set(entry) - } - return true -} - -async function reverese_semantic_domains_in_db(dictionaryId: string, entry: ActualDatabaseEntry) { - if (entry.sdn) { - console.log('entry sdn before:') - console.log(entry.sdn) - entry.sdn = reverse_semantic_domains_mapping(entry.sdn) - } - console.log('entry sdn after:') - console.log(entry.sdn) - if (!live) return - await db.collection(`dictionaries/${dictionaryId}/words`).doc(entry.id).set(entry) - return true -} - -async function turnSDintoArray(dictionaryId: string, entry: ActualDatabaseEntry) { - if (entry.sd && typeof entry.sd === 'string') { - console.log('entry sd before: ', entry.sd) - const emptyArray: string[] = [] - emptyArray.push(entry.sd) - entry.sd = emptyArray - console.log('entry sd after: ', entry.sd) - } else if (entry.sd && Array.isArray(entry.sd)) { - console.log('it is an array - do 
nothing') - } else { - delete entry.sd - } - if (!live) return - await db.collection(`dictionaries/${dictionaryId}/words`).doc(entry.id).set(entry) - return true -} - -let count = 1 -async function turnPOSintoArray(dictionaryId: string, entry: ActualDatabaseEntry) { - if (entry.ps && typeof entry.ps === 'string') { - console.log(`${count}:${dictionaryId}:${entry.id}`) - console.log(entry.ps) - entry.ps = [entry.ps] - console.log(entry.ps) - count++ - if (live) await db.collection(`dictionaries/${dictionaryId}/words`).doc(entry.id).set(entry) - // } else if (entry.ps && entry.ps instanceof Array) { - // console.log(`${dictionaryId}:${entry.id} is already an array`); - } -} - -async function refactorGloss(dictionaryId: string, entry: ActualDatabaseEntry) { - console.log(entry.gl) - for (const key in entry.gl) { - if (key === 'English') { - entry.gl.en = entry.gl[key] - delete entry.gl[key] - } - if (key === 'Spanish') { - entry.gl.es = entry.gl[key] - delete entry.gl[key] - } - if (key === 'Español') { - entry.gl.es = entry.gl[key] - delete entry.gl[key] - } - if (key === 'Bahasa Indonesia') { - entry.gl.id = entry.gl[key] - delete entry.gl[key] - } - if (key === 'French') { - entry.gl.fr = entry.gl[key] - delete entry.gl[key] - } - if (key === 'Mandarin 中文') { - entry.gl.cmn = entry.gl[key] - delete entry.gl[key] - } - } - if (!live) return - await db.collection(`dictionaries/${dictionaryId}/words`).doc(entry.id).set(entry) - return console.log(`${entry.id}: `, entry.gl) -} - -async function notesToPluralForm(dictionaryId: string, entry: ActualDatabaseEntry) { - const ntBefore = entry.nt - if (entry?.nt.startsWith('Plural form:')) { - entry.pl = entry.nt.replace('Plural form: ', '') - delete entry.nt - console.log(`${entry.id}, ntBefore:${ntBefore}, ntAfter:${entry.nt}, pl:${entry.pl}`) - } - if (!live) return - await db.collection(`dictionaries/${dictionaryId}/words`).doc(entry.id).set(entry) - return true -} - -async function turnSoundFileToArray(dictionaryId: string, entry: ActualDatabaseEntry) { - const sfBefore = entry.sf - if (entry.sf?.sp) { - entry.sfs = [{ ...entry.sf, sp: [entry.sf.sp] }] - delete entry.sf - console.log(`${entry.id}, sfBefore:${JSON.stringify(sfBefore)}, sfsAfter:${JSON.stringify(entry.sfs)}, sfNull:${entry.sf}`) - } - if (!live) return - await db.collection(`dictionaries/${dictionaryId}/words`).doc(entry.id).set(entry) - return true -} - -entryRefactor() - -// Single Dictionary -// `pnpm entryRefactor --id babanki` to log refactor in dev -// `pnpm entryRefactor --id babanki --live` to do refactor in dev -// `pnpm entryRefactor --id babanki -e prod` to log refactor in prod -// `pnpm entryRefactor --id babanki --live -e prod` to do refactor in prod - -// All dictionaries -// `pnpm entryRefactor` to log refactor in dev -// `pnpm entryRefactor --live` to do refactor in dev -// `pnpm entryRefactor -e prod` to log refactor in prod -// `pnpm entryRefactor --live -e prod` to do refactor in prod diff --git a/packages/scripts/refactor/get-email.ts b/packages/scripts/refactor/get-email.ts index dccdc99a4..decb22522 100644 --- a/packages/scripts/refactor/get-email.ts +++ b/packages/scripts/refactor/get-email.ts @@ -1,7 +1,7 @@ import type { IUser } from '@living-dictionaries/types' import { db } from '../config-firebase' -const dictionary_ids = ['shauki', 'nongtrai', 'eyak', 'early-assamese', 'werikyana', 'nambya', 'garifuna', 'arvanitika', 'awakateko', 'marma', 'birhor', 'kihunde', 'ngiwa---popoloca', 'memoni', 'judeo-iraqi-arabic', 'namtrik-de-totoro', 'belarusian', 
'seniwan', 'zapotec'] +const dictionary_ids = ['shauki', 'nongtrai'] // get_dictionary_emails(dictionary_ids) diff --git a/packages/scripts/refactor/move-firestore-document.ts b/packages/scripts/refactor/move-firestore-document.ts index 198a922f6..54bb62383 100644 --- a/packages/scripts/refactor/move-firestore-document.ts +++ b/packages/scripts/refactor/move-firestore-document.ts @@ -5,9 +5,6 @@ import { db } from '../config-firebase' // copyDoc(`dictionaries/olùkùmi`, `dictionaries/olukumi`, {}, true); // moveDoc(`dictionaries/olùkùmi`, `dictionaries/olukumi`); -// note that entryCount will be doubled since importing entries (words) will fire off the incrementing function -// note that Algolia index will be wiped out after deleting the old dictionary since the objectIds are identical - // from https://leechy.dev/firestore-move export async function moveDoc( oldDocPath: string, diff --git a/packages/scripts/refactor/reverse-semantic-domains-mapping.ts b/packages/scripts/refactor/reverse-semantic-domains-mapping.ts deleted file mode 100644 index 336f7df89..000000000 --- a/packages/scripts/refactor/reverse-semantic-domains-mapping.ts +++ /dev/null @@ -1,81 +0,0 @@ -import { semanticDomains } from '@living-dictionaries/site'; - -export function reverse_semantic_domains_mapping(semantic_domains: string[]): string[] { - const cleaned_semantic_domains = replace_hyphen_with_comma(semantic_domains); - const semantic_domain_number = cleaned_semantic_domains.map((semantic_domain) => { - const domain = update_old_semantic_domains(semantic_domain); - const matched_domain_obj = semanticDomains.find((sd) => sd.name === domain); - return matched_domain_obj?.key || semantic_domain; - }); - return semantic_domain_number; -} - -if (import.meta.vitest) { - describe('reverse_semantic_domains_mapping', () => { - test('converts normal domain strings', () => { - const sdn = ['Universe and the natural world', 'Earth, geology and landscape']; - const expected = ['1', '1.2']; - expect(reverse_semantic_domains_mapping(sdn)).toEqual(expected); - }); - - test('converts domains with hyphens', () => { - const sdn = ['Health - well-being and sickness', 'Earth - geology and landscape']; - const expected = ['2.4', '1.2']; - expect(reverse_semantic_domains_mapping(sdn)).toEqual(expected); - }); - - test('ignores when strings are already the semantic domain keys', () => { - const sdn = ['2.4', '1.2']; - const expected = ['2.4', '1.2']; - expect(reverse_semantic_domains_mapping(sdn)).toEqual(expected); - }); - - test('checks the renamed semantic domains are updated', () => { - const sdn = ['States', 'Physical Actions and States']; - const expected = ['6.5', '6']; - expect(reverse_semantic_domains_mapping(sdn)).toEqual(expected); - }); - }); -} - -function replace_hyphen_with_comma(strings: string[]): string[] { - return strings.map((s) => s.replace(/ -/g, ',')); -} - -if (import.meta.vitest) { - describe('replace_hyphen_with_comma', () => { - test('changes space plus hyphen into comma', () => { - const strings = [ - 'Health - well-being and sickness', - 'Coordinators - Subordinators - Relativizers - Quotatives', - ]; - expect(replace_hyphen_with_comma(strings)).toEqual([ - 'Health, well-being and sickness', - 'Coordinators, Subordinators, Relativizers, Quotatives', - ]); - }); - - test('ignores hyphens without space', () => { - const semantic_domains = [ - 'Pro-forms', - 'Motion', - ]; - expect(replace_hyphen_with_comma(semantic_domains)).toEqual([ - 'Pro-forms', - 'Motion', - ]); - }); - }); -} - -function 
update_old_semantic_domains(semantic_domain: string): string { - if (semantic_domain === 'States') - return 'States and Characteristics'; - - if (semantic_domain === 'Physical Actions and States') - return 'Physical Actions'; - - return semantic_domain; -} - - diff --git a/packages/scripts/refactor/speaker-refactor.ts b/packages/scripts/refactor/speaker-refactor.ts deleted file mode 100644 index 474a3841a..000000000 --- a/packages/scripts/refactor/speaker-refactor.ts +++ /dev/null @@ -1,161 +0,0 @@ -import type { ActualDatabaseEntry, ISpeaker } from '@living-dictionaries/types' -import { program } from 'commander' -import { db, timestamp } from '../config-firebase' - -program -// .version('0.0.1') - .option('--id ', 'Dictionary Id') - .option('--live', 'If not included, only log values') - .parse(process.argv) - -const dictionaryId = program.opts().id -const { live } = program.opts() - -interface unique_speakers { - id: string - name: string -} -const all_speakers: unique_speakers[] = [] -const developer_in_charge = 'qkTzJXH24Xfc57cZJRityS6OTn52' // diego@livingtongues.org -> Diego Córdova Nieto; -let speakers_to_remove: unique_speakers[] -let speakerDuplicationHandled = false - -async function speakerRefactor() { - try { - if (dictionaryId) { - console.log(`---Refactoring: ${dictionaryId}`) - await fetchEntries(dictionaryId) - } else { - const snapshot = await db.collection('dictionaries').get() - for (const dictionarySnap of snapshot.docs) { - // If setting limits on refactoring, you can skip dictionaries beginning with letters that have already been processed: - const done = /^[abcdefghijklmn].*/ - if (!done.test(dictionarySnap.id.toLowerCase())) { - console.log(`---Refactoring: ${dictionarySnap.id}`) - await fetchEntries(dictionarySnap.id) - } - } - } - } catch (error) { - console.error('Refactor failed!') - console.error(error) - } -} - -async function fetchEntries(dictionaryId: string) { - const speakerCollectionRef = db.collection('speakers') - const dictionarySpeakerSnapshot = await speakerCollectionRef.where('contributingTo', 'array-contains', dictionaryId).get() - dictionarySpeakerSnapshot.docs.forEach(snap => all_speakers.push({ name: snap.data().displayName, id: snap.id })) - const snapshot = await db.collection(`dictionaries/${dictionaryId}/words`).get() - for (const snap of snapshot.docs) { - const entry: ActualDatabaseEntry = { id: snap.id, ...(snap.data() as ActualDatabaseEntry) } - // await addSpeakerIdToEntry(dictionaryId, entry, []); // * Modify this line with real speaker Data like [{birthplace: 'US', gender: 'm', displayName: 'Dano'}, {birthplace: 'US', gender: 'f', displayName: 'Ilo'}, {birthplace: 'Mexico', displayName: 'Cañitas'}] - // await avoidSpeakerDuplication(dictionaryId, entry, ''); - // await changeSpeakerNames(dictionaryId, entry, [''], '') - } - if (speakerDuplicationHandled) - deleteDuplicateSpeakers() -} - -async function addSpeaker(speakerData: ISpeaker) { - const speaker = db.collection('speakers').doc() - console.log(`Saving speaker... 
speaker id: ${speaker.id}`) - if (!live) return speaker.id - await speaker.set(speakerData) - return speaker.id -} - -function createEntrySoundFiles(entry: ActualDatabaseEntry, speakerId: string, path: string) { - entry.sfs = [{ - ab: developer_in_charge, - sp: [speakerId], - path, - ts: new Date().getTime(), - }] -} - -async function addSpeakerIdToEntry(dictionaryId: string, entry: ActualDatabaseEntry, speakersData: ISpeaker[]) { - const sfBefore = entry.sf - if (entry.sf?.speakerName) { - let speakerId = all_speakers.find(speaker => speaker.name === entry.sf.speakerName)?.id - if (!speakerId && speakersData.some(speaker => speaker.displayName === entry.sf.speakerName)) { - const specificSpeakerData = speakersData.find(speaker => speaker.displayName === entry.sf.speakerName) - speakerId = await addSpeaker({ - ...specificSpeakerData, - displayName: entry.sf.speakerName, - contributingTo: [dictionaryId], - // @ts-expect-error TODO remove once sveltefirets is updated - createdAt: timestamp, - createdBy: developer_in_charge, - // @ts-expect-error TODO remove once sveltefirets is updated - updatedAt: timestamp, - updatedBy: developer_in_charge, - }) - all_speakers.push({ name: entry.sf.speakerName, id: speakerId }) - } - - console.log(entry.id) - console.log(`Before: sf-${JSON.stringify(sfBefore)} sfs-${JSON.stringify(entry?.sfs)}`) - createEntrySoundFiles(entry, speakerId, entry.sf.path) - delete entry.sf - console.log(`After: sf-${JSON.stringify(entry?.sf)} sfs-${JSON.stringify(entry.sfs)}`) - } - if (!live) return - await db.collection(`dictionaries/${dictionaryId}/words`).doc(entry.id).set(entry) -} - -async function avoidSpeakerDuplication(dictionaryId: string, entry: ActualDatabaseEntry, speakerId: string) { - if (entry.sfs) { - const selected_speaker = all_speakers.find(speaker => speaker.id === speakerId) - - if (!speakers_to_remove) - speakers_to_remove = all_speakers.filter(speaker => (speaker.name === selected_speaker.name && speaker.id != selected_speaker.id)) - - if (speakers_to_remove.length > 0) { - if (speakers_to_remove.some(speaker => speaker.id === entry.sfs[0].sp[0])) { - console.log(entry.id) - console.log(`before sfs-${JSON.stringify(entry?.sfs)}`) - entry.sfs[0].sp = [selected_speaker.id] - console.log(`after sfs-${JSON.stringify(entry?.sfs)}`) - } - } - speakerDuplicationHandled = true - if (!live) return - await db.collection(`dictionaries/${dictionaryId}/words`).doc(entry.id).set(entry) - } -} - -async function deleteDuplicateSpeakers() { - if (speakers_to_remove.length > 0) { - for (const speaker of speakers_to_remove) - console.log(`deleting ${JSON.stringify(speaker)}`) - if (!live) return - for (const speaker of speakers_to_remove) - await db.doc(`speakers/${speaker.id}`).delete() - } -} - -async function changeSpeakerNames(dictionaryId: string, entry: ActualDatabaseEntry, old_names: string[], new_name: string) { - if (old_names.includes(entry.sf?.speakerName)) { - console.log(entry.id) - console.log(`sfBefore${JSON.stringify(entry?.sf)}`) - entry.sf.speakerName = new_name - console.log(`sfAfter:${JSON.stringify(entry.sf)}`) - } - if (!live) return - await db.collection(`dictionaries/${dictionaryId}/words`).doc(entry.id).set(entry) -} - -speakerRefactor() - -// Single Dictionary -// `pnpm speakerRefactor --id babanki` to log refactor in dev -// `pnpm speakerRefactor --id babanki --live` to do refactor in dev -// `pnpm speakerRefactor --id babanki -e prod` to log refactor in prod -// `pnpm speakerRefactor --id babanki --live -e prod` to do refactor in prod - -// All 
dictionaries -// `pnpm speakerRefactor` to log refactor in dev -// `pnpm speakerRefactor --live` to do refactor in dev -// `pnpm speakerRefactor -e prod` to log refactor in prod -// `pnpm speakerRefactor --live -e prod` to do refactor in prod diff --git a/packages/scripts/refactor/turn-dialects-to-arrays.ts b/packages/scripts/refactor/turn-dialects-to-arrays.ts deleted file mode 100644 index 772ae8fd1..000000000 --- a/packages/scripts/refactor/turn-dialects-to-arrays.ts +++ /dev/null @@ -1,21 +0,0 @@ -export function turn_dialect_strings_to_arrays(dialect: string): string[] { - if (dialect.includes(', ')) { - const dialects = dialect.split(', '); - return dialects; - } - return [dialect]; -} - - -if (import.meta.vitest) { - describe('turn_dialect_strings_to_arrays', () => { - test('turns simple dialect string into an array', () => { - const dialect = 'east'; - expect(turn_dialect_strings_to_arrays(dialect)).toEqual(['east']); - }); - test('turns multiple dialects as a string into an array with multiple elements', () => { - const dialect = 'east, west, north, south'; - expect(turn_dialect_strings_to_arrays(dialect)).toEqual(['east', 'west', 'north', 'south']); - }); - }); -} diff --git a/packages/scripts/refactor/upload-old-dictionaries.ts b/packages/scripts/refactor/upload-old-dictionaries.ts deleted file mode 100644 index 2dc2dfc81..000000000 --- a/packages/scripts/refactor/upload-old-dictionaries.ts +++ /dev/null @@ -1,31 +0,0 @@ -import { GeoPoint } from 'firebase-admin/firestore' -import type { IDictionary } from '@living-dictionaries/types' -import { db } from '../config-firebase' -import { tdLocations } from './tdv1-dictionaries'; - -(() => { - try { - tdLocations.forEach(async (dictionary) => { - if (dictionary.properties.icon === 'library-15') { - const [,dictionaryUrl] = dictionary.properties.xlink.match(/http:\/\/talkingdictionary.org\/(.+)/) - const data: Partial = { - name: dictionary.properties.label, - population: dictionary.properties.size, - publishYear: dictionary.properties.date, - coordinates: new GeoPoint( - dictionary.geometry.coordinates[1], - dictionary.geometry.coordinates[0], - ), - url: dictionary.properties.xlink, - type: 'tdv1', - } - if (dictionary.properties.thumbnail) - data.thumbnail = dictionary.properties.thumbnail - - await db.doc(`dictionaries/tdv1-${dictionaryUrl}`).set(data) - } - }) - } catch (err) { - console.log(err) - } -})() diff --git a/packages/scripts/reset-local-db.ts b/packages/scripts/reset-local-db.ts new file mode 100644 index 000000000..caee133a4 --- /dev/null +++ b/packages/scripts/reset-local-db.ts @@ -0,0 +1,26 @@ +// import { readFileSync } from 'node:fs' +import { diego_ld_user_id, environment, jacob_ld_user_id, postgres, test_dictionary_id } from './config-supabase' + +export async function reset_local_db() { + if (environment === 'prod') { + console.error('cannot reset db in production') + return + } + + console.info('reseting db from seed sql') + + await postgres.execute_query(`truncate table auth.users cascade;`) + await postgres.execute_query('truncate table entry_updates cascade;') + + // const seedFilePath = '../../supabase/seed.sql' + // const seed_sql = readFileSync(seedFilePath, 'utf8') + // await postgres.execute_query(seed_sql) + + const add_user_and_dictionary_sql = `INSERT INTO auth.users ("aud", "email", "id", "instance_id", "role") VALUES +('authenticated', 'jacob@livingtongues.org', '${jacob_ld_user_id}', '00000000-0000-0000-0000-000000000000', 'authenticated'), +('authenticated', 'diego@livingtongues.org', 
'${diego_ld_user_id}', '00000000-0000-0000-0000-000000000000', 'authenticated'); + +INSERT INTO "public"."dictionaries" ("id", "name", "created_at", "created_by", "updated_at", "updated_by") VALUES +('${test_dictionary_id}', 'Test Dictionary', '2024-03-18 14:16:22.367188+00', '${diego_ld_user_id}', '2024-03-18 14:16:22.367188+00', '${diego_ld_user_id}');` + await postgres.execute_query(add_user_and_dictionary_sql) +} diff --git a/packages/scripts/vitest.config.migration.ts b/packages/scripts/vitest.config.import.ts similarity index 77% rename from packages/scripts/vitest.config.migration.ts rename to packages/scripts/vitest.config.import.ts index bfd63ffa6..79415ba16 100644 --- a/packages/scripts/vitest.config.migration.ts +++ b/packages/scripts/vitest.config.import.ts @@ -3,13 +3,13 @@ import { defineConfig } from 'vitest/config' // run separately from other unit tests because it requires local Supabase running export default defineConfig({ test: { - name: 'scripts:migration', + name: 'scripts:import', globals: true, poolOptions: { threads: { singleThread: true, }, }, - include: ['migrate-to-supabase/**/*.test.ts'], + include: ['import/**/*.test.ts'], }, }) diff --git a/packages/scripts/vitest.config.ts b/packages/scripts/vitest.config.ts index 686dcd3b5..76a12d194 100644 --- a/packages/scripts/vitest.config.ts +++ b/packages/scripts/vitest.config.ts @@ -5,6 +5,6 @@ export default defineProject({ name: 'scripts:unit', globals: true, includeSource: ['./import/**/*.ts', './refactor/**/*.ts', './spreadsheet_helpers/**/*.ts'], - exclude: [...defaultExclude, 'migrate-to-supabase/**'], + exclude: [...defaultExclude, 'migrate-to-supabase/**', 'import/**'], }, }) diff --git a/packages/site/src/db-tests/content-update.test.bak b/packages/site/src/db-tests/content-update.test.bak index affae4233..bf0c1e407 100644 --- a/packages/site/src/db-tests/content-update.test.bak +++ b/packages/site/src/db-tests/content-update.test.bak @@ -266,7 +266,7 @@ describe('sense sentence operations', () => { "table": null, "text_id": null, "timestamp": "2024-03-08T00:44:04.6+00:00", - "user_id": "12345678-abcd-efab-cdef-123456789012", + "user_id": "de2d3715-6337-45a3-a81a-d82c3210b2a7", "video_id": null, } `) @@ -299,7 +299,7 @@ describe('sense sentence operations', () => { "table": null, "text_id": null, "timestamp": "2024-03-08T00:44:04.6+00:00", - "user_id": "12345678-abcd-efab-cdef-123456789012", + "user_id": "de2d3715-6337-45a3-a81a-d82c3210b2a7", "video_id": null, }, ] @@ -365,7 +365,7 @@ describe('sense sentence operations', () => { "table": null, "text_id": null, "timestamp": "2024-03-09T00:44:04.6+00:00", - "user_id": "12345678-abcd-efab-cdef-123456789012", + "user_id": "de2d3715-6337-45a3-a81a-d82c3210b2a7", "video_id": null, } `) diff --git a/packages/site/src/docs/Supabase.md b/packages/site/src/docs/Supabase.md index 15430d033..5fbe4ff2e 100644 --- a/packages/site/src/docs/Supabase.md +++ b/packages/site/src/docs/Supabase.md @@ -26,7 +26,7 @@ Once you have run `supabase start` you can open the Studio URL to explore your l Local: - `pnpm generate-types` to run `supabase gen types typescript --local --schema public > packages/types/supabase/generated.types.ts` - save the file to have lint fix auto-run -- `pnpm t merge-types` to merge the generated types with the manually written types +- Uncomment out the last todo test in `merge-types.test.ts` and run `pnpm t merge-types` to merge the generated types with the manually written types (need to make this a CLI) Deployed (we don't use this): - `supabase gen 
types typescript --project-id=actkqboqpzniojhgtqzw --schema public > packages/site/src/lib/supabase/generated.types.ts` diff --git a/packages/site/src/docs/misc/JSON-to-Firestore-Importer.md b/packages/site/src/docs/misc/JSON-to-Firestore-Importer.md deleted file mode 100644 index 21bf4e6ad..000000000 --- a/packages/site/src/docs/misc/JSON-to-Firestore-Importer.md +++ /dev/null @@ -1,33 +0,0 @@ -# JSON to Firestore Importer (OUTDATED) - -Built with help from https://angularfirebase.com/lessons/import-csv-json-or-excel-to-firestore/ - -## Initial setup - -1. Run `npm install` -2. Compile Typescript into Javscript (see below) -3. Import needed service account json files into /service-accounts -4. Place JSON data, audio files, and photos into appropriate folders (/d-data, /d-audio, /d-photos) - -### To Compile Typescript files into Javascript after making changes - -Run `tsc` (must have installed Typescript beforehand) -or press `Ctrl+Shift+B` and select `tsc: watch - tsconfig.json` to start the Typescript watcher - -## How to Import data+audio/photos to Firestore - -1. Write proper gloss fields into importToFirestore.ts (\*at least until a cross-dictionary pattern can be established) -2. Run `firestore-import --data --audio --photos --dictionaryId --dictionaryName --environment prod` - The script defaults to dev environment if prod not mentioned as environment is an optional argument. See https://github.com/tj/commander.js/ for help with required and optional arguments. - -As an example using abbreviated argument names, to import Chamococo to the dev site, run `firestore-import -d d-data/chamacoco_export.json -a d-audio -p d-photos -i yRl8SvrwmeyckpCHU1X5 -n Chamococo` - -Or `firestore-import -d d-data/photo_test.json -a d-audio -p d-photos -i kRlFo5AymRG2hYWg4mpY -n Spanish` - -### How to run the script to gather parts of speech - -Run `npm run gather-pos -s `, for example: `gather-pos -s data/chamacoco_export.json` - -`npm run import-old-td dryRun` - -Debug .ts file w/o compiling: https://medium.com/@dupski/debug-typescript-in-vs-code-without-compiling-using-ts-node-9d1f4f9a94a diff --git a/packages/site/src/docs/misc/functions-config.md b/packages/site/src/docs/misc/functions-config.md deleted file mode 100644 index d86cc7487..000000000 --- a/packages/site/src/docs/misc/functions-config.md +++ /dev/null @@ -1,12 +0,0 @@ -# Firebase Environment Configuration (Secrets) - -See [Environment configuration](https://firebase.google.com/docs/functions/config-env) and [Firebase CLI](https://firebase.google.com/docs/cli) - -## How to set -- `firebase login` with a user that has access to the desired Firebase project -- If not already on the desired project run `firebase use default` for dev or `firebase use production` for prod. The project options are specified in the `.firebasesrc` file in the project root. -- Read current config with `firebase functions:config:get` -- Set environment variable / secret by running `firebase functions:config:set algolia.app="..."` -- You can read config again if you'd like to verify success -- Set up the config on dev first and then when happy, switch over to production and save the new config variables there as well. -- Next time you deploy functions, the config will be updated on Firebase's servers on a project by project basis. 
\ No newline at end of file diff --git a/packages/site/src/docs/misc/functions-unit-tests.md b/packages/site/src/docs/misc/functions-unit-tests.md deleted file mode 100644 index b6fb8e4d2..000000000 --- a/packages/site/src/docs/misc/functions-unit-tests.md +++ /dev/null @@ -1,44 +0,0 @@ -# Unit Test Firebase Functions w/ Jest in the command line (OUTDATED) - -## For Contributors: How to run Firebase Functions Testing tools - -1. Procure the service account file from the project admin and save it to the functions directory as `service-account-dev.json` -2. Optional: if testing any functions that depend on environment config variables such as email sends run `firebase functions:config:get > .runtimeconfig.json` as detailed in https://firebase.google.com/docs/functions/local-emulator#set_up_functions_configuration_optional -3. Run `jest main.test` to make sure you have placed the service account correctly. - -## Create new Unit Tests of Functions using Jest - -1. Create a new \*.test.ts file and add `/// ` to the top of the file to avoid type errors. -2. To ensure everything is working, create a basic test like so: -```js -test('foo', () => { - expect(true).toBe(true); -}) -``` -3. Make changes to your \*.test.ts file and then run `npm t`. Alternatively run `npm run test:watch` to actively run tests while developing. - -See https://jestjs.io/docs/en/getting-started for tips - -## Initial Setup of Jest Testing for Typescript Firebase Functions - -1. Install Jest for Typescript: `npm i -D jest typescript` -2. Install typings: `npm i -D ts-jest @types/jest` -3. Create config file, `npx ts-jest config:init` -4. Add to package.json `"test": "jest"` and `"test:watch": "jest --watchAll"` -5. Download the service account file from your Firebase project and save it to the functions directory as `service-account-dev.json` - don't commit this! -6. Install Firebase Functions Test: `npm i firebase-functions-test` -7. Setup test-config.ts as seen in tests/test-config.ts in this repo. - -Read https://firebase.google.com/docs/functions/unit-testing to learn more -Some good testing examples also found in https://github.com/firebase/quickstart-nodejs - -## Manually Test Functions Using the Firebase Shell - -(This is not needed if you use Jest testing tools but it's good to know about.) - -1. Create new function and make sure it's exported from index.ts -2. For anything beyond Firestore and Realtime Database we need to set GOOGLE_APPLICATION_CREDENTIALS - In Powershell on Windows (VS Code) - `$env:GOOGLE_APPLICATION_CREDENTIALS="C:\Apps\talking-dictionaries\functions\service-account-dev.json"` - See https://cloud.google.com/docs/authentication/getting-started#auth-cloud-implicit-nodejs for Linux/MacOS -3. `npm run shell` diff --git a/packages/site/src/docs/misc/import-dictionary.md b/packages/site/src/docs/misc/import-dictionary.md index 10be69bdb..a370913f6 100644 --- a/packages/site/src/docs/misc/import-dictionary.md +++ b/packages/site/src/docs/misc/import-dictionary.md @@ -2,8 +2,7 @@ **Requires** files not checked into GitHub: - `packages/scripts/.env` -- `packages/scripts/service-account-dev.json` -- `packages/scripts/service-account-prod.json` +- `packages/scripts/service-accounts.ts` - Create a new folder in `packages/scripts/import/data` right next to the `example` folder and name it using the dictionary ID of your intended upload. We'll use `kalinago` as an demo in these instructions. 
Do note that the `example` folder will stay checked in to git but your data won't be because we don't want it in our repo. - Download the CSV from Google Sheets online and place it into your newly created folder. Name it using the dictionary ID, `kalinago.csv` for example. diff --git a/packages/site/src/lib/mocks/seed/tables.ts b/packages/site/src/lib/mocks/seed/tables.ts index 40f996830..a53bd3c65 100644 --- a/packages/site/src/lib/mocks/seed/tables.ts +++ b/packages/site/src/lib/mocks/seed/tables.ts @@ -1,7 +1,7 @@ import type { TablesInsert } from '@living-dictionaries/types' -export const seeded_user_id_1 = '12345678-abcd-efab-cdef-123456789012' -export const seeded_user_id_2 = '12345678-abcd-efab-cdef-123456789013' +export const seeded_user_id_1 = 'de2d3715-6337-45a3-a81a-d82c3210b2a7' +export const seeded_user_id_2 = 'be43b1dd-6c64-494d-b5da-10d70c384433' export const seed_user_email_1 = 'jacob@livingtongues.org' const seeded_user_email_2 = 'diego@livingtongues.org' diff --git a/packages/site/src/lib/search/entries-schema.ts b/packages/site/src/lib/search/entries-schema.ts index b5d3d0160..d046e24c8 100644 --- a/packages/site/src/lib/search/entries-schema.ts +++ b/packages/site/src/lib/search/entries-schema.ts @@ -1,3 +1,4 @@ +import type { EntryView } from '@living-dictionaries/types' import type { Orama } from '@orama/orama' export type EntriesIndex = Orama @@ -22,3 +23,7 @@ export const entries_index_schema = { has_part_of_speech: 'boolean', has_semantic_domain: 'boolean', } as const + +export type Indexed_Entry = { + _lexeme: string[] +} & EntryView diff --git a/packages/site/src/lib/search/multilingual-tokenizer.test.ts b/packages/site/src/lib/search/multilingual-tokenizer.test.ts index 6bad4ccda..9cece7100 100644 --- a/packages/site/src/lib/search/multilingual-tokenizer.test.ts +++ b/packages/site/src/lib/search/multilingual-tokenizer.test.ts @@ -2,20 +2,17 @@ import { tokenize } from './multilingual-tokenizer' describe(tokenize, () => { test('lexemes are searchable starting at any letter', () => { - expect(tokenize('esotmïn', null, '_lexeme')).toMatchInlineSnapshot(` + expect(tokenize('esïtmon', null, '_lexeme')).toMatchInlineSnapshot(` [ - "esotmïn", - "esotmin", - "sotmïn", - "sotmin", - "otmïn", - "otmin", - "tmïn", - "tmin", - "mïn", - "min", - "ïn", - "in", + "esïtmon", + "esitmon", + "sïtmon", + "sitmon", + "ïtmon", + "itmon", + "tmon", + "mon", + "on", ] `) }) diff --git a/packages/site/src/lib/search/search-entries.ts b/packages/site/src/lib/search/search-entries.ts index fb733a762..aea310473 100644 --- a/packages/site/src/lib/search/search-entries.ts +++ b/packages/site/src/lib/search/search-entries.ts @@ -1,6 +1,6 @@ import { type SearchParams as OramaSearchParams, search } from '@orama/orama' import type { QueryParams } from './types' -import type { EntriesIndex } from './entries-schema' +import type { EntriesIndex, Indexed_Entry } from './entries-schema' export interface SearchEntriesOptions { query_params: QueryParams @@ -9,24 +9,26 @@ export interface SearchEntriesOptions { dictionary_id: string } +const last_alphabetical = 'zz' + export async function search_entries({ query_params, entries_per_page, page_index, dictionary_id }: SearchEntriesOptions, index: EntriesIndex) { console.info('searching for', query_params.query) - const lexemeSortBy = (a, b) => { - const a_lx = a[2]._lexeme[0] || 'zz' - const b_lx = b[2]._lexeme[0] || 'zz' - return a_lx.localeCompare(b_lx) - } + const sortBy = (a, b) => { + const [_a_id, _a_score, a_document] = a as [string, number, 
Indexed_Entry] + const [_b_id, _b_score, b_document] = b as [string, number, Indexed_Entry] - const onondagaSortBy = (a, b) => { - const a_id = a[2].elicitation_id || 'zz' - const b_id = b[2].elicitation_id || 'zz' - if (a_id !== b_id) - return a_id?.localeCompare(b_id) - return a[2].lexeme?.localeCompare(b[2].lexeme) - } + if (dictionary_id === 'onondaga') { + const a_id = a_document.main.elicitation_id || last_alphabetical + const b_id = b_document.main.elicitation_id || last_alphabetical + if (a_id !== b_id) + return a_id.localeCompare(b_id) + } - const sortBy = dictionary_id === 'onondaga' ? onondagaSortBy : lexemeSortBy + const a_lx = a_document._lexeme[0] || last_alphabetical + const b_lx = b_document._lexeme[0] || last_alphabetical + return a_lx.localeCompare(b_lx) + } const orama_search_params: OramaSearchParams = { term: query_params.query, diff --git a/packages/site/src/routes/[dictionaryId]/entry/[entryId]/+page.ts b/packages/site/src/routes/[dictionaryId]/entry/[entryId]/+page.ts index 6f7555fd2..724694a8f 100644 --- a/packages/site/src/routes/[dictionaryId]/entry/[entryId]/+page.ts +++ b/packages/site/src/routes/[dictionaryId]/entry/[entryId]/+page.ts @@ -5,13 +5,13 @@ import { ResponseCodes } from '$lib/constants' import { ENTRY_UPDATED_LOAD_TRIGGER } from '$lib/dbOperations' import { browser } from '$app/environment' -export async function load({ params, depends, parent }) { +export async function load({ params: { dictionaryId: dictionary_id, entryId: entry_id }, depends, parent }) { depends(ENTRY_UPDATED_LOAD_TRIGGER) if (browser) { const { entries } = await parent() if (!get(entries.loading)) { - const entry = get(entries).find(entry => entry.id === params.entryId) + const entry = get(entries).find(entry => entry.id === entry_id) if (entry) { return { @@ -26,9 +26,9 @@ export async function load({ params, depends, parent }) { let entry: Tables<'entries_view'> const { data: entries, error: load_error } = await supabase - .from('entries_view') - .select() - .eq('id', params.entryId) + .rpc('entry_by_id', { + passed_entry_id: entry_id, + }) if (!load_error) { [entry] = entries @@ -36,7 +36,7 @@ export async function load({ params, depends, parent }) { const { data: materialized_entries, error: materialized_load_error } = await supabase .from('materialized_entries_view') .select() - .eq('id', params.entryId) + .eq('id', entry_id) if (materialized_load_error) { error(ResponseCodes.INTERNAL_SERVER_ERROR, materialized_load_error) @@ -46,7 +46,7 @@ export async function load({ params, depends, parent }) { } if (!entry || entry.deleted) - redirect(ResponseCodes.MOVED_PERMANENTLY, `/${params.dictionaryId}`) + redirect(ResponseCodes.MOVED_PERMANENTLY, `/${dictionary_id}`) return { entry, diff --git a/packages/types/supabase/augments.types.ts b/packages/types/supabase/augments.types.ts index aed372781..6316e5b4a 100644 --- a/packages/types/supabase/augments.types.ts +++ b/packages/types/supabase/augments.types.ts @@ -4,22 +4,29 @@ import type { DictionaryPhoto } from '../photo.interface' import type { HostedVideo, UnsupportedFields } from '../.' 
import type { Change } from './content-update.interface' import type { AudioWithSpeakerIds, EntryMainFields, SenseWithSentences } from './entry.interface' +import type { ImportContentUpdate } from './content-import.interface' export interface DatabaseAugments { public: { Tables: { content_updates: { Row: { - change: Change + change: Change | null table: string | null + type: ImportContentUpdate['type'] | null + data: ImportContentUpdate['data'] | null } Insert: { - change: Change + change?: Change | null table?: string | null + type?: ImportContentUpdate['type'] | null + data?: ImportContentUpdate['data'] | null } Update: { - change?: Change + change?: Change | null table?: string | null + type?: ImportContentUpdate['type'] | null + data?: ImportContentUpdate['data'] | null } } dialects: { @@ -140,6 +147,7 @@ export interface DatabaseAugments { senses: SenseWithSentences[] | null audios: AudioWithSpeakerIds[] | null dialect_ids: string[] | null + tag_ids: string[] | null } } materialized_entries_view: { @@ -148,6 +156,7 @@ export interface DatabaseAugments { senses: SenseWithSentences[] | null audios: AudioWithSpeakerIds[] | null dialect_ids: string[] | null + tag_ids: string[] | null } } videos_view: { @@ -159,17 +168,23 @@ export interface DatabaseAugments { } Functions: { entries_from_timestamp: { - Args: { - get_newer_than: string - dict_id: string - } Returns: { main: EntryMainFields senses: SenseWithSentences[] | null audios: AudioWithSpeakerIds[] | null dialect_ids: string[] | null + tag_ids: string[] | null } } + entry_by_id: { + Returns: { + main: EntryMainFields + senses: SenseWithSentences[] | null + audios: AudioWithSpeakerIds[] | null + dialect_ids: string[] | null + tag_ids: string[] | null + }[] + } } } } diff --git a/packages/types/supabase/combined.types.ts b/packages/types/supabase/combined.types.ts index caa0be9c5..50298ad73 100644 --- a/packages/types/supabase/combined.types.ts +++ b/packages/types/supabase/combined.types.ts @@ -4,6 +4,7 @@ import type { DictionaryPhoto } from '../photo.interface' import type { HostedVideo, UnsupportedFields } from '../.' 
import type { Change } from './content-update.interface' import type { AudioWithSpeakerIds, EntryMainFields, SenseWithSentences } from './entry.interface' +import type { ImportContentUpdate } from './content-import.interface' export interface Database { public: { @@ -96,17 +97,6 @@ export interface Database { 'id', ] }, - { - foreignKeyName: 'audio_entry_id_fkey' - columns: [ - 'entry_id', - ] - isOneToOne: false - referencedRelation: 'entries_view' - referencedColumns: [ - 'id', - ] - }, { foreignKeyName: 'audio_sentence_id_fkey' columns: [ @@ -236,7 +226,8 @@ export interface Database { content_updates: { Row: { audio_id: string | null - change: Change + change: Change | null + data: ImportContentUpdate['data'] | null dialect_id: string | null dictionary_id: string entry_id: string | null @@ -247,14 +238,17 @@ export interface Database { sentence_id: string | null speaker_id: string | null table: string | null + tag_id: string | null text_id: string | null timestamp: string + type: ImportContentUpdate['type'] | null user_id: string video_id: string | null } Insert: { audio_id?: string | null - change: Change + change?: Change | null + data?: ImportContentUpdate['data'] | null dialect_id?: string | null dictionary_id: string entry_id?: string | null @@ -265,14 +259,17 @@ export interface Database { sentence_id?: string | null speaker_id?: string | null table?: string | null + tag_id?: string | null text_id?: string | null timestamp?: string + type?: ImportContentUpdate['type'] | null user_id?: string video_id?: string | null } Update: { audio_id?: string | null - change?: Change + change?: Change | null + data?: ImportContentUpdate['data'] | null dialect_id?: string | null dictionary_id?: string entry_id?: string | null @@ -283,8 +280,10 @@ export interface Database { sentence_id?: string | null speaker_id?: string | null table?: string | null + tag_id?: string | null text_id?: string | null timestamp?: string + type?: ImportContentUpdate['type'] | null user_id?: string video_id?: string | null } @@ -333,17 +332,6 @@ export interface Database { 'id', ] }, - { - foreignKeyName: 'content_updates_entry_id_fkey' - columns: [ - 'entry_id', - ] - isOneToOne: false - referencedRelation: 'entries_view' - referencedColumns: [ - 'id', - ] - }, { foreignKeyName: 'content_updates_photo_id_fkey' columns: [ @@ -399,6 +387,17 @@ export interface Database { 'id', ] }, + { + foreignKeyName: 'content_updates_tag_id_fkey' + columns: [ + 'tag_id', + ] + isOneToOne: false + referencedRelation: 'tags' + referencedColumns: [ + 'id', + ] + }, { foreignKeyName: 'content_updates_text_id_fkey' columns: [ @@ -854,13 +853,71 @@ export interface Database { 'id', ] }, + ] + } + entry_tags: { + Row: { + created_at: string + created_by: string + deleted: string | null + entry_id: string + tag_id: string + } + Insert: { + created_at?: string + created_by: string + deleted?: string | null + entry_id: string + tag_id: string + } + Update: { + created_at?: string + created_by?: string + deleted?: string | null + entry_id?: string + tag_id?: string + } + Relationships: [ { - foreignKeyName: 'entry_dialects_entry_id_fkey' + foreignKeyName: 'entry_tags_created_by_fkey' + columns: [ + 'created_by', + ] + isOneToOne: false + referencedRelation: 'user_emails' + referencedColumns: [ + 'id', + ] + }, + { + foreignKeyName: 'entry_tags_created_by_fkey' + columns: [ + 'created_by', + ] + isOneToOne: false + referencedRelation: 'users' + referencedColumns: [ + 'id', + ] + }, + { + foreignKeyName: 'entry_tags_entry_id_fkey' columns: [ 
'entry_id', ] isOneToOne: false - referencedRelation: 'entries_view' + referencedRelation: 'entries' + referencedColumns: [ + 'id', + ] + }, + { + foreignKeyName: 'entry_tags_tag_id_fkey' + columns: [ + 'tag_id', + ] + isOneToOne: false + referencedRelation: 'tags' referencedColumns: [ 'id', ] @@ -1218,17 +1275,6 @@ export interface Database { 'id', ] }, - { - foreignKeyName: 'foreign_key_entries' - columns: [ - 'entry_id', - ] - isOneToOne: false - referencedRelation: 'entries_view' - referencedColumns: [ - 'id', - ] - }, ] } senses_in_sentences: { @@ -1678,6 +1724,98 @@ export interface Database { }, ] } + tags: { + Row: { + created_at: string + created_by: string + deleted: string | null + dictionary_id: string + id: string + name: string + private: boolean | null + updated_at: string + updated_by: string + } + Insert: { + created_at?: string + created_by: string + deleted?: string | null + dictionary_id: string + id: string + name: string + private?: boolean | null + updated_at?: string + updated_by: string + } + Update: { + created_at?: string + created_by?: string + deleted?: string | null + dictionary_id?: string + id?: string + name?: string + private?: boolean | null + updated_at?: string + updated_by?: string + } + Relationships: [ + { + foreignKeyName: 'tags_created_by_fkey' + columns: [ + 'created_by', + ] + isOneToOne: false + referencedRelation: 'user_emails' + referencedColumns: [ + 'id', + ] + }, + { + foreignKeyName: 'tags_created_by_fkey' + columns: [ + 'created_by', + ] + isOneToOne: false + referencedRelation: 'users' + referencedColumns: [ + 'id', + ] + }, + { + foreignKeyName: 'tags_dictionary_id_fkey' + columns: [ + 'dictionary_id', + ] + isOneToOne: false + referencedRelation: 'dictionaries' + referencedColumns: [ + 'id', + ] + }, + { + foreignKeyName: 'tags_updated_by_fkey' + columns: [ + 'updated_by', + ] + isOneToOne: false + referencedRelation: 'user_emails' + referencedColumns: [ + 'id', + ] + }, + { + foreignKeyName: 'tags_updated_by_fkey' + columns: [ + 'updated_by', + ] + isOneToOne: false + referencedRelation: 'users' + referencedColumns: [ + 'id', + ] + }, + ] + } texts: { Row: { created_at: string @@ -1985,20 +2123,10 @@ export interface Database { id: string | null main: EntryMainFields senses: SenseWithSentences[] | null + tag_ids: string[] | null updated_at: string | null } Relationships: [ - { - foreignKeyName: 'entries_dictionary_id_fkey' - columns: [ - 'dictionary_id', - ] - isOneToOne: false - referencedRelation: 'dictionaries' - referencedColumns: [ - 'id', - ] - }, ] } materialized_entries_view: { @@ -2011,6 +2139,7 @@ export interface Database { id: string | null main: EntryMainFields senses: SenseWithSentences[] | null + tag_ids: string[] | null updated_at: string | null } Relationships: [ @@ -2142,6 +2271,24 @@ export interface Database { senses: SenseWithSentences[] | null audios: AudioWithSpeakerIds[] | null dialect_ids: string[] | null + tag_ids: string[] | null + }[] + } + entry_by_id: { + Args: { + passed_entry_id: string + } + Returns: { + id: string + dictionary_id: string + created_at: string + updated_at: string + deleted: string + main: EntryMainFields + senses: SenseWithSentences[] | null + audios: AudioWithSpeakerIds[] | null + dialect_ids: string[] | null + tag_ids: string[] | null }[] } } diff --git a/packages/types/supabase/content-import.interface.ts b/packages/types/supabase/content-import.interface.ts index dbceb6753..edef5f911 100644 --- a/packages/types/supabase/content-import.interface.ts +++ 
b/packages/types/supabase/content-import.interface.ts @@ -1,55 +1,43 @@ import type { TablesInsert } from './combined.types' -export interface Change { - type: string - data: Record -} - -export type ContentImportBody = +export type ImportContentUpdate = | Insert_Entry | Insert_Sense - | Upsert_Audio + | Insert_Audio // used Upsert_Audio in migration | Insert_Photo | Insert_Video - | Upsert_Speaker + | Insert_Speaker // used Upsert_Speaker in migration | Assign_Speaker | Insert_Dialect | Assign_Dialect | Insert_Sentence -interface ContentUpdateBase { - update_id: string // id of the change, a uuidv4 created on client to make things idempotent - auth_token: string - dictionary_id: string - import_id: string -} - -interface Insert_Entry extends ContentUpdateBase { +interface Insert_Entry { type: 'insert_entry' data: Omit, 'dictionary_id' | 'id'> entry_id: string } -interface Insert_Dialect extends ContentUpdateBase { +interface Insert_Dialect { type: 'insert_dialect' data: Omit, 'updated_by' | 'dictionary_id' | 'id'> dialect_id: string } -interface Assign_Dialect extends ContentUpdateBase { +interface Assign_Dialect { type: 'assign_dialect' data: { created_by: string, created_at: string } dialect_id: string entry_id: string } -interface Upsert_Speaker extends ContentUpdateBase { - type: 'upsert_speaker' +interface Insert_Speaker { + type: 'insert_speaker' data: Omit, 'updated_by' | 'dictionary_id' | 'id'> speaker_id: string } -interface Assign_Speaker_Base extends ContentUpdateBase { +interface Assign_Speaker_Base { type: 'assign_speaker' data: { created_by: string, created_at: string } speaker_id: string @@ -67,35 +55,35 @@ interface Assign_Speaker_With_Video extends Assign_Speaker_Base { type Assign_Speaker = Assign_Speaker_With_Audio | Assign_Speaker_With_Video -interface Insert_Sense extends ContentUpdateBase { +interface Insert_Sense { type: 'insert_sense' data: Omit, 'dictionary_id' | 'id' | 'entry_id'> sense_id: string | null entry_id: string } -interface Insert_Sentence extends ContentUpdateBase { +interface Insert_Sentence { type: 'insert_sentence' data: Omit, 'created_by' | 'updated_by' | 'dictionary_id' | 'id'> sentence_id: string sense_id: string } -interface Insert_Photo extends ContentUpdateBase { +interface Insert_Photo { type: 'insert_photo' data: Omit, 'created_by' | 'updated_by' | 'dictionary_id' | 'id'> photo_id: string sense_id: string } -interface Upsert_Audio extends ContentUpdateBase { - type: 'upsert_audio' +interface Insert_Audio { + type: 'insert_audio' data: Omit, 'updated_by' | 'dictionary_id' | 'id'> audio_id: string entry_id: string } -interface Insert_Video extends ContentUpdateBase { +interface Insert_Video { type: 'insert_video' data: Omit, 'created_by' | 'updated_by' | 'dictionary_id' | 'id'> video_id: string diff --git a/packages/types/supabase/generated.types.ts b/packages/types/supabase/generated.types.ts index 5ab12cbd2..fdfb24648 100644 --- a/packages/types/supabase/generated.types.ts +++ b/packages/types/supabase/generated.types.ts @@ -81,13 +81,6 @@ export interface Database { referencedRelation: 'entries' referencedColumns: ['id'] }, - { - foreignKeyName: 'audio_entry_id_fkey' - columns: ['entry_id'] - isOneToOne: false - referencedRelation: 'entries_view' - referencedColumns: ['id'] - }, { foreignKeyName: 'audio_sentence_id_fkey' columns: ['sentence_id'] @@ -181,7 +174,8 @@ export interface Database { content_updates: { Row: { audio_id: string | null - change: Json + change: Json | null + data: Json | null dialect_id: string | null dictionary_id: 
string entry_id: string | null @@ -192,14 +186,17 @@ export interface Database { sentence_id: string | null speaker_id: string | null table: Database['public']['Enums']['content_tables'] | null + tag_id: string | null text_id: string | null timestamp: string + type: string | null user_id: string video_id: string | null } Insert: { audio_id?: string | null - change: Json + change?: Json | null + data?: Json | null dialect_id?: string | null dictionary_id: string entry_id?: string | null @@ -210,14 +207,17 @@ export interface Database { sentence_id?: string | null speaker_id?: string | null table?: Database['public']['Enums']['content_tables'] | null + tag_id?: string | null text_id?: string | null timestamp?: string + type?: string | null user_id?: string video_id?: string | null } Update: { audio_id?: string | null - change?: Json + change?: Json | null + data?: Json | null dialect_id?: string | null dictionary_id?: string entry_id?: string | null @@ -228,8 +228,10 @@ export interface Database { sentence_id?: string | null speaker_id?: string | null table?: Database['public']['Enums']['content_tables'] | null + tag_id?: string | null text_id?: string | null timestamp?: string + type?: string | null user_id?: string video_id?: string | null } @@ -262,13 +264,6 @@ export interface Database { referencedRelation: 'entries' referencedColumns: ['id'] }, - { - foreignKeyName: 'content_updates_entry_id_fkey' - columns: ['entry_id'] - isOneToOne: false - referencedRelation: 'entries_view' - referencedColumns: ['id'] - }, { foreignKeyName: 'content_updates_photo_id_fkey' columns: ['photo_id'] @@ -304,6 +299,13 @@ export interface Database { referencedRelation: 'speakers_view' referencedColumns: ['id'] }, + { + foreignKeyName: 'content_updates_tag_id_fkey' + columns: ['tag_id'] + isOneToOne: false + referencedRelation: 'tags' + referencedColumns: ['id'] + }, { foreignKeyName: 'content_updates_text_id_fkey' columns: ['text_id'] @@ -667,11 +669,57 @@ export interface Database { referencedRelation: 'entries' referencedColumns: ['id'] }, + ] + } + entry_tags: { + Row: { + created_at: string + created_by: string + deleted: string | null + entry_id: string + tag_id: string + } + Insert: { + created_at?: string + created_by: string + deleted?: string | null + entry_id: string + tag_id: string + } + Update: { + created_at?: string + created_by?: string + deleted?: string | null + entry_id?: string + tag_id?: string + } + Relationships: [ + { + foreignKeyName: 'entry_tags_created_by_fkey' + columns: ['created_by'] + isOneToOne: false + referencedRelation: 'user_emails' + referencedColumns: ['id'] + }, { - foreignKeyName: 'entry_dialects_entry_id_fkey' + foreignKeyName: 'entry_tags_created_by_fkey' + columns: ['created_by'] + isOneToOne: false + referencedRelation: 'users' + referencedColumns: ['id'] + }, + { + foreignKeyName: 'entry_tags_entry_id_fkey' columns: ['entry_id'] isOneToOne: false - referencedRelation: 'entries_view' + referencedRelation: 'entries' + referencedColumns: ['id'] + }, + { + foreignKeyName: 'entry_tags_tag_id_fkey' + columns: ['tag_id'] + isOneToOne: false + referencedRelation: 'tags' referencedColumns: ['id'] }, ] @@ -966,13 +1014,6 @@ export interface Database { referencedRelation: 'entries' referencedColumns: ['id'] }, - { - foreignKeyName: 'foreign_key_entries' - columns: ['entry_id'] - isOneToOne: false - referencedRelation: 'entries_view' - referencedColumns: ['id'] - }, ] } senses_in_sentences: { @@ -1318,6 +1359,78 @@ export interface Database { }, ] } + tags: { + Row: { + 
created_at: string + created_by: string + deleted: string | null + dictionary_id: string + id: string + name: string + private: boolean | null + updated_at: string + updated_by: string + } + Insert: { + created_at?: string + created_by: string + deleted?: string | null + dictionary_id: string + id: string + name: string + private?: boolean | null + updated_at?: string + updated_by: string + } + Update: { + created_at?: string + created_by?: string + deleted?: string | null + dictionary_id?: string + id?: string + name?: string + private?: boolean | null + updated_at?: string + updated_by?: string + } + Relationships: [ + { + foreignKeyName: 'tags_created_by_fkey' + columns: ['created_by'] + isOneToOne: false + referencedRelation: 'user_emails' + referencedColumns: ['id'] + }, + { + foreignKeyName: 'tags_created_by_fkey' + columns: ['created_by'] + isOneToOne: false + referencedRelation: 'users' + referencedColumns: ['id'] + }, + { + foreignKeyName: 'tags_dictionary_id_fkey' + columns: ['dictionary_id'] + isOneToOne: false + referencedRelation: 'dictionaries' + referencedColumns: ['id'] + }, + { + foreignKeyName: 'tags_updated_by_fkey' + columns: ['updated_by'] + isOneToOne: false + referencedRelation: 'user_emails' + referencedColumns: ['id'] + }, + { + foreignKeyName: 'tags_updated_by_fkey' + columns: ['updated_by'] + isOneToOne: false + referencedRelation: 'users' + referencedColumns: ['id'] + }, + ] + } texts: { Row: { created_at: string @@ -1557,17 +1670,10 @@ export interface Database { id: string | null main: Json | null senses: Json | null + tag_ids: Json | null updated_at: string | null } - Relationships: [ - { - foreignKeyName: 'entries_dictionary_id_fkey' - columns: ['dictionary_id'] - isOneToOne: false - referencedRelation: 'dictionaries' - referencedColumns: ['id'] - }, - ] + Relationships: [] } materialized_entries_view: { Row: { @@ -1579,6 +1685,7 @@ export interface Database { id: string | null main: Json | null senses: Json | null + tag_ids: Json | null updated_at: string | null } Relationships: [] @@ -1696,6 +1803,24 @@ export interface Database { senses: Json audios: Json dialect_ids: Json + tag_ids: Json + }[] + } + entry_by_id: { + Args: { + passed_entry_id: string + } + Returns: { + id: string + dictionary_id: string + created_at: string + updated_at: string + deleted: string + main: Json + senses: Json + audios: Json + dialect_ids: Json + tag_ids: Json }[] } } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 560dce44a..a782066d3 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -74,9 +74,6 @@ importers: '@types/stream-json': specifier: ^1.7.7 version: 1.7.7 - algoliasearch: - specifier: ^4.11.0 - version: 4.13.0 commander: specifier: ^9.4.1 version: 9.4.1 @@ -86,9 +83,6 @@ importers: csvtojson: specifier: ^2.0.10 version: 2.0.10 - detect-port: - specifier: ^1.6.1 - version: 1.6.1 dotenv: specifier: ^16.0.2 version: 16.0.2 @@ -309,48 +303,6 @@ packages: resolution: {integrity: sha512-1Yjs2SvM8TflER/OD3cOjhWWOZb58A2t7wpE2S9XfBYTiIl+XFhQG2bjy4Pu1I+EAlCNUzRDYDdFwFYUKvXcIA==} engines: {node: '>=0.10.0'} - '@algolia/cache-browser-local-storage@4.13.0': - resolution: {integrity: sha512-nj1vHRZauTqP/bluwkRIgEADEimqojJgoTRCel5f6q8WCa9Y8QeI4bpDQP28FoeKnDRYa3J5CauDlN466jqRhg==} - - '@algolia/cache-common@4.13.0': - resolution: {integrity: sha512-f9mdZjskCui/dA/fA/5a+6hZ7xnHaaZI5tM/Rw9X8rRB39SUlF/+o3P47onZ33n/AwkpSbi5QOyhs16wHd55kA==} - - '@algolia/cache-in-memory@4.13.0': - resolution: {integrity: 
sha512-hHdc+ahPiMM92CQMljmObE75laYzNFYLrNOu0Q3/eyvubZZRtY2SUsEEgyUEyzXruNdzrkcDxFYa7YpWBJYHAg==} - - '@algolia/client-account@4.13.0': - resolution: {integrity: sha512-FzFqFt9b0g/LKszBDoEsW+dVBuUe1K3scp2Yf7q6pgHWM1WqyqUlARwVpLxqyc+LoyJkTxQftOKjyFUqddnPKA==} - - '@algolia/client-analytics@4.13.0': - resolution: {integrity: sha512-klmnoq2FIiiMHImkzOm+cGxqRLLu9CMHqFhbgSy9wtXZrqb8BBUIUE2VyBe7azzv1wKcxZV2RUyNOMpFqmnRZA==} - - '@algolia/client-common@4.13.0': - resolution: {integrity: sha512-GoXfTp0kVcbgfSXOjfrxx+slSipMqGO9WnNWgeMmru5Ra09MDjrcdunsiiuzF0wua6INbIpBQFTC2Mi5lUNqGA==} - - '@algolia/client-personalization@4.13.0': - resolution: {integrity: sha512-KneLz2WaehJmNfdr5yt2HQETpLaCYagRdWwIwkTqRVFCv4DxRQ2ChPVW9jeTj4YfAAhfzE6F8hn7wkQ/Jfj6ZA==} - - '@algolia/client-search@4.13.0': - resolution: {integrity: sha512-blgCKYbZh1NgJWzeGf+caKE32mo3j54NprOf0LZVCubQb3Kx37tk1Hc8SDs9bCAE8hUvf3cazMPIg7wscSxspA==} - - '@algolia/logger-common@4.13.0': - resolution: {integrity: sha512-8yqXk7rMtmQJ9wZiHOt/6d4/JDEg5VCk83gJ39I+X/pwUPzIsbKy9QiK4uJ3aJELKyoIiDT1hpYVt+5ia+94IA==} - - '@algolia/logger-console@4.13.0': - resolution: {integrity: sha512-YepRg7w2/87L0vSXRfMND6VJ5d6699sFJBRWzZPOlek2p5fLxxK7O0VncYuc/IbVHEgeApvgXx0WgCEa38GVuQ==} - - '@algolia/requester-browser-xhr@4.13.0': - resolution: {integrity: sha512-Dj+bnoWR5MotrnjblzGKZ2kCdQi2cK/VzPURPnE616NU/il7Ypy6U6DLGZ/ZYz+tnwPa0yypNf21uqt84fOgrg==} - - '@algolia/requester-common@4.13.0': - resolution: {integrity: sha512-BRTDj53ecK+gn7ugukDWOOcBRul59C4NblCHqj4Zm5msd5UnHFjd/sGX+RLOEoFMhetILAnmg6wMrRrQVac9vw==} - - '@algolia/requester-node-http@4.13.0': - resolution: {integrity: sha512-9b+3O4QFU4azLhGMrZAr/uZPydvzOR4aEZfSL8ZrpLZ7fbbqTO0S/5EVko+QIgglRAtVwxvf8UJ1wzTD2jvKxQ==} - - '@algolia/transporter@4.13.0': - resolution: {integrity: sha512-8tSQYE+ykQENAdeZdofvtkOr5uJ9VcQSWgRhQ9h01AehtBIPAczk/b2CLrMsw5yQZziLs5cZ3pJ3478yI+urhA==} - '@ampproject/remapping@2.2.1': resolution: {integrity: sha512-lFMjJTrFL3j7L9yBxwYfCq2k6qqwHyzuUl/XBnif78PWTJYyL/dfowQHWE3sp6U6ZzqWiiIZnpTMO96zhkjwtg==} engines: {node: '>=6.0.0'} @@ -3082,10 +3034,6 @@ packages: engines: {node: '>=0.4.0'} hasBin: true - address@1.2.2: - resolution: {integrity: sha512-4B/qKCfeE/ODUaAUpSwfzazo5x29WD4r3vXiWsB7I2mSDAihwEqKO+g8GELZUQSSAo5e1XTYh3ZVfLyxBc12nA==} - engines: {node: '>= 10.0.0'} - agent-base@6.0.2: resolution: {integrity: sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==} engines: {node: '>= 6.0.0'} @@ -3101,9 +3049,6 @@ packages: ajv@6.12.6: resolution: {integrity: sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==} - algoliasearch@4.13.0: - resolution: {integrity: sha512-oHv4faI1Vl2s+YC0YquwkK/TsaJs79g2JFg5FDm2rKN12VItPTAeQ7hyJMHarOPPYuCnNC5kixbtcqvb21wchw==} - ansi-escapes@7.0.0: resolution: {integrity: sha512-GdYO7a61mR0fOlAsvC9/rIHf7L96sBc6dEWzeOu+KAea5bZyQRPIpojrVoI4AXGJS/ycu/fBTdLrUkA4ODrvjw==} engines: {node: '>=18'} @@ -3608,11 +3553,6 @@ packages: resolution: {integrity: sha512-bwy0MGW55bG41VqxxypOsdSdGqLwXPI/focwgTYCFMbdUiBAxLg9CFzG08sz2aqzknwiX7Hkl0bQENjg8iLByw==} engines: {node: '>=8'} - detect-port@1.6.1: - resolution: {integrity: sha512-CmnVc+Hek2egPx1PeTFVta2W78xy2K/9Rkf6cC4T59S50tVnzKj+tnx5mmx5lwvCkujZ4uRrpRSuV+IVs3f90Q==} - engines: {node: '>= 4.0.0'} - hasBin: true - devalue@5.1.1: resolution: {integrity: sha512-maua5KUiapvEwiEAe+XnlZ3Rh0GD+qI1J/nb9vrJc3muPXvcF/8gXYTWF76+5DAqHyDUtOIImEuo0YKE9mshVw==} @@ -6456,68 +6396,6 @@ snapshots: '@aashutoshrathi/word-wrap@1.2.6': {} - 
'@algolia/cache-browser-local-storage@4.13.0': - dependencies: - '@algolia/cache-common': 4.13.0 - - '@algolia/cache-common@4.13.0': {} - - '@algolia/cache-in-memory@4.13.0': - dependencies: - '@algolia/cache-common': 4.13.0 - - '@algolia/client-account@4.13.0': - dependencies: - '@algolia/client-common': 4.13.0 - '@algolia/client-search': 4.13.0 - '@algolia/transporter': 4.13.0 - - '@algolia/client-analytics@4.13.0': - dependencies: - '@algolia/client-common': 4.13.0 - '@algolia/client-search': 4.13.0 - '@algolia/requester-common': 4.13.0 - '@algolia/transporter': 4.13.0 - - '@algolia/client-common@4.13.0': - dependencies: - '@algolia/requester-common': 4.13.0 - '@algolia/transporter': 4.13.0 - - '@algolia/client-personalization@4.13.0': - dependencies: - '@algolia/client-common': 4.13.0 - '@algolia/requester-common': 4.13.0 - '@algolia/transporter': 4.13.0 - - '@algolia/client-search@4.13.0': - dependencies: - '@algolia/client-common': 4.13.0 - '@algolia/requester-common': 4.13.0 - '@algolia/transporter': 4.13.0 - - '@algolia/logger-common@4.13.0': {} - - '@algolia/logger-console@4.13.0': - dependencies: - '@algolia/logger-common': 4.13.0 - - '@algolia/requester-browser-xhr@4.13.0': - dependencies: - '@algolia/requester-common': 4.13.0 - - '@algolia/requester-common@4.13.0': {} - - '@algolia/requester-node-http@4.13.0': - dependencies: - '@algolia/requester-common': 4.13.0 - - '@algolia/transporter@4.13.0': - dependencies: - '@algolia/cache-common': 4.13.0 - '@algolia/logger-common': 4.13.0 - '@algolia/requester-common': 4.13.0 - '@ampproject/remapping@2.2.1': dependencies: '@jridgewell/gen-mapping': 0.3.4 @@ -10717,8 +10595,6 @@ snapshots: acorn@8.14.0: {} - address@1.2.2: {} - agent-base@6.0.2: dependencies: debug: 4.3.4 @@ -10745,23 +10621,6 @@ snapshots: json-schema-traverse: 0.4.1 uri-js: 4.4.1 - algoliasearch@4.13.0: - dependencies: - '@algolia/cache-browser-local-storage': 4.13.0 - '@algolia/cache-common': 4.13.0 - '@algolia/cache-in-memory': 4.13.0 - '@algolia/client-account': 4.13.0 - '@algolia/client-analytics': 4.13.0 - '@algolia/client-common': 4.13.0 - '@algolia/client-personalization': 4.13.0 - '@algolia/client-search': 4.13.0 - '@algolia/logger-common': 4.13.0 - '@algolia/logger-console': 4.13.0 - '@algolia/requester-browser-xhr': 4.13.0 - '@algolia/requester-common': 4.13.0 - '@algolia/requester-node-http': 4.13.0 - '@algolia/transporter': 4.13.0 - ansi-escapes@7.0.0: dependencies: environment: 1.1.0 @@ -11219,13 +11078,6 @@ snapshots: detect-libc@2.0.3: {} - detect-port@1.6.1: - dependencies: - address: 1.2.2 - debug: 4.3.4 - transitivePeerDependencies: - - supports-color - devalue@5.1.1: {} diff@5.2.0: {} diff --git a/supabase/migrations/20241024024631_faster_entries_view.sql b/supabase/migrations/20241024024631_faster_entries_view.sql index d7c2906fe..30db6c3e1 100644 --- a/supabase/migrations/20241024024631_faster_entries_view.sql +++ b/supabase/migrations/20241024024631_faster_entries_view.sql @@ -1,7 +1,7 @@ -- '2024-03-09 16:35:50+00', birhor should get a bit more than 1000 entries CREATE INDEX ON entries USING btree ("updated_at", "dictionary_id"); --- DROP FUNCTION entries_from_timestamp(timestamp with time zone, text) CASCADE; +-- function updated in newer migration CREATE OR REPLACE FUNCTION entries_from_timestamp( get_newer_than timestamp with time zone, dict_id text diff --git a/supabase/migrations/20241127042644_tags_content-update-type-and-data.sql b/supabase/migrations/20241127042644_tags_content-update-type-and-data.sql new file mode 100644 index 
000000000..805042c0e --- /dev/null +++ b/supabase/migrations/20241127042644_tags_content-update-type-and-data.sql @@ -0,0 +1,329 @@ +CREATE TABLE tags ( + id uuid unique primary key NOT NULL, + dictionary_id text NOT NULL REFERENCES dictionaries ON DELETE CASCADE, + name text NOT NULL, + private boolean, + created_at timestamp with time zone DEFAULT now() NOT NULL, + created_by uuid NOT NULL REFERENCES auth.users, + updated_at timestamp with time zone DEFAULT now() NOT NULL, + updated_by uuid NOT NULL REFERENCES auth.users, + deleted timestamp with time zone +); + +ALTER TABLE tags ENABLE ROW LEVEL SECURITY; + +CREATE POLICY "Anyone can view tags" +ON tags +FOR SELECT USING (true); + +CREATE TABLE entry_tags ( + entry_id text NOT NULL REFERENCES entries ON DELETE CASCADE, + tag_id uuid NOT NULL REFERENCES tags ON DELETE CASCADE, + created_by uuid NOT NULL REFERENCES auth.users, + created_at timestamp with time zone DEFAULT now() NOT NULL, + deleted timestamp with time zone, + PRIMARY KEY (entry_id, tag_id) +); + +ALTER TABLE entry_tags ENABLE ROW LEVEL SECURITY; + +CREATE TRIGGER set_created_by_trigger_tags +BEFORE UPDATE ON tags +FOR EACH ROW +EXECUTE FUNCTION set_created_by(); + +ALTER TABLE content_updates +ALTER COLUMN change DROP NOT NULL, +ADD COLUMN "data" jsonb, +ADD COLUMN "type" text, +ADD COLUMN tag_id uuid REFERENCES tags; + +CREATE INDEX idx_entry_tags_entry_id ON entry_tags (entry_id); +CREATE INDEX idx_entry_tags_non_deleted ON entry_tags (entry_id) WHERE deleted IS NULL; + +DROP FUNCTION entries_from_timestamp(timestamp with time zone, text) CASCADE; -- must drop and recreate if changing the shape of the function +CREATE FUNCTION entries_from_timestamp( + get_newer_than timestamp with time zone, + dict_id text +) RETURNS TABLE( + id text, + dictionary_id text, + created_at timestamp with time zone, + updated_at timestamp with time zone, + deleted timestamp with time zone, + main jsonb, + senses jsonb, + audios jsonb, + dialect_ids jsonb, + tag_ids jsonb +) AS $$ + WITH aggregated_audio AS ( + SELECT + audio.entry_id, + jsonb_agg( + jsonb_strip_nulls( + jsonb_build_object( + 'id', audio.id, + 'storage_path', audio.storage_path, + 'source', audio.source, + 'speaker_ids', audio_speakers.speaker_ids + ) + ) + ORDER BY audio.created_at) AS audios + FROM audio + LEFT JOIN ( + SELECT + audio_id, + jsonb_agg(speaker_id) AS speaker_ids + FROM audio_speakers + WHERE deleted IS NULL + GROUP BY audio_id + ) AS audio_speakers ON audio_speakers.audio_id = audio.id + WHERE audio.deleted IS NULL + GROUP BY audio.entry_id + ) + SELECT + entries.id AS id, + entries.dictionary_id AS dictionary_id, + entries.created_at, + entries.updated_at, + entries.deleted, + jsonb_strip_nulls( + jsonb_build_object( + 'lexeme', entries.lexeme, + 'phonetic', entries.phonetic, + 'interlinearization', entries.interlinearization, + 'morphology', entries.morphology, + 'notes', entries.notes, + 'sources', entries.sources, + 'scientific_names', entries.scientific_names, + 'coordinates', entries.coordinates, + 'unsupported_fields', entries.unsupported_fields, + 'elicitation_id', entries.elicitation_id + ) + ) AS main, + CASE + WHEN COUNT(senses.id) > 0 THEN jsonb_agg( + jsonb_strip_nulls( + jsonb_build_object( + 'id', senses.id, + 'glosses', senses.glosses, + 'parts_of_speech', senses.parts_of_speech, + 'semantic_domains', senses.semantic_domains, + 'write_in_semantic_domains', senses.write_in_semantic_domains, + 'noun_class', senses.noun_class, + 'definition', senses.definition, + 'plural_form', 
senses.plural_form, + 'variant', senses.variant, + 'sentence_ids', sentence_ids, + 'photo_ids', photo_ids, + 'video_ids', video_ids + ) + ) + ORDER BY senses.created_at + ) + ELSE NULL + END AS senses, + aggregated_audio.audios, + dialect_ids.dialect_ids, + tag_ids.tag_ids + FROM entries + LEFT JOIN senses ON senses.entry_id = entries.id AND senses.deleted IS NULL + LEFT JOIN aggregated_audio ON aggregated_audio.entry_id = entries.id + LEFT JOIN ( + SELECT + entry_id, + jsonb_agg(dialect_id) AS dialect_ids + FROM entry_dialects + WHERE deleted IS NULL + GROUP BY entry_id + ) AS dialect_ids ON dialect_ids.entry_id = entries.id + LEFT JOIN ( + SELECT + entry_id, + jsonb_agg(tag_id) AS tag_ids + FROM entry_tags + WHERE deleted IS NULL + GROUP BY entry_id + ) AS tag_ids ON tag_ids.entry_id = entries.id + LEFT JOIN ( + SELECT + senses_in_sentences.sense_id, + jsonb_agg(senses_in_sentences.sentence_id) AS sentence_ids + FROM senses_in_sentences + JOIN sentences ON sentences.id = senses_in_sentences.sentence_id + WHERE sentences.deleted IS NULL AND senses_in_sentences.deleted IS NULL + GROUP BY senses_in_sentences.sense_id + ) AS sense_sentences ON sense_sentences.sense_id = senses.id + LEFT JOIN ( + SELECT + sense_photos.sense_id, + jsonb_agg(sense_photos.photo_id) AS photo_ids + FROM sense_photos + JOIN photos ON photos.id = sense_photos.photo_id + WHERE photos.deleted IS NULL AND sense_photos.deleted IS NULL + GROUP BY sense_photos.sense_id + ) AS aggregated_photo_ids ON aggregated_photo_ids.sense_id = senses.id + LEFT JOIN ( + SELECT + sense_videos.sense_id, + jsonb_agg(sense_videos.video_id) AS video_ids + FROM sense_videos + JOIN videos ON videos.id = sense_videos.video_id + WHERE videos.deleted IS NULL AND sense_videos.deleted IS NULL + GROUP BY sense_videos.sense_id + ) AS aggregated_video_ids ON aggregated_video_ids.sense_id = senses.id + WHERE entries.updated_at > get_newer_than AND (dict_id = '' OR entries.dictionary_id = dict_id) + GROUP BY entries.id, aggregated_audio.audios, dialect_ids.dialect_ids, tag_ids.tag_ids + ORDER BY entries.updated_at ASC; +$$ LANGUAGE SQL SECURITY DEFINER; + +CREATE MATERIALIZED VIEW materialized_entries_view AS +SELECT * FROM entries_from_timestamp('1970-01-01 01:00:00+00', ''); + +CREATE UNIQUE INDEX idx_materialized_entries_view_id ON materialized_entries_view (id); +REFRESH MATERIALIZED VIEW CONCURRENTLY materialized_entries_view; + +CREATE INDEX idx_materialized_entries_view_updated_at_dictionary_id +ON materialized_entries_view (updated_at, dictionary_id); + +-- use entries_from_timestamp rpc function in app to get entries in a more efficient manner but still keeping the view that calls the function for easy dashboard inspection +DROP VIEW IF EXISTS entries_view; +CREATE VIEW entries_view AS +SELECT * FROM entries_from_timestamp('1970-01-01 01:00:00+00', ''); + +-- duplicate of above with a different where clause for use in the entry page +CREATE FUNCTION entry_by_id( + passed_entry_id text +) RETURNS TABLE( + id text, + dictionary_id text, + created_at timestamp with time zone, + updated_at timestamp with time zone, + deleted timestamp with time zone, + main jsonb, + senses jsonb, + audios jsonb, + dialect_ids jsonb, + tag_ids jsonb +) AS $$ + WITH aggregated_audio AS ( + SELECT + audio.entry_id, + jsonb_agg( + jsonb_strip_nulls( + jsonb_build_object( + 'id', audio.id, + 'storage_path', audio.storage_path, + 'source', audio.source, + 'speaker_ids', audio_speakers.speaker_ids + ) + ) + ORDER BY audio.created_at) AS audios + FROM audio + LEFT JOIN ( 
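+      -- gather the non-deleted speaker ids attached to each audio record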
+ SELECT + audio_id, + jsonb_agg(speaker_id) AS speaker_ids + FROM audio_speakers + WHERE deleted IS NULL + GROUP BY audio_id + ) AS audio_speakers ON audio_speakers.audio_id = audio.id + WHERE audio.deleted IS NULL + GROUP BY audio.entry_id + ) + SELECT + entries.id AS id, + entries.dictionary_id AS dictionary_id, + entries.created_at, + entries.updated_at, + entries.deleted, + jsonb_strip_nulls( + jsonb_build_object( + 'lexeme', entries.lexeme, + 'phonetic', entries.phonetic, + 'interlinearization', entries.interlinearization, + 'morphology', entries.morphology, + 'notes', entries.notes, + 'sources', entries.sources, + 'scientific_names', entries.scientific_names, + 'coordinates', entries.coordinates, + 'unsupported_fields', entries.unsupported_fields, + 'elicitation_id', entries.elicitation_id + ) + ) AS main, + CASE + WHEN COUNT(senses.id) > 0 THEN jsonb_agg( + jsonb_strip_nulls( + jsonb_build_object( + 'id', senses.id, + 'glosses', senses.glosses, + 'parts_of_speech', senses.parts_of_speech, + 'semantic_domains', senses.semantic_domains, + 'write_in_semantic_domains', senses.write_in_semantic_domains, + 'noun_class', senses.noun_class, + 'definition', senses.definition, + 'plural_form', senses.plural_form, + 'variant', senses.variant, + 'sentence_ids', sentence_ids, + 'photo_ids', photo_ids, + 'video_ids', video_ids + ) + ) + ORDER BY senses.created_at + ) + ELSE NULL + END AS senses, + aggregated_audio.audios, + dialect_ids.dialect_ids, + tag_ids.tag_ids + FROM entries + LEFT JOIN senses ON senses.entry_id = entries.id AND senses.deleted IS NULL + LEFT JOIN aggregated_audio ON aggregated_audio.entry_id = entries.id + LEFT JOIN ( + SELECT + entry_id, + jsonb_agg(dialect_id) AS dialect_ids + FROM entry_dialects + WHERE deleted IS NULL + GROUP BY entry_id + ) AS dialect_ids ON dialect_ids.entry_id = entries.id + LEFT JOIN ( + SELECT + entry_id, + jsonb_agg(tag_id) AS tag_ids + FROM entry_tags + WHERE deleted IS NULL + GROUP BY entry_id + ) AS tag_ids ON tag_ids.entry_id = entries.id + LEFT JOIN ( + SELECT + senses_in_sentences.sense_id, + jsonb_agg(senses_in_sentences.sentence_id) AS sentence_ids + FROM senses_in_sentences + JOIN sentences ON sentences.id = senses_in_sentences.sentence_id + WHERE sentences.deleted IS NULL AND senses_in_sentences.deleted IS NULL + GROUP BY senses_in_sentences.sense_id + ) AS sense_sentences ON sense_sentences.sense_id = senses.id + LEFT JOIN ( + SELECT + sense_photos.sense_id, + jsonb_agg(sense_photos.photo_id) AS photo_ids + FROM sense_photos + JOIN photos ON photos.id = sense_photos.photo_id + WHERE photos.deleted IS NULL AND sense_photos.deleted IS NULL + GROUP BY sense_photos.sense_id + ) AS aggregated_photo_ids ON aggregated_photo_ids.sense_id = senses.id + LEFT JOIN ( + SELECT + sense_videos.sense_id, + jsonb_agg(sense_videos.video_id) AS video_ids + FROM sense_videos + JOIN videos ON videos.id = sense_videos.video_id + WHERE videos.deleted IS NULL AND sense_videos.deleted IS NULL + GROUP BY sense_videos.sense_id + ) AS aggregated_video_ids ON aggregated_video_ids.sense_id = senses.id + WHERE entries.id = passed_entry_id + GROUP BY entries.id, aggregated_audio.audios, dialect_ids.dialect_ids, tag_ids.tag_ids + ORDER BY entries.updated_at ASC; +$$ LANGUAGE SQL SECURITY DEFINER; \ No newline at end of file diff --git a/supabase/seed.sql b/supabase/seed.sql index 0776521db..53f18a2a1 100644 --- a/supabase/seed.sql +++ b/supabase/seed.sql @@ -1,6 +1,6 @@ INSERT INTO auth.users ("aud", "email", "id", "instance_id", "role") VALUES -('authenticated', 
'jacob@livingtongues.org', '12345678-abcd-efab-cdef-123456789012', '00000000-0000-0000-0000-000000000000', 'authenticated'), -('authenticated', 'diego@livingtongues.org', '12345678-abcd-efab-cdef-123456789013', '00000000-0000-0000-0000-000000000000', 'authenticated'); +('authenticated', 'jacob@livingtongues.org', 'de2d3715-6337-45a3-a81a-d82c3210b2a7', '00000000-0000-0000-0000-000000000000', 'authenticated'), +('authenticated', 'diego@livingtongues.org', 'be43b1dd-6c64-494d-b5da-10d70c384433', '00000000-0000-0000-0000-000000000000', 'authenticated'); \ No newline at end of file diff --git a/vitest.workspace.ts b/vitest.workspace.ts index 62c81eff8..ba06fe585 100644 --- a/vitest.workspace.ts +++ b/vitest.workspace.ts @@ -4,6 +4,5 @@ export default defineWorkspace([ 'packages/site/vitest.config.ts', 'packages/types/vitest.config.ts', 'packages/scripts/vitest.config.ts', - 'packages/functions/vitest.config.ts', 'packages/ids-import/vitest.config.ts', ])
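
For reference, the new `entry_by_id` function is a plain Postgres function exposed through the Supabase RPC interface, so it can be called from any client code, not only the entry page load function changed above. The snippet below is a minimal sketch and not part of this patch: it assumes an untyped Supabase client created with this project's URL and anon key (the environment variable names are illustrative) and simply logs the lexeme and the new `tag_ids` array for one entry.

    import { createClient } from '@supabase/supabase-js'

    // Illustrative env var names; substitute whatever this project actually configures.
    const supabase = createClient(process.env.PUBLIC_SUPABASE_URL!, process.env.PUBLIC_SUPABASE_ANON_KEY!)

    async function fetch_entry(entry_id: string) {
      // entry_by_id returns a one-row result shaped like entries_view, including the new tag_ids
      const { data, error } = await supabase.rpc('entry_by_id', { passed_entry_id: entry_id })
      if (error) throw error
      const [entry] = data
      console.info(entry.main.lexeme, entry.tag_ids ?? [])
      return entry
    }

Because `entry_by_id` is SECURITY DEFINER and returns the same column shape as `entries_view`, switching between the view and the RPC only changes how a single entry is fetched, not how it is rendered.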