From 3f4890afc085712e0c384a0c119991728c956a69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Diego=20C=C3=B3rdova=20Nieto?= <43384963+Danble@users.noreply.github.com> Date: Wed, 10 Jul 2024 21:44:06 -0600 Subject: [PATCH] Imports can have additional senses (#404) * use the same column names for all other example sentences even with multiple senses * allow multiple sentences linked to the same sense * check for dev server when importing entries * type import row everywhere, add speaker right away when new speaker encountered --------- Co-authored-by: livingtongues <89873785+livingtongues@users.noreply.github.com> Co-authored-by: Jacob Bowdoin <7559478+jacob-8@users.noreply.github.com> --- .github/workflows/lint.yml | 4 +- .vscode/settings.json | 2 + FLEx.model.ts | 82 +- eslint.config.js | 4 +- packages/scripts/.gitignore | 1 + .../scripts/algolia/addDictionariesToIndex.ts | 50 +- packages/scripts/algolia/algolia.ts | 31 +- packages/scripts/algolia/updateIndex.ts | 48 +- .../scripts/{config.ts => config-firebase.ts} | 50 +- .../supabase-db.ts => config-supabase.ts} | 10 + packages/scripts/countAllEntries.ts | 28 +- .../import/convertJsonRowToEntryFormat.ts | 76 - ...nvert_row_to_objects_for_databases.test.ts | 1505 +++++++++++++++++ .../convert_row_to_objects_for_databases.ts | 197 +++ packages/scripts/import/data/.gitignore | 1 + .../example-v4-senses/example-v4-senses.csv | 7 + packages/scripts/import/import-media.ts | 73 +- .../import/import-to-firebase-supabase.ts | 207 +++ packages/scripts/import/import.ts | 28 +- .../convertJsonRowToEntryFormat.test.ts | 85 +- .../import/old/convertJsonRowToEntryFormat.ts | 153 ++ .../import/{ => old}/import-spreadsheet-v4.ts | 104 +- .../scripts/import/old/import-spreadsheet.ts | 2 +- packages/scripts/import/parse-csv.ts | 20 +- packages/scripts/import/post-request.ts | 47 + packages/scripts/import/row.type.ts | 23 + packages/scripts/migrate-to-supabase/auth.ts | 4 +- packages/scripts/package.json | 3 + packages/scripts/refactor/entry-refactor.ts | 3 +- .../refactor/move-firestore-document.ts | 70 +- .../refactor/upload-old-dictionaries.ts | 24 +- packages/scripts/tsconfig.json | 34 +- .../site/src/db-tests/update-sense.test.ts | 151 +- .../src/lib/glosses/glossing-languages.ts | 11 +- .../lib/mocks/seed/write-seed-and-reset-db.ts | 2 +- .../site/src/lib/supabase/change/sense.ts | 28 +- .../site/src/lib/supabase/generated.types.ts | 52 +- .../entry/[entryId]/EntryField.svelte | 20 +- .../entry/[entryId]/EntryMedia.svelte | 2 +- .../routes/api/db/content-update/+server.ts | 74 +- packages/types/gloss.interface.ts | 21 +- packages/types/index.ts | 1 + .../supabase/content-update.interface.ts | 60 + pnpm-lock.yaml | 65 +- supabase/ideas/manager_policy.sql | 5 + .../migrations/20240225012557_updates.sql | 3 +- .../migrations/20240322012208_import-id.sql | 2 + 47 files changed, 2832 insertions(+), 641 deletions(-) rename packages/scripts/{config.ts => config-firebase.ts} (52%) rename packages/scripts/{migrate-to-supabase/supabase-db.ts => config-supabase.ts} (53%) delete mode 100644 packages/scripts/import/convertJsonRowToEntryFormat.ts create mode 100644 packages/scripts/import/convert_row_to_objects_for_databases.test.ts create mode 100644 packages/scripts/import/convert_row_to_objects_for_databases.ts create mode 100644 packages/scripts/import/data/example-v4-senses/example-v4-senses.csv create mode 100644 packages/scripts/import/import-to-firebase-supabase.ts rename packages/scripts/import/{ => old}/convertJsonRowToEntryFormat.test.ts (83%) create mode 
100644 packages/scripts/import/old/convertJsonRowToEntryFormat.ts rename packages/scripts/import/{ => old}/import-spreadsheet-v4.ts (51%) create mode 100644 packages/scripts/import/post-request.ts create mode 100644 packages/scripts/import/row.type.ts create mode 100644 packages/types/supabase/content-update.interface.ts create mode 100644 supabase/ideas/manager_policy.sql create mode 100644 supabase/migrations/20240322012208_import-id.sql diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index e58a6b81d..e1d3e80fa 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -21,9 +21,11 @@ jobs: - run: pnpm install - name: Run ESLint on changed files - uses: tj-actions/eslint-changed-files@v24 + uses: tj-actions/eslint-changed-files@v25 with: config_path: eslint.config.js + # escape_paths: false - needed if using SvelteKit parenthesis in routes + level: error file_extensions: | **/*.ts **/*.js diff --git a/.vscode/settings.json b/.vscode/settings.json index d8550da05..cfcb791c5 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -91,6 +91,8 @@ "a11y-no-noninteractive-element-interactions": "ignore", }, + "comments.visible": false, + "deno.enable": true, "deno.lint": true, "deno.enablePaths": [ diff --git a/FLEx.model.ts b/FLEx.model.ts index 2c4cdc3e4..f5cb2097f 100644 --- a/FLEx.model.ts +++ b/FLEx.model.ts @@ -1,10 +1,10 @@ interface Entry { - id: string; - lexemeForm: MultiString; // thanks for this inspiration - we started with just a lexeme string field, and then people asked for more orthographies and we made them second class citizens as optional alternate orthographies. This accomplishes the same purpose as the multi-string here but it's not as elegant and has pain points. For example, once someone decided they wanted to make an alternate orthography the main orthography, but they couldn't. So I don't like it our current model and will use a MultiString after our migration. - citationForm: MultiString; // Am I correct that citation form is a convention from a world where print is the only medium? We don't have this field. In my opinion, if you have a lexeme that is important enough to add and gloss, like "radii" and the citation form is "radius", then in a digital dictionary, these belong as two separate entries with a relationship from radii to radius. Not sure what the relationship would be called but something like "child-citation" indicating that the "radii" entry is really an offshoot of the base word "radius". But at the end of the day we do have a very simple print view, so print conventions are still in view but in our world they are second-class citizens. Web usage with easy bouncing between entries via links is first-class. However, we don't have a system for relationships yet. That will be a further down the road benefit of our migration. In that it will be easy to indicate relationships between entries. For now we do have a few additional fields users can use to add some basic info like a "plural_form" field, and a deprecated "variant" field. I don't really like these but we have them at the moment. - literalMeaning: MultiString; // What is this field for? We have nothing like it. Meaning is based on sense and you already have gloss and definition fields there. - senses: Sense[]; - note: MultiString; // our notes field is just a string - is this going to cause grief when importing flex data. 
Is this designed this way for people to write notes in whatever writing system they like and be able to have fonts applied appropriately? + id: string + lexemeForm: MultiString // thanks for this inspiration - we started with just a lexeme string field, and then people asked for more orthographies and we made them second class citizens as optional alternate orthographies. This accomplishes the same purpose as the multi-string here but it's not as elegant and has pain points. For example, once someone decided they wanted to make an alternate orthography the main orthography, but they couldn't. So I don't like our current model and will use a MultiString after our migration. + citationForm: MultiString // Am I correct that citation form is a convention from a world where print is the only medium? We don't have this field. In my opinion, if you have a lexeme that is important enough to add and gloss, like "radii" and the citation form is "radius", then in a digital dictionary, these belong as two separate entries with a relationship from radii to radius. Not sure what the relationship would be called but something like "child-citation" indicating that the "radii" entry is really an offshoot of the base word "radius". But at the end of the day we do have a very simple print view, so print conventions are still in view but in our world they are second-class citizens. Web usage with easy bouncing between entries via links is first-class. However, we don't have a system for relationships yet. That will be a further-down-the-road benefit of our migration, in that it will be easy to indicate relationships between entries. For now we do have a few additional fields users can use to add some basic info like a "plural_form" field, and a deprecated "variant" field. I don't really like these but we have them at the moment. + literalMeaning: MultiString // What is this field for? We have nothing like it. Meaning is based on sense and you already have gloss and definition fields there. + senses: Sense[] + note: MultiString // our notes field is just a string - we are going to move to using MultiString to allow for different analysis writing systems. Needed when importing flex data. // Additional fields we have // phonetic?: string; // morphology?: string; @@ -16,63 +16,63 @@ interface Entry { } interface Sense { - id: string; - gloss: MultiString; - definition: MultiString; // we have this field used in our first dictionary but we don't show the field when it is empty (ie - we don't encourage it's use and just use glosses but that could change) - partOfSpeech: string; // this is an array because some entries serve as multiple parts of speech, we have a specific set which are keys that are translated in the UI (eg. "n" -> "noun" in English / "sustantivo" in Spanish) - semanticDomain: string[]; // we have a specific set which are keys that are translated in the UI (it's a majorly simplified system modeled after SemDom with some adjustments) a universal set of domains is nice for cross-linguistic work but doesn't always fit the semantic categories of a language so future growth in our semantic domains field could go a lot of different directions depending on needs, like accepting different systems (ie -SEMDOM) or letting a dictionary itself set up custom domains. We also plan to introduce tags, which would be multi-purpose for many different applications and that may negate the need for a dictionary to create their own domains. 
+ id: string + gloss: MultiString + definition: MultiString // we have this field used in our first dictionary but we don't show the field when it is empty (ie - we don't encourage its use and just use glosses but that could change) + partOfSpeech: string // this is an array because some entries serve as multiple parts of speech, we have a specific set which are keys that are translated in the UI (eg. "n" -> "noun" in English / "sustantivo" in Spanish) + semanticDomain: string[] // we have a specific set which are keys that are translated in the UI (it's a majorly simplified system modeled after SemDom with some adjustments) a universal set of domains is nice for cross-linguistic work but doesn't always fit the semantic categories of a language so future growth in our semantic domains field could go a lot of different directions depending on needs, like accepting different systems (ie -SEMDOM) or letting a dictionary itself set up custom domains. We also plan to introduce tags, which would be multi-purpose for many different applications and that may negate the need for a dictionary to create their own domains. // write_in_semantic_domains?: string[] // used to support legacy dictionaries, and obviously not translated. We show these and let users delete these and swap them out for the new system, but we don't allow editing or adding. - exampleSentences: ExampleSentence[]; + exampleSentences: ExampleSentence[] // noun_class?: string; additional field we have } interface ExampleSentence { // upgrading these to be first class citizens called Sentence - id: string; - sentence: MultiString; - translation: MultiString; - reference: string; // further fields like this haven't been thought through yet but there's room to grow + id: string + sentence: MultiString + translation: MultiString + reference: string // further fields like this haven't been thought through yet but there's room to grow } interface MultiString { - values: Record; // Our current use of something that's like MultiString doesn't nest values underneath a "values" key but it works the same way. It's just Record as in `gloss: { "en": "dog", "es": "perro" }` - is there a good reason to nest under values beside leaving room for adding notes or something in the future? What is the reason for the "values" key? As I expand our use of this MultiString idea, I'd like to know more about your experience here. + values: Record // Our current use of something that's like MultiString doesn't nest values underneath a "values" key but it works the same way. It's just Record as in `gloss: { "en": "dog", "es": "perro" }` - is there a good reason to nest under values besides leaving room for adding notes or something in the future? What is the reason for the "values" key? As I expand our use of this MultiString idea, I'd like to know more about your experience here. } interface WritingSystem { - id: WritingSystemId; - name: string; - abbreviation: string; - font: string; + id: WritingSystemId + name: string + abbreviation: string + font: string } interface WritingSystems { - analysis: WritingSystem[]; // let's pretend I'm studying a Native American language. This could be English and Spanish for example... - vernacular: WritingSystem[]; // and this might be Latin script and a native script? + analysis: WritingSystem[] // let's pretend I'm studying a Native American language. This could be English and Spanish for example... + vernacular: WritingSystem[] // and this might be Latin script and a native script? 
} -type WritingSystemId = string; +type WritingSystemId = string export interface ILexboxApiHub { - GetWritingSystems(): Promise; - GetEntries(options: QueryOptions): Promise; - SearchEntries(query: string, options: QueryOptions): Promise; - GetEntry(id: string): Promise; - CreateEntry(entry: Entry): Promise; - UpdateEntry(id: string, update: JsonOperation[]): Promise; - DeleteEntry(id: string): Promise; - CreateSense(entryId: string, sense: Sense): Promise; - UpdateSense(entryId: string, senseId: string, update: JsonOperation[]): Promise; - DeleteSense(entryId: string, senseId: string): Promise; - CreateExampleSentence(entryId: string, senseId: string, exampleSentence: ExampleSentence): Promise; - UpdateExampleSentence(entryId: string, senseId: string, exampleSentenceId: string, update: JsonOperation[]): Promise; - DeleteExampleSentence(entryId: string, senseId: string, exampleSentenceId: string): Promise; + GetWritingSystems: () => Promise + GetEntries: (options: QueryOptions) => Promise + SearchEntries: (query: string, options: QueryOptions) => Promise + GetEntry: (id: string) => Promise + CreateEntry: (entry: Entry) => Promise + UpdateEntry: (id: string, update: JsonOperation[]) => Promise + DeleteEntry: (id: string) => Promise + CreateSense: (entryId: string, sense: Sense) => Promise + UpdateSense: (entryId: string, senseId: string, update: JsonOperation[]) => Promise + DeleteSense: (entryId: string, senseId: string) => Promise + CreateExampleSentence: (entryId: string, senseId: string, exampleSentence: ExampleSentence) => Promise + UpdateExampleSentence: (entryId: string, senseId: string, exampleSentenceId: string, update: JsonOperation[]) => Promise + DeleteExampleSentence: (entryId: string, senseId: string, exampleSentenceId: string) => Promise } interface QueryOptions { - order: string; - count: number; - offset: number; + order: string + count: number + offset: number } interface JsonOperation { - do_no_know_yet: string; + do_no_know_yet: string } diff --git a/eslint.config.js b/eslint.config.js index d6345b117..32e71b0e9 100644 --- a/eslint.config.js +++ b/eslint.config.js @@ -50,7 +50,6 @@ export default antfu( files: ['**/*.test.ts'], rules: { 'test/consistent-test-it': ['error', { fn: 'test' }], - 'test/no-commented-out-tests': 'error', 'test/no-disabled-tests': 'error', 'test/consistent-test-filename': 'error', 'test/expect-expect': 'error', @@ -71,6 +70,7 @@ export default antfu( 'test/prefer-to-have-length': 'error', 'test/valid-describe-callback': 'error', 'test/valid-expect': 'error', + 'test/no-commented-out-tests': 'warn', }, }, { @@ -87,6 +87,8 @@ export default antfu( 'no-console': 'off', 'ts/no-unused-vars': 'off', 'ts/no-var-requires': 'off', + 'node/prefer-global/process': 'off', + 'unused-imports/no-unused-vars': 'off', }, }, { diff --git a/packages/scripts/.gitignore b/packages/scripts/.gitignore index 91b509c79..7f2090e84 100644 --- a/packages/scripts/.gitignore +++ b/packages/scripts/.gitignore @@ -2,3 +2,4 @@ logs service-account* .env sheets-viewer-SA.json +.env.supabase \ No newline at end of file diff --git a/packages/scripts/algolia/addDictionariesToIndex.ts b/packages/scripts/algolia/addDictionariesToIndex.ts index a770c9ea0..29fc69466 100644 --- a/packages/scripts/algolia/addDictionariesToIndex.ts +++ b/packages/scripts/algolia/addDictionariesToIndex.ts @@ -1,41 +1,39 @@ -import { db } from '../config'; -import { updateIndex } from './algolia'; -import { ActualDatabaseEntry } from '@living-dictionaries/types'; +import type { ActualDatabaseEntry } from 
'@living-dictionaries/types' +import * as prepare from '@living-dictionaries/functions/src/algolia/prepareDataForIndex' +import { db } from '../config-firebase' +import { updateIndex } from './algolia' // import { prepareDataForIndex } from '@living-dictionaries/functions/src/algolia/prepareDataForIndex'; -import * as prepare from '@living-dictionaries/functions/src/algolia/prepareDataForIndex'; -// @ts-ignore +// @ts-expect-error const prepareDataForIndex = prepare.default - .prepareDataForIndex as typeof import('@living-dictionaries/functions/src/algolia/prepareDataForIndex').prepareDataForIndex; // b/c file is declared to be commonjs by its package.json + .prepareDataForIndex as typeof import('@living-dictionaries/functions/src/algolia/prepareDataForIndex').prepareDataForIndex // b/c file is declared to be commonjs by its package.json -const indexAllDictionaries = async () => { - const dictionariesSnapshot = await db.collection(`dictionaries`).get(); - const dictionaryIds = dictionariesSnapshot.docs.map((doc) => doc.id); - console.log(dictionaryIds); - process.stdout.write(dictionaryIds + '\n'); +async function indexAllDictionaries() { + const dictionariesSnapshot = await db.collection(`dictionaries`).get() + const dictionaryIds = dictionariesSnapshot.docs.map(doc => doc.id) + console.log(dictionaryIds) + process.stdout.write(`${dictionaryIds}\n`) for (const dictionaryId of dictionaryIds) - await indexDictionary(dictionaryId); - -}; + await indexDictionary(dictionaryId) +} async function indexDictionary(dictionaryId: string) { - const entriesSnapshot = await db.collection(`dictionaries/${dictionaryId}/words`).get(); - const entries = await prepareEntriesFromSnapshot(entriesSnapshot, dictionaryId); - await updateIndex(entries); + const entriesSnapshot = await db.collection(`dictionaries/${dictionaryId}/words`).get() + const entries = await prepareEntriesFromSnapshot(entriesSnapshot, dictionaryId) + await updateIndex(entries) } -// eslint-disable-next-line no-undef async function prepareEntriesFromSnapshot(entriesSnapshot: FirebaseFirestore.QuerySnapshot, dictionaryId: string) { const entryPromises = entriesSnapshot.docs.map(async (doc) => { - const dbEntry = doc.data() as ActualDatabaseEntry; - const algoliaEntry = await prepareDataForIndex(dbEntry, dictionaryId, db); - console.log({ dbEntry, algoliaEntry}); - return { ...algoliaEntry, objectID: doc.id }; - }); - - const entries = await Promise.all(entryPromises); - return entries; + const dbEntry = doc.data() as ActualDatabaseEntry + const algoliaEntry = await prepareDataForIndex(dbEntry, dictionaryId, db) + console.log({ dbEntry, algoliaEntry }) + return { ...algoliaEntry, objectID: doc.id } + }) + + const entries = await Promise.all(entryPromises) + return entries } // indexAllDictionaries(); diff --git a/packages/scripts/algolia/algolia.ts b/packages/scripts/algolia/algolia.ts index 67bc590a9..72255de4b 100644 --- a/packages/scripts/algolia/algolia.ts +++ b/packages/scripts/algolia/algolia.ts @@ -1,32 +1,31 @@ -import algoliasearch from 'algoliasearch'; -import { projectId } from '../config'; -import { adminKey } from './algolia-admin-key.json'; -import { AlgoliaEntry } from '@living-dictionaries/types'; +import algoliasearch from 'algoliasearch' +import type { AlgoliaEntry } from '@living-dictionaries/types' +import { projectId } from '../config-firebase' +import { adminKey } from './algolia-admin-key.json' -const ALGOLIA_APP_ID = 'XCVBAYSYXD'; +const ALGOLIA_APP_ID = 'XCVBAYSYXD' -export const client = algoliasearch(ALGOLIA_APP_ID, 
adminKey); +export const client = algoliasearch(ALGOLIA_APP_ID, adminKey) const index = client.initIndex( - projectId === 'talking-dictionaries-dev' ? 'entries_dev' : 'entries_prod' -); + projectId === 'talking-dictionaries-dev' ? 'entries_dev' : 'entries_prod', +) -const MAX_CHUNK_SIZE = 3000; +const MAX_CHUNK_SIZE = 3000 // https://www.algolia.com/doc/api-reference/api-methods/add-objects/#examples // if forced to iterate instead of save all at once, take note of the rate limiting at 5000 backlogged requests https://www.algolia.com/doc/faq/indexing/is-there-a-rate-limit/ export async function updateIndex(entries: AlgoliaEntry[]) { try { for (let startOfChunkIndex = 0; startOfChunkIndex < entries.length; startOfChunkIndex += MAX_CHUNK_SIZE) { - const endOfChunk = startOfChunkIndex + MAX_CHUNK_SIZE; - const chunk = entries.slice(startOfChunkIndex, endOfChunk); - console.log({ startOfChunkIndex, endOfChunk, CHUNK_SIZE: MAX_CHUNK_SIZE, chunkLength: chunk.length }); + const endOfChunk = startOfChunkIndex + MAX_CHUNK_SIZE + const chunk = entries.slice(startOfChunkIndex, endOfChunk) + console.log({ startOfChunkIndex, endOfChunk, CHUNK_SIZE: MAX_CHUNK_SIZE, chunkLength: chunk.length }) - const { objectIDs } = await index.saveObjects(chunk); - console.log(`Entries indexed: ${objectIDs.length}`); + const { objectIDs } = await index.saveObjects(chunk) + console.log(`Entries indexed: ${objectIDs.length}`) } } catch (err) { - console.log(err); + console.log(err) } - } diff --git a/packages/scripts/algolia/updateIndex.ts b/packages/scripts/algolia/updateIndex.ts index f67d2ef64..4e33b1cfa 100644 --- a/packages/scripts/algolia/updateIndex.ts +++ b/packages/scripts/algolia/updateIndex.ts @@ -1,45 +1,43 @@ -import { db } from '../config'; -import { updateIndex } from './algolia'; -import { ActualDatabaseEntry } from '@living-dictionaries/types'; +import type { ActualDatabaseEntry } from '@living-dictionaries/types' +import * as prepare from '@living-dictionaries/functions/src/algolia/prepareDataForIndex' +import { db } from '../config-firebase' +import { updateIndex } from './algolia' // import { prepareDataForIndex } from '@living-dictionaries/functions/src/algolia/prepareDataForIndex'; -import * as prepare from '@living-dictionaries/functions/src/algolia/prepareDataForIndex'; -// @ts-ignore +// @ts-expect-error const prepareDataForIndex = prepare.default - .prepareDataForIndex as typeof import('@living-dictionaries/functions/src/algolia/prepareDataForIndex').prepareDataForIndex; // b/c file is declared to be commonjs by its package.json + .prepareDataForIndex as typeof import('@living-dictionaries/functions/src/algolia/prepareDataForIndex').prepareDataForIndex // b/c file is declared to be commonjs by its package.json async function updateMostRecentEntries(count: number, { dry = true }) { - const entriesSnapshot = await db.collectionGroup('words').orderBy('ua', 'desc').limit(count).get(); - const entries = await prepareEntriesFromSnapshot(entriesSnapshot); + const entriesSnapshot = await db.collectionGroup('words').orderBy('ua', 'desc').limit(count).get() + const entries = await prepareEntriesFromSnapshot(entriesSnapshot) if (!dry) - await updateIndex(entries); + await updateIndex(entries) } - async function updateIndexByField(fieldToIndex: string, { dry = true }) { // The field must be indexed first in Firebase - const entriesSnapshot = await db.collectionGroup('words').where(fieldToIndex, '!=', null).get(); - const entries = await prepareEntriesFromSnapshot(entriesSnapshot); + const entriesSnapshot = 
await db.collectionGroup('words').where(fieldToIndex, '!=', null).get() + const entries = await prepareEntriesFromSnapshot(entriesSnapshot) if (!dry) - await updateIndex(entries); + await updateIndex(entries) } -// eslint-disable-next-line no-undef async function prepareEntriesFromSnapshot(entriesSnapshot: FirebaseFirestore.QuerySnapshot) { const entryPromises = entriesSnapshot.docs.map(async (doc) => { - const dbEntry = doc.data() as ActualDatabaseEntry; - const dictionaryId = doc.ref.parent.parent.id; // dictionary/words/entry-123 -> doc.ref: entry-123, doc.ref.parent: words, doc.ref.parent.parent: dictionary - const algoliaEntry = await prepareDataForIndex(dbEntry, dictionaryId, db); - const time = dbEntry.ua.toDate(); - console.log({ dbEntry, algoliaEntry, time }); - return { ...algoliaEntry, objectID: doc.id }; - }); - - const entries = await Promise.all(entryPromises); - return entries; + const dbEntry = doc.data() as ActualDatabaseEntry + const dictionaryId = doc.ref.parent.parent.id // dictionary/words/entry-123 -> doc.ref: entry-123, doc.ref.parent: words, doc.ref.parent.parent: dictionary + const algoliaEntry = await prepareDataForIndex(dbEntry, dictionaryId, db) + const time = dbEntry.ua.toDate() + console.log({ dbEntry, algoliaEntry, time }) + return { ...algoliaEntry, objectID: doc.id } + }) + + const entries = await Promise.all(entryPromises) + return entries } // updateIndexByField('nc', { dry: true }); -updateMostRecentEntries(300, { dry: false }); +updateMostRecentEntries(300, { dry: false }) diff --git a/packages/scripts/config.ts b/packages/scripts/config-firebase.ts similarity index 52% rename from packages/scripts/config.ts rename to packages/scripts/config-firebase.ts index 40cbb2b2e..b84985f6e 100644 --- a/packages/scripts/config.ts +++ b/packages/scripts/config-firebase.ts @@ -1,44 +1,44 @@ -import { program } from 'commander'; -import { initializeApp, cert } from 'firebase-admin/app'; -import { FieldValue, getFirestore } from 'firebase-admin/firestore'; -import { getStorage } from 'firebase-admin/storage'; -import { getAuth } from 'firebase-admin/auth'; +import fs from 'node:fs' +import { program } from 'commander' +import { cert, initializeApp } from 'firebase-admin/app' +import { FieldValue, getFirestore } from 'firebase-admin/firestore' +import { getStorage } from 'firebase-admin/storage' +import { getAuth } from 'firebase-admin/auth' // import serviceAccountDev from './service-account-dev.json'; // import serviceAccountProd from './service-account-prod.json'; -import { serviceAccountDev, serviceAccountProd } from './service-accounts'; +import { serviceAccountDev, serviceAccountProd } from './service-accounts' + +/// LOGGER/// program // .version('0.0.1') .option('-e, --environment [dev/prod]', 'Firebase Project', 'dev') .allowUnknownOption() // because config is shared by multiple scripts - .parse(process.argv); + .parse(process.argv) -export const environment = program.opts().environment === 'prod' ? 'prod' : 'dev'; -export const projectId = - environment === 'prod' ? 'talking-dictionaries-alpha' : 'talking-dictionaries-dev'; +export const environment = program.opts().environment === 'prod' ? 'prod' : 'dev' +export const projectId + = environment === 'prod' ? 'talking-dictionaries-alpha' : 'talking-dictionaries-dev' -const serviceAccount = environment === 'dev' ? serviceAccountDev : serviceAccountProd; +const serviceAccount = environment === 'dev' ? 
serviceAccountDev : serviceAccountProd initializeApp({ // @ts-expect-error credential: cert(serviceAccount), databaseURL: `https://${projectId}.firebaseio.com`, storageBucket: `${projectId}.appspot.com`, -}); -export const db = getFirestore(); +}) +export const db = getFirestore() // const settings = { timestampsInSnapshots: true }; // db.settings(settings); -export const timestamp = FieldValue.serverTimestamp(); -export const storage = getStorage(); -export const auth = getAuth(); - -///LOGGER/// -import fs from 'fs'; -const logFile = fs.createWriteStream(`./logs/${Date.now()}.txt`, { flags: 'w' }); // 'a' to append, 'w' to truncate the file every time the process starts. +export const timestamp = FieldValue.serverTimestamp() +export const storage = getStorage() +export const auth = getAuth() +const logFile = fs.createWriteStream(`./logs/${Date.now()}.txt`, { flags: 'w' }) // 'a' to append, 'w' to truncate the file every time the process starts. console.log = function (data: any) { - logFile.write(JSON.stringify(data) + '\n'); - process.stdout.write(JSON.stringify(data) + '\n'); -}; -///END-LOGGER/// + logFile.write(`${JSON.stringify(data)}\n`) + process.stdout.write(`${JSON.stringify(data)}\n`) +} +/// END-LOGGER/// -console.log(`Running on ${environment}`); +console.log(`Running on ${environment}`) diff --git a/packages/scripts/migrate-to-supabase/supabase-db.ts b/packages/scripts/config-supabase.ts similarity index 53% rename from packages/scripts/migrate-to-supabase/supabase-db.ts rename to packages/scripts/config-supabase.ts index 6b3f4802e..3e36fd64d 100644 --- a/packages/scripts/migrate-to-supabase/supabase-db.ts +++ b/packages/scripts/config-supabase.ts @@ -1,12 +1,22 @@ import PG from 'pg' +import { createClient } from '@supabase/supabase-js' +import type { Database } from '@living-dictionaries/site/src/lib/supabase/database.types' +import * as dotenv from 'dotenv' + +dotenv.config({ path: '.env.supabase' }) + +export const supabase = createClient(process.env.PUBLIC_SUPABASE_API_URL, process.env.SUPABASE_SERVICE_ROLE_KEY) export async function executeQuery(query: string) { const client = new PG.Client({ user: 'postgres', host: '127.0.0.1', + // host: 'db.actkqboqpzniojhgtqzw.supabase.co', database: 'postgres', password: 'postgres', + // password: '**', port: 54322, + // port: 5432, }) try { await client.connect() diff --git a/packages/scripts/countAllEntries.ts b/packages/scripts/countAllEntries.ts index c35ad4ccc..1da24edea 100644 --- a/packages/scripts/countAllEntries.ts +++ b/packages/scripts/countAllEntries.ts @@ -1,25 +1,25 @@ -import { db } from './config'; +import { db } from './config-firebase' export async function countAllEntries() { - let overallEntryCount = 0; + let overallEntryCount = 0 - const dictionarySnaps = await db.collection('dictionaries').get(); - const dictionaryIds = dictionarySnaps.docs.map(doc => doc.id); + const dictionarySnaps = await db.collection('dictionaries').get() + const dictionaryIds = dictionarySnaps.docs.map(doc => doc.id) for (const dictionaryId of dictionaryIds) { - if (dictionaryId.startsWith('tdv1-')) continue; + if (dictionaryId.startsWith('tdv1-')) continue - const countData = await db.collection(`dictionaries/${dictionaryId}/words`).count().get(); - const { count: entryCount } = countData.data(); - console.log({ dictionaryId, entryCount, overallEntryCount }); - overallEntryCount += entryCount; - console.log({ dictionaryId, entryCount, overallEntryCount }); - await db.doc(`dictionaries/${dictionaryId}`).update({ entryCount }); + const 
countData = await db.collection(`dictionaries/${dictionaryId}/words`).count().get() + const { count: entryCount } = countData.data() + console.log({ dictionaryId, entryCount, overallEntryCount }) + overallEntryCount += entryCount + console.log({ dictionaryId, entryCount, overallEntryCount }) + await db.doc(`dictionaries/${dictionaryId}`).update({ entryCount }) } - await db.doc('stats/data').update({ overallEntryCount }); + await db.doc('stats/data').update({ overallEntryCount }) - return true; + return true } -countAllEntries().then(() => console.log('done')).catch(console.error); +countAllEntries().then(() => console.log('done')).catch(console.error) diff --git a/packages/scripts/import/convertJsonRowToEntryFormat.ts b/packages/scripts/import/convertJsonRowToEntryFormat.ts deleted file mode 100644 index 71bf1b899..000000000 --- a/packages/scripts/import/convertJsonRowToEntryFormat.ts +++ /dev/null @@ -1,76 +0,0 @@ -import type { ActualDatabaseEntry } from '@living-dictionaries/types'; -import type { Timestamp } from 'firebase/firestore'; - -export function convertJsonRowToEntryFormat( - row: Record, - dateStamp?: number, - // eslint-disable-next-line no-undef - timestamp?: FirebaseFirestore.FieldValue -): ActualDatabaseEntry { - const entry: ActualDatabaseEntry = { lx: row.lexeme, gl: {}, xs: {} }; - - if (row.phonetic) entry.ph = row.phonetic; - if (row.morphology) entry.mr = row.morphology; - if (row.interlinearization) entry.in = row.interlinearization; - if (row.partOfSpeech) entry.ps = returnArrayFromCommaSeparatedItems(row.partOfSpeech); - if (row.dialects) entry.di = row.dialects.split(',').map(dialect => dialect.trim()); - if (row.variant) entry.va = row.variant; - if (row.nounClass) entry.nc = row.nounClass; - if (row.source) entry.sr = row.source.split('|'); - if (row.pluralForm) entry.pl = row.pluralForm; - if (row.scientificName) entry.scn = [row.scientificName]; - if (row.semanticDomain_custom) entry.sd = [row.semanticDomain_custom]; - if (row.ID) entry.ei = row.ID; - - if (row.localOrthography) entry.lo = row.localOrthography; - if (row.localOrthography2) entry.lo2 = row.localOrthography2; - if (row.localOrthography3) entry.lo3 = row.localOrthography3; - if (row.localOrthography4) entry.lo4 = row.localOrthography4; - if (row.localOrthography5) entry.lo5 = row.localOrthography5; - - if (row.notes) entry.nt = row.notes; - - for (const [key, value] of Object.entries(row)) { - if (!value) continue; - - // gloss fields are labeled using bcp47 language codes followed by '_gloss' (e.g. 
es_gloss, tpi_gloss) - if (key.includes('_gloss')) { - const language = key.split('_gloss')[0]; - entry.gl[language] = value; - } - - if (key.includes('vernacular_exampleSentence')) { - entry.xs.vn = value; - continue; // to keep next block from also adding - } - - // example sentence fields are codes followed by '_exampleSentence' - if (key.includes('_exampleSentence')) { - const language = key.split('_exampleSentence')[0]; - entry.xs[language] = value; - } - - const semanticDomain_FOLLOWED_BY_OPTIONAL_DIGIT = /^semanticDomain\d*$/; // semanticDomain, semanticDomain2, semanticDomain<#>, but not semanticDomain_custom - if (semanticDomain_FOLLOWED_BY_OPTIONAL_DIGIT.test(key)) { - if (!entry.sdn) entry.sdn = []; - - entry.sdn.push(value.toString()); - } - } - - if (Object.keys(entry.xs).length === 0) - delete entry.xs; - - - if (!dateStamp) return entry; - - entry.ii = `v4-${dateStamp}`; - entry.ca = timestamp as Timestamp; - entry.ua = timestamp as Timestamp; - - return entry; -} - -export function returnArrayFromCommaSeparatedItems(string: string): string[] { - return string?.split(',').map((item) => item.trim()) || []; -} diff --git a/packages/scripts/import/convert_row_to_objects_for_databases.test.ts b/packages/scripts/import/convert_row_to_objects_for_databases.test.ts new file mode 100644 index 000000000..b21f95054 --- /dev/null +++ b/packages/scripts/import/convert_row_to_objects_for_databases.test.ts @@ -0,0 +1,1505 @@ +// Add your tests here, borrowing from what you already have +import { readFileSync } from 'node:fs' +import path from 'node:path' +import type { Timestamp } from 'firebase-admin/firestore' +import { convert_row_to_objects_for_databases } from './convert_row_to_objects_for_databases.js' +import type { Row } from './convert_row_to_objects_for_databases.js' +import { parseCSVFrom } from './parse-csv.js' + +describe('convertJsonRowToEntryFormat without senses', () => { + const fakeTimeStamp = 10101010 as unknown as Timestamp + const fakeDateStamp = 1715819006966 + + test('glosses', () => { + const csv_rows_without_header: Row[] = [ + { + lexeme: 'dolphin', + es_gloss: 'delfín', + }, + ] + const entries = csv_rows_without_header.map(row => convert_row_to_objects_for_databases({ row, dateStamp: fakeDateStamp, timestamp: fakeTimeStamp })) + + expect(entries).toMatchInlineSnapshot(` + [ + { + "firebase_entry": { + "ca": 10101010, + "gl": { + "es": "delfín", + }, + "ii": "v4-1715819006966", + "lx": "dolphin", + "ua": 10101010, + }, + "supabase_senses": [], + "supabase_sentences": [], + }, + ] + `) + }) + + test('example sentences', () => { + const csv_rows_without_header: Row[] = [ + { + lexeme: 'dolphin', + es_exampleSentence: 'el delfín nada en el océano.', + }, + ] + const entries = csv_rows_without_header.map(row => convert_row_to_objects_for_databases({ row, dateStamp: fakeDateStamp, timestamp: fakeTimeStamp })) + + expect(entries).toMatchInlineSnapshot(` + [ + { + "firebase_entry": { + "ca": 10101010, + "gl": {}, + "ii": "v4-1715819006966", + "lx": "dolphin", + "ua": 10101010, + "xs": { + "es": "el delfín nada en el océano.", + }, + }, + "supabase_senses": [], + "supabase_sentences": [], + }, + ] + `) + }) + + test('semantic domains', () => { + const csv_rows_without_header: Row[] = [ + { + lexeme: 'dolphins', + semanticDomain: '5.15', + semanticDomain2: '1', + semanticDomain_custom: 'the sea!', + }, + ] + const entries = csv_rows_without_header.map(row => convert_row_to_objects_for_databases({ row, dateStamp: fakeDateStamp, timestamp: fakeTimeStamp })) + + 
expect(entries).toMatchInlineSnapshot(` + [ + { + "firebase_entry": { + "ca": 10101010, + "gl": {}, + "ii": "v4-1715819006966", + "lx": "dolphins", + "sd": [ + "the sea!", + ], + "sdn": [ + "5.15", + "1", + ], + "ua": 10101010, + }, + "supabase_senses": [], + "supabase_sentences": [], + }, + ] + `) + }) + + test('high-level conversion from csv', () => { + const dictionaryId = 'example-v4' + const file = readFileSync(path.join(__dirname, `./data/${dictionaryId}/${dictionaryId}.csv`), 'utf8') + const rows = parseCSVFrom(file) + const rowsWithoutHeader = removeHeaderRow(rows) + const entries = rowsWithoutHeader.map(row => convert_row_to_objects_for_databases({ row, dateStamp: fakeDateStamp, timestamp: fakeTimeStamp })) + + expect(entries).toEqual( + [ + { + firebase_entry: { + ca: 10101010, + di: [ + 'Modern Parisian French', + ], + gl: { + en: 'car', + es: 'auto', + }, + ii: 'v4-1715819006966', + lx: 'voiture', + nt: 'small automobile', + ph: 'vwatyʁ', + ps: [ + 'n', + 'v', + ], + sd: [ + 'vehicle|cars', + ], + sdn: [ + '5.15', + '5', + ], + ua: 10101010, + xs: { + en: 'I drive my car', + es: 'Conduzco mi auto', + vn: 'Je conduis ma voiture', + }, + }, + supabase_senses: [], + supabase_sentences: [], + }, + { + firebase_entry: { + ca: 10101010, + di: [ + 'Modern Parisian French', + 'Quebec French', + ], + gl: { + en: 'tree', + es: 'árbol', + }, + ii: 'v4-1715819006966', + lx: 'arbre', + nt: 'generic term for all kinds of trees', + ph: 'aʁbʁ', + ps: [ + 'n', + 'adj', + ], + scn: [ + 'Acer rubrum', + ], + sdn: [ + '1.4', + '1.2', + ], + ua: 10101010, + xs: { + en: 'The tree gives us shade', + es: 'El árbol nos da sombra', + vn: 'L\'arbre nous donne de l\'ombre', + }, + }, + supabase_senses: [], + supabase_sentences: [], + }, + { + firebase_entry: { + ca: 10101010, + di: [ + 'Modern Parisian French', + ], + gl: { + en: 'tube', + es: 'tubo', + }, + ii: 'v4-1715819006966', + lx: 'tube', + nt: 'a cylindrical device for liquids', + ph: 'tyb', + pl: 'tubes', + ps: [ + 'n', + ], + sd: [ + 'plumbing', + ], + sdn: [ + '5.9', + ], + ua: 10101010, + xs: { + en: 'The water goes through the tubes', + es: 'El agua pasa a través de los tubos', + vn: 'L\'eau passe à travers les tubes', + }, + }, + supabase_senses: [], + supabase_sentences: [], + }, + { + firebase_entry: { + ca: 10101010, + di: [ + 'Quebec French', + ], + gl: { + en: 'car', + es: 'auto', + }, + ii: 'v4-1715819006966', + lx: 'voiture', + nt: 'small automobile', + ph: 'vwɑtYʁ', + ps: [ + 'n', + ], + sd: [ + 'vehicle', + ], + sdn: [ + '5.15', + ], + sr: [ + 'testing sources', + ], + ua: 10101010, + xs: { + en: 'I drive my car', + es: 'Conduzco mi auto', + vn: 'Je conduis ma voiture', + }, + }, + supabase_senses: [], + supabase_sentences: [], + }, + { + firebase_entry: { + ca: 10101010, + di: [ + 'Quebec French', + ], + gl: { + en: 'neutral', + es: 'neutro', + }, + ii: 'v4-1715819006966', + lx: 'neutre', + ph: 'nøʏ̯tʁ̥', + ps: [ + 'adj', + ], + ua: 10101010, + xs: { + en: 'My room is painted with a neutral color.', + es: 'Mi habitación está pintada con un color neutro.', + vn: 'Ma chambre est peinte d\'une couleur neutre.', + }, + }, + supabase_senses: [], + supabase_sentences: [], + }, + { + firebase_entry: { + ca: 10101010, + di: [ + 'Quebec French', + ], + gl: { + en: 'to celebrate', + es: 'celebrar', + }, + ii: 'v4-1715819006966', + lx: 'fêter', + nt: 'to have a party', + ph: 'fɛɪ̯te', + ps: [ + 'v', + ], + sr: [ + 'test source', + 'with multiples sources, test', + 'https://example.com', + ], + ua: 10101010, + xs: { + en: 'We will really party 
tonight', + es: 'Vamos a celebrar esta noche', + vn: 'On va vraiment fêter à soir', + }, + }, + supabase_senses: [], + supabase_sentences: [], + }, + { + firebase_entry: { + ca: 10101010, + di: [ + 'Central Luganda', + ], + gl: { + en: 'I will see you', + es: 'Voy a verte', + }, + ii: 'v4-1715819006966', + in: '1SG-Fut-2SG-see-Fin.V', + lx: 'njakulaba', + mr: 'n-ja-ku-lab-a', + ps: [ + 'vp', + ], + ua: 10101010, + }, + supabase_senses: [], + supabase_sentences: [], + }, + { + firebase_entry: { + ca: 10101010, + gl: { + en: 'bye', + es: 'adiós', + }, + ii: 'v4-1715819006966', + lx: 'vale', + ua: 10101010, + }, + supabase_senses: [], + supabase_sentences: [], + }, + ], + ) + }) + + test('does not duplicate vernacular', () => { + const csv_rows_without_header: Row[] = [ + { + vernacular_exampleSentence: 'Hello world', + }, + ] + const entries = csv_rows_without_header.map(row => convert_row_to_objects_for_databases({ row, dateStamp: fakeDateStamp, timestamp: fakeTimeStamp })) + + expect(entries).toMatchInlineSnapshot(` + [ + { + "firebase_entry": { + "ca": 10101010, + "gl": {}, + "ii": "v4-1715819006966", + "lx": undefined, + "ua": 10101010, + "xs": { + "vn": "Hello world", + }, + }, + "supabase_senses": [], + "supabase_sentences": [], + }, + ] + `) + }) +}) + +describe('convertJsonRowToEntryFormat with senses', () => { + const fakeTimeStamp = 10101010 as unknown as Timestamp + const fakeDateStamp = 1715819006966 + + test('multiple senses (glosses)', () => { + const csv_rows_with_senses: Row[] = [ + { + 'lexeme': '𒄧𒂸', + 'es_gloss': 'delfín', + 'en_gloss': 'dolphin', + 's2.es_gloss': 'pez', + 's2.en_gloss': 'fish', + 's3.en_gloss': 'marine mammal', + 's4.en_gloss': 'mythological creature', + 's4.es_gloss': 'creatura mitológica', + 's4.fr_gloss': 'créature mythologique', + + }, + ] + const entries = csv_rows_with_senses.map(row => convert_row_to_objects_for_databases({ row, dateStamp: fakeDateStamp, timestamp: fakeTimeStamp, test: true })) + + expect(entries).toMatchInlineSnapshot(` + [ + { + "firebase_entry": { + "ca": 10101010, + "gl": { + "en": "dolphin", + "es": "delfín", + }, + "ii": "v4-1715819006966", + "lx": "𒄧𒂸", + "ua": 10101010, + }, + "supabase_senses": [ + { + "sense": { + "glosses": { + "new": { + "en": "fish", + "es": "pez", + }, + }, + }, + "sense_id": "11111111-1111-1111-1111-111111111100", + }, + { + "sense": { + "glosses": { + "new": { + "en": "marine mammal", + }, + }, + }, + "sense_id": "11111111-1111-1111-1111-111111111102", + }, + { + "sense": { + "glosses": { + "new": { + "en": "mythological creature", + "es": "creatura mitológica", + "fr": "créature mythologique", + }, + }, + }, + "sense_id": "11111111-1111-1111-1111-111111111103", + }, + ], + "supabase_sentences": [], + }, + ] + `) + }) + + test('senses with sentences', () => { + const csv_rows_with_sentences: Row[] = [ + { + 'lexeme': '𒄧𒂸', + 'en_gloss': 'dolphin', + 's2.en_gloss': 'fish', + 's2.default_vernacular_exampleSentence': '𒄧𒂸 𒄧 𒄧𒂸 𒂸𒂸𒄧', + 's2.en_exampleSentence': 'The fish is swimming', + 's2.es_exampleSentence': 'El pez está nadando', + }, + ] + const entries = csv_rows_with_sentences.map(row => convert_row_to_objects_for_databases({ row, dateStamp: fakeDateStamp, timestamp: fakeTimeStamp, test: true })) + + expect(entries).toMatchInlineSnapshot(` + [ + { + "firebase_entry": { + "ca": 10101010, + "gl": { + "en": "dolphin", + }, + "ii": "v4-1715819006966", + "lx": "𒄧𒂸", + "ua": 10101010, + }, + "supabase_senses": [ + { + "sense": { + "glosses": { + "new": { + "en": "fish", + }, + }, + }, + "sense_id": 
"11111111-1111-1111-1111-111111111100", + }, + ], + "supabase_sentences": [ + { + "sense_id": "11111111-1111-1111-1111-111111111100", + "sentence": { + "text": { + "new": { + "default": "𒄧𒂸 𒄧 𒄧𒂸 𒂸𒂸𒄧", + }, + }, + "translation": { + "new": { + "en": "The fish is swimmmimg", + "es": "El pez está nadando", + }, + }, + }, + "sentence_id": "11111111-1111-1111-1111-111111111102", + }, + ], + }, + ] + `) + }) + + test('senses with the rest fields', () => { + const csv_rows_with_other_fields: Row[] = [ + { + 'lexeme': 'foo', + 'en_gloss': 'test', + 's2.en_gloss': 'example', + 's2.partOfSpeech': 'n', + 's2.semanticDomain': '1.1', + 's2.nounClass': 'S', + }, + ] + const entries = csv_rows_with_other_fields.map(row => convert_row_to_objects_for_databases({ row, dateStamp: fakeDateStamp, timestamp: fakeTimeStamp, test: true })) + expect(entries).toMatchInlineSnapshot(` + [ + { + "firebase_entry": { + "ca": 10101010, + "gl": { + "en": "test", + }, + "ii": "v4-1715819006966", + "lx": "foo", + "ua": 10101010, + }, + "supabase_senses": [ + { + "sense": { + "glosses": { + "new": { + "en": "example", + }, + }, + "noun_class": { + "new": "S", + }, + "parts_of_speech": { + "new": [ + "n", + ], + }, + "semantic_domains": { + "new": [ + "1.1", + ], + }, + }, + "sense_id": "11111111-1111-1111-1111-111111111100", + }, + ], + "supabase_sentences": [], + }, + ] + `) + }) + + test('wrong order in senses', () => { + const csv_rows_with_senses: Row[] = [ + { + 'lexeme': '𒂸', + 'es_gloss': 'sopa', + 'en_gloss': 'soup', + 's2.es_gloss': 'agua', + 's3.es_gloss': 'líquido', + 's3.en_gloss': 'liquid', + 's2.en_gloss': 'water', + }, + ] + const entries = csv_rows_with_senses.map(row => convert_row_to_objects_for_databases({ row, dateStamp: fakeDateStamp, timestamp: fakeTimeStamp, test: true })) + + expect(entries).not.toEqual( + [ + { + firebase_entry: { + ca: 10101010, + gl: { + en: 'soup', + es: 'sopa', + }, + ii: 'v4-1715819006966', + lx: '𒂸', + ua: 10101010, + }, + supabase_senses: [ + { + sense: { + glosses: { + new: { + es: 'agua', + en: 'water', + }, + }, + }, + sense_id: '11111111-1111-1111-1111-111111111100', + }, + { + sense: { + glosses: { + new: { + en: 'liquid', + es: 'líquido', + }, + }, + }, + sense_id: '11111111-1111-1111-1111-111111111102', + }, + ], + supabase_sentences: [], + }, + ], + ) + }) + + test('senses with multiple sentences and last vernacular sentence without its translations', () => { + const csv_rows_with_sentences: Row[] = [ + { + 'lexeme': '𒄧𒂸', + 'en_gloss': 'dolphin', + 's2.en_gloss': 'fish', + 's2.default_vernacular_exampleSentence': '𒄧𒂸 𒄧 𒄧𒂸 𒂸𒂸𒄧', + 's2.en_exampleSentence': 'The fish is swimmming', + 's2.es_exampleSentence': 'El pez está nadando', + 's3.en_gloss': 'swim', + 's3.default_vernacular_exampleSentence': '𒂸𒂸𒄧', + 's3.en_exampleSentence': 'I swim', + 's4.en_gloss': 'test', + 's4.default_vernacular_exampleSentence': '𒂸𒂸 𒂸𒂸 𒂸𒂸', + }, + ] + const entries = csv_rows_with_sentences.map(row => convert_row_to_objects_for_databases({ row, dateStamp: fakeDateStamp, timestamp: fakeTimeStamp, test: true })) + + expect(entries).toMatchInlineSnapshot(` + [ + { + "firebase_entry": { + "ca": 10101010, + "gl": { + "en": "dolphin", + }, + "ii": "v4-1715819006966", + "lx": "𒄧𒂸", + "ua": 10101010, + }, + "supabase_senses": [ + { + "sense": { + "glosses": { + "new": { + "en": "fish", + }, + }, + }, + "sense_id": "11111111-1111-1111-1111-111111111100", + }, + { + "sense": { + "glosses": { + "new": { + "en": "swim", + }, + }, + }, + "sense_id": "11111111-1111-1111-1111-111111111103", + }, + { + 
"sense": { + "glosses": { + "new": { + "en": "test", + }, + }, + }, + "sense_id": "11111111-1111-1111-1111-111111111105", + }, + ], + "supabase_sentences": [ + { + "sense_id": "11111111-1111-1111-1111-111111111100", + "sentence": { + "text": { + "new": { + "default": "𒄧𒂸 𒄧 𒄧𒂸 𒂸𒂸𒄧", + }, + }, + "translation": { + "new": { + "en": "The fish is swimmming", + "es": "El pez está nadando", + }, + }, + }, + "sentence_id": "11111111-1111-1111-1111-111111111102", + }, + { + "sense_id": "11111111-1111-1111-1111-111111111103", + "sentence": { + "text": { + "new": { + "default": "𒂸𒂸𒄧", + }, + }, + "translation": { + "new": { + "en": "I swim", + }, + }, + }, + "sentence_id": "11111111-1111-1111-1111-111111111104", + }, + { + "sense_id": "11111111-1111-1111-1111-111111111105", + "sentence": { + "text": { + "new": { + "default": "𒂸𒂸 𒂸𒂸 𒂸𒂸", + }, + }, + }, + "sentence_id": "11111111-1111-1111-1111-111111111106", + }, + ], + }, + ] + `) + }) + test('multiple vernacular and translations sentences that belongs to a same sense', () => { + const csv_rows_with_sentences: Row[] = [ + { + 'lexeme': '𒄧𒂸', + 'en_gloss': 'dolphin', + 's2.en_gloss': 'fish', + 's2.default_vernacular_exampleSentence': '𒄧𒂸 𒄧 𒄧𒂸 𒂸𒂸𒄧', + 's2.en_exampleSentence': 'The fish is swimmming', + 's2.es_exampleSentence': 'El pez está nadando', + 's2.default_vernacular_exampleSentence.2': '𒂸 𒂸𒂸𒂸 𒄧𒄧𒄧 𒄧', + 's3.en_gloss': 'swim', + 's3.default_vernacular_exampleSentence': '𒂸𒂸𒄧', + 's3.en_exampleSentence': 'I swim', + 's3.default_vernacular_exampleSentence.2': '𒄧𒂸 𒂸𒄧', + 's3.en_exampleSentence.2': 'He swam', + 's3.es_exampleSentence.2': 'Él nadó', + 's3.it_exampleSentence.2': 'egli nuotava', + 's3.default_vernacular_exampleSentence.3': '𒂸 𒄧𒄧 𒂸', + 's3.es_exampleSentence.3': 'Él nadará', + 's3.en_exampleSentence.3': 'He will swim', + 's4.en_gloss': 'test', + 's4.default_vernacular_exampleSentence': '𒂸𒂸 𒂸𒂸 𒂸𒂸', + }, + ] + const entries = csv_rows_with_sentences.map(row => convert_row_to_objects_for_databases({ row, dateStamp: fakeDateStamp, timestamp: fakeTimeStamp, test: true })) + + expect(entries).toMatchInlineSnapshot(` + [ + { + "firebase_entry": { + "ca": 10101010, + "gl": { + "en": "dolphin", + }, + "ii": "v4-1715819006966", + "lx": "𒄧𒂸", + "ua": 10101010, + }, + "supabase_senses": [ + { + "sense": { + "glosses": { + "new": { + "en": "fish", + }, + }, + }, + "sense_id": "11111111-1111-1111-1111-111111111100", + }, + { + "sense": { + "glosses": { + "new": { + "en": "swim", + }, + }, + }, + "sense_id": "11111111-1111-1111-1111-111111111104", + }, + { + "sense": { + "glosses": { + "new": { + "en": "test", + }, + }, + }, + "sense_id": "11111111-1111-1111-1111-111111111108", + }, + ], + "supabase_sentences": [ + { + "sense_id": "11111111-1111-1111-1111-111111111100", + "sentence": { + "text": { + "new": { + "default": "𒄧𒂸 𒄧 𒄧𒂸 𒂸𒂸𒄧", + }, + }, + "translation": { + "new": { + "en": "The fish is swimmming", + "es": "El pez está nadando", + }, + }, + }, + "sentence_id": "11111111-1111-1111-1111-111111111102", + }, + { + "sense_id": "11111111-1111-1111-1111-111111111100", + "sentence": { + "text": { + "new": { + "default": "𒂸 𒂸𒂸𒂸 𒄧𒄧𒄧 𒄧", + }, + }, + }, + "sentence_id": "11111111-1111-1111-1111-111111111103", + }, + { + "sense_id": "11111111-1111-1111-1111-111111111104", + "sentence": { + "text": { + "new": { + "default": "𒂸𒂸𒄧", + }, + }, + "translation": { + "new": { + "en": "I swim", + }, + }, + }, + "sentence_id": "11111111-1111-1111-1111-111111111105", + }, + { + "sense_id": "11111111-1111-1111-1111-111111111104", + "sentence": { + "text": { + "new": { 
+ "default": "𒄧𒂸 𒂸𒄧", + }, + }, + "translation": { + "new": { + "en": "He swam", + "es": "Él nadó", + "it": "egli nuotava", + }, + }, + }, + "sentence_id": "11111111-1111-1111-1111-111111111106", + }, + { + "sense_id": "11111111-1111-1111-1111-111111111104", + "sentence": { + "text": { + "new": { + "default": "𒂸 𒄧𒄧 𒂸", + }, + }, + "translation": { + "new": { + "en": "He will swim", + "es": "Él nadará", + }, + }, + }, + "sentence_id": "11111111-1111-1111-1111-111111111107", + }, + { + "sense_id": "11111111-1111-1111-1111-111111111108", + "sentence": { + "text": { + "new": { + "default": "𒂸𒂸 𒂸𒂸 𒂸𒂸", + }, + }, + }, + "sentence_id": "11111111-1111-1111-1111-111111111109", + }, + ], + }, + ] + `) + }) + + test('high-level conversion from csv with senses', () => { + const dictionaryId = 'example-v4-senses' + const file = readFileSync(path.join(__dirname, `./data/${dictionaryId}/${dictionaryId}.csv`), 'utf8') + const rows = parseCSVFrom(file) + const entries = rows.map(row => convert_row_to_objects_for_databases({ row, dateStamp: fakeDateStamp, timestamp: fakeTimeStamp, test: true })) + + expect(entries).toMatchInlineSnapshot(` + [ + { + "firebase_entry": { + "ca": 10101010, + "gl": { + "es": "sol", + }, + "ii": "v4-1715819006966", + "lx": "kꞌahkꞌal", + "nt": "16/jul./2019. Bachajon", + "ps": [ + "n", + ], + "ua": 10101010, + "va": "kꞌajkꞌal", + "xs": { + "es": "Ya salió el sol", + "vn": "Lokꞌix tal kꞌahkꞌal", + }, + }, + "supabase_senses": [ + { + "sense": { + "glosses": { + "new": { + "es": "fiebre", + }, + }, + "parts_of_speech": { + "new": [ + "n", + ], + }, + }, + "sense_id": "11111111-1111-1111-1111-111111111100", + }, + { + "sense": { + "glosses": { + "new": { + "es": "día", + }, + }, + "parts_of_speech": { + "new": [ + "n", + ], + }, + }, + "sense_id": "11111111-1111-1111-1111-111111111103", + }, + { + "sense": { + "glosses": { + "new": { + "es": "calor", + }, + }, + "parts_of_speech": { + "new": [ + "n", + ], + }, + }, + "sense_id": "11111111-1111-1111-1111-111111111106", + }, + ], + "supabase_sentences": [ + { + "sense_id": "11111111-1111-1111-1111-111111111100", + "sentence": { + "text": { + "new": { + "default": "Ay ta kꞌahkꞌal te chꞌin alale", + }, + }, + "translation": { + "new": { + "es": "El niño tiene fiebre", + }, + }, + }, + "sentence_id": "11111111-1111-1111-1111-111111111102", + }, + { + "sense_id": "11111111-1111-1111-1111-111111111103", + "sentence": { + "text": { + "new": { + "default": "Cheb kꞌahkꞌal ya x-aꞌtejotik", + }, + }, + "translation": { + "new": { + "es": "Trabajaremos dos días", + }, + }, + }, + "sentence_id": "11111111-1111-1111-1111-111111111105", + }, + { + "sense_id": "11111111-1111-1111-1111-111111111106", + "sentence": { + "text": { + "new": { + "default": "Toyol kꞌahkꞌal ya kaꞌiy", + }, + }, + "translation": { + "new": { + "es": "Siento mucho calor", + }, + }, + }, + "sentence_id": "11111111-1111-1111-1111-111111111108", + }, + ], + }, + { + "firebase_entry": { + "ca": 10101010, + "gl": { + "es": "sol", + }, + "ii": "v4-1715819006966", + "lx": "kꞌaal", + "nt": "26/dic./2020", + "ps": [ + "n", + ], + "ua": 10101010, + "va": "kꞌahkꞌal", + "xs": { + "es": "Que bueno, ya salió el sol", + "vn": "Jaꞌnix lek-a lokꞌix tel kꞌaal", + }, + }, + "supabase_senses": [ + { + "sense": { + "glosses": { + "new": { + "es": "fiebre", + }, + }, + "parts_of_speech": { + "new": [ + "n", + ], + }, + }, + "sense_id": "11111111-1111-1111-1111-111111111100", + }, + { + "sense": { + "glosses": { + "new": { + "es": "día", + }, + }, + "parts_of_speech": { + "new": [ + "n", + ], + }, + 
}, + "sense_id": "11111111-1111-1111-1111-111111111103", + }, + { + "sense": { + "glosses": { + "new": { + "es": "calor", + }, + }, + "parts_of_speech": { + "new": [ + "n", + ], + }, + }, + "sense_id": "11111111-1111-1111-1111-111111111106", + }, + ], + "supabase_sentences": [ + { + "sense_id": "11111111-1111-1111-1111-111111111100", + "sentence": { + "text": { + "new": { + "default": "Ay bayal skꞌaal te chꞌin x-Ixchele", + }, + }, + "translation": { + "new": { + "es": "Mi hijita Ixchel tiene mucha fiebre", + }, + }, + }, + "sentence_id": "11111111-1111-1111-1111-111111111102", + }, + { + "sense_id": "11111111-1111-1111-1111-111111111103", + "sentence": { + "text": { + "new": { + "default": ""Bajtꞌix kꞌaal mamtik, yorailix ichꞌ lewa"", + }, + }, + "translation": { + "new": { + "es": "Ya transcurrió el día mi estimado señor, es momento de tomar un descanso", + }, + }, + }, + "sentence_id": "11111111-1111-1111-1111-111111111105", + }, + { + "sense_id": "11111111-1111-1111-1111-111111111106", + "sentence": { + "text": { + "new": { + "default": "Toyol kꞌaal ya jkaꞌiy", + }, + }, + "translation": { + "new": { + "es": "Siento mucho calor", + }, + }, + }, + "sentence_id": "11111111-1111-1111-1111-111111111108", + }, + ], + }, + { + "firebase_entry": { + "ca": 10101010, + "gl": { + "es": "sol", + }, + "ii": "v4-1715819006966", + "lx": "kꞌajkꞌal", + "nt": "14/dic./2019", + "ps": [ + "n", + ], + "ua": 10101010, + "va": "kꞌahkꞌal", + }, + "supabase_senses": [ + { + "sense": { + "glosses": { + "new": { + "es": "día", + }, + }, + "parts_of_speech": { + "new": [ + "n", + ], + }, + }, + "sense_id": "11111111-1111-1111-1111-111111111100", + }, + { + "sense": { + "glosses": { + "new": { + "es": "calor", + }, + }, + "parts_of_speech": { + "new": [ + "n", + ], + }, + }, + "sense_id": "11111111-1111-1111-1111-111111111102", + }, + { + "sense": { + "glosses": { + "new": { + "es": "fiebre", + }, + }, + "parts_of_speech": { + "new": [ + "n", + ], + }, + }, + "sense_id": "11111111-1111-1111-1111-111111111103", + }, + ], + "supabase_sentences": [], + }, + { + "firebase_entry": { + "ca": 10101010, + "gl": { + "es": "fuego", + }, + "ii": "v4-1715819006966", + "lx": "kꞌajkꞌ", + "nt": "23/sep./2023", + "ps": [ + "n", + ], + "ua": 10101010, + "va": "kꞌahkꞌ", + "xs": { + "es": "Ya hice el fuego", + "vn": "Tilix kuꞌun-i kꞌajkꞌi", + }, + }, + "supabase_senses": [ + { + "sense": { + "glosses": { + "new": { + "es": "bravo", + }, + }, + "parts_of_speech": { + "new": [ + "adj", + ], + }, + }, + "sense_id": "11111111-1111-1111-1111-111111111100", + }, + { + "sense": { + "glosses": { + "new": { + "es": "fiebre", + }, + }, + "parts_of_speech": { + "new": [ + "n", + ], + }, + }, + "sense_id": "11111111-1111-1111-1111-111111111103", + }, + { + "sense": { + "glosses": { + "new": { + "es": "caliente", + }, + }, + "parts_of_speech": { + "new": [ + "adj", + ], + }, + }, + "sense_id": "11111111-1111-1111-1111-111111111106", + }, + ], + "supabase_sentences": [ + { + "sense_id": "11111111-1111-1111-1111-111111111100", + "sentence": { + "text": { + "new": { + "default": "Lom kꞌajkꞌ te mamal jkaxlane", + }, + }, + "translation": { + "new": { + "es": "El mestizo es muy bravo", + }, + }, + }, + "sentence_id": "11111111-1111-1111-1111-111111111102", + }, + { + "sense_id": "11111111-1111-1111-1111-111111111103", + "sentence": { + "text": { + "new": { + "default": "Tsakbil ta kꞌajkꞌ te alale", + }, + }, + "translation": { + "new": { + "es": "El bebé tiene mucha fiebre", + }, + }, + }, + "sentence_id": "11111111-1111-1111-1111-111111111105", + 
}, + { + "sense_id": "11111111-1111-1111-1111-111111111106", + "sentence": { + "text": { + "new": { + "default": "El café está caliente, tómalo despacio", + }, + }, + "translation": { + "new": { + "es": "Kꞌajkꞌ te kajpele, kꞌume xa awuchꞌ", + }, + }, + }, + "sentence_id": "11111111-1111-1111-1111-111111111108", + }, + ], + }, + { + "firebase_entry": { + "ca": 10101010, + "gl": { + "es": "libro", + }, + "ii": "v4-1715819006966", + "lx": "jun", + "nt": "26/sep./2023", + "ps": [ + "n", + ], + "ua": 10101010, + "xs": { + "es": "¿Qué haces? - Estoy leyendo un libro", + "vn": "¿Beluk apas? - Yakalon ta skꞌoponel jun", + }, + }, + "supabase_senses": [ + { + "sense": { + "glosses": { + "new": { + "es": "cuaderno", + }, + }, + "parts_of_speech": { + "new": [ + "n", + ], + }, + }, + "sense_id": "11111111-1111-1111-1111-111111111100", + }, + { + "sense": { + "glosses": { + "new": { + "es": "documento", + }, + }, + "parts_of_speech": { + "new": [ + "n", + ], + }, + }, + "sense_id": "11111111-1111-1111-1111-111111111103", + }, + { + "sense": { + "glosses": { + "new": { + "es": "papel", + }, + }, + "parts_of_speech": { + "new": [ + "n", + ], + }, + }, + "sense_id": "11111111-1111-1111-1111-111111111106", + }, + ], + "supabase_sentences": [ + { + "sense_id": "11111111-1111-1111-1111-111111111100", + "sentence": { + "text": { + "new": { + "default": "La jta ta kitsel te june", + }, + }, + "translation": { + "new": { + "es": "Alcancé a rayar mi cuaderno", + }, + }, + }, + "sentence_id": "11111111-1111-1111-1111-111111111102", + }, + { + "sense_id": "11111111-1111-1111-1111-111111111103", + "sentence": { + "text": { + "new": { + "default": "Maꞌme xa awochꞌ te ajune", + }, + }, + "translation": { + "new": { + "es": "No vayas a arrugar tu documento", + }, + }, + }, + "sentence_id": "11111111-1111-1111-1111-111111111105", + }, + { + "sense_id": "11111111-1111-1111-1111-111111111106", + "sentence": { + "text": { + "new": { + "default": "Zoe rompió el papel", + }, + }, + "translation": { + "new": { + "es": "La schꞌiꞌ jun te Zoe", + }, + }, + }, + "sentence_id": "11111111-1111-1111-1111-111111111108", + }, + ], + }, + { + "firebase_entry": { + "ca": 10101010, + "gl": { + "es": "abierto", + }, + "ii": "v4-1715819006966", + "lx": "jeꞌel", + "nt": "08/abr./2019", + "ps": [ + "adj", + ], + "ua": 10101010, + "va": "makal", + "xs": { + "es": "La puerta de mi casa quedó abierta", + "vn": "Jeꞌel jilel stiꞌ jna", + }, + }, + "supabase_senses": [ + { + "sense": { + "glosses": { + "new": { + "es": "abrir", + }, + }, + }, + "sense_id": "11111111-1111-1111-1111-111111111100", + }, + ], + "supabase_sentences": [ + { + "sense_id": "11111111-1111-1111-1111-111111111100", + "sentence": { + "text": { + "new": { + "default": "Jeꞌa tel tebuk i tiꞌnai ay bayal kꞌaal", + }, + }, + "translation": { + "new": { + "es": ""Abre un poco la puerta, hace mucho calor"", + }, + }, + }, + "sentence_id": "11111111-1111-1111-1111-111111111102", + }, + ], + }, + ] + `) + }) +}) + +function removeHeaderRow(rows: any[]) { + return rows.splice(1) +} diff --git a/packages/scripts/import/convert_row_to_objects_for_databases.ts b/packages/scripts/import/convert_row_to_objects_for_databases.ts new file mode 100644 index 000000000..30613267a --- /dev/null +++ b/packages/scripts/import/convert_row_to_objects_for_databases.ts @@ -0,0 +1,197 @@ +import { randomUUID } from 'node:crypto' +import type { ActualDatabaseEntry, ContentUpdateRequestBody } from '@living-dictionaries/types' +import type { Timestamp } from 'firebase/firestore' +import type { Row } from 
'./row.type' + +export function convert_row_to_objects_for_databases({ row, dateStamp, timestamp, test = false }: { + row: Row + dateStamp?: number + timestamp?: FirebaseFirestore.FieldValue + test?: boolean +}): { + firebase_entry: ActualDatabaseEntry + supabase_senses: { + sense_id: string + sense: ContentUpdateRequestBody['change']['sense'] + }[] + supabase_sentences: { + sentence_id: string + sense_id: string + sentence: ContentUpdateRequestBody['change']['sentence'] + }[] + } { + const sense_regex = /^s\d+\./ + const multiple_sentence_regex = /_exampleSentence\.\d+$/ + const has_multiple_sentence_regex_label = (key: string) => multiple_sentence_regex.test(key) + const firebase_entry: ActualDatabaseEntry = { lx: row.lexeme, gl: {}, xs: {} } + interface SupabaseSense { + sense_id: string + sense: ContentUpdateRequestBody['change']['sense'] + } + interface SupabaseSentence { + sentence_id: string + sense_id: string + sentence: ContentUpdateRequestBody['change']['sentence'] + } + const uuid_template = '11111111-1111-1111-1111-111111111111' + let current_uuid_index = 0 + function incremental_consistent_uuid() { + return test ? uuid_template.slice(0, -2) + (current_uuid_index++).toString().padStart(2, '0') : randomUUID() + } + const supabase_sense: SupabaseSense = { + sense_id: incremental_consistent_uuid(), + sense: {}, + } + const supabase_sentence: SupabaseSentence = { + sentence_id: incremental_consistent_uuid(), + sense_id: supabase_sense.sense_id, + sentence: {}, + } + const supabase_senses = [] + const supabase_sentences = [] + let old_key = 2 + let old_language_key + let new_language_key + + if (row.phonetic) firebase_entry.ph = row.phonetic + if (row.morphology) firebase_entry.mr = row.morphology + if (row.interlinearization) firebase_entry.in = row.interlinearization + if (row.partOfSpeech) firebase_entry.ps = returnArrayFromCommaSeparatedItems(row.partOfSpeech) + if (row.dialects) firebase_entry.di = row.dialects.split(',').map(dialect => dialect.trim()) + if (row.variant) firebase_entry.va = row.variant + if (row.nounClass) firebase_entry.nc = row.nounClass + if (row.source) firebase_entry.sr = row.source.split('|') + if (row.pluralForm) firebase_entry.pl = row.pluralForm + if (row.scientificName) firebase_entry.scn = [row.scientificName] + if (row.semanticDomain_custom) firebase_entry.sd = [row.semanticDomain_custom] + if (row.ID) firebase_entry.ei = row.ID + + if (row.localOrthography) firebase_entry.lo1 = row.localOrthography + if (row.localOrthography2) firebase_entry.lo2 = row.localOrthography2 + if (row.localOrthography3) firebase_entry.lo3 = row.localOrthography3 + if (row.localOrthography4) firebase_entry.lo4 = row.localOrthography4 + if (row.localOrthography5) firebase_entry.lo5 = row.localOrthography5 + + if (row.notes) firebase_entry.nt = row.notes + + for (const [k, v] of Object.entries(row)) { + const key: keyof Row = k as keyof Row + const value: string = v as string + if (!value) continue + + // gloss fields are labeled using bcp47 language codes followed by '_gloss' (e.g. 
es_gloss, tpi_gloss) + if (key.includes('_gloss') && !sense_regex.test(key)) { + const [language] = key.split('_gloss') + firebase_entry.gl[language] = value + } + + if (key.includes('vernacular_exampleSentence') && !sense_regex.test(key)) { + firebase_entry.xs.vn = value + continue // to keep next block from also adding + } + + // example sentence fields are codes followed by '_exampleSentence' + if (key.includes('_exampleSentence') && !sense_regex.test(key)) { + const [language] = key.split('_exampleSentence') + firebase_entry.xs[language] = value + } + + if (sense_regex.test(key)) { + if (key.includes('_gloss')) { + let language_key = key.replace(sense_regex, '') + language_key = language_key.replace('_gloss', '') + + if (key === `s${old_key}.${language_key}_gloss`) { + supabase_sense.sense = { glosses: { new: { ...supabase_sense.sense?.glosses?.new, [language_key]: row[key] } } } + } else { + old_key++ + supabase_sense.sense_id = incremental_consistent_uuid() + supabase_sense.sense = { glosses: { ...supabase_sense.sense.glosses, new: { [language_key]: row[key] } } } + } + } + if (key.includes('_vernacular_exampleSentence')) { + let writing_system = key.replace(sense_regex, '') + writing_system = writing_system.replace('_vernacular_exampleSentence', '') + if (has_multiple_sentence_regex_label(key)) writing_system = writing_system.slice(0, writing_system.lastIndexOf('.')) + + if (key === `s${old_key}.${writing_system}_vernacular_exampleSentence` || has_multiple_sentence_regex_label(key)) { + supabase_sentence.sense_id = supabase_sense.sense_id + supabase_sentence.sentence_id = incremental_consistent_uuid() + if (key === `s${old_key}.${writing_system}_vernacular_exampleSentence` && !has_multiple_sentence_regex_label(key)) { + supabase_sentence.sentence = { text: { new: { ...supabase_sentence?.sentence?.text?.new, [writing_system]: row[key] } } } + } else if (has_multiple_sentence_regex_label(key)) { + supabase_sentence.sentence = { text: { new: { [writing_system]: row[key] } } } + } + } + } + if (key.includes('_exampleSentence') && !key.includes('_vernacular')) { // when key is a translated example sentence + new_language_key = key.replace(sense_regex, '') + new_language_key = new_language_key.replace('_exampleSentence', '') + if (has_multiple_sentence_regex_label(key)) new_language_key = new_language_key.slice(0, new_language_key.lastIndexOf('.')) + if (old_language_key && old_language_key === new_language_key && !has_multiple_sentence_regex_label(key)) supabase_sentence.sentence_id = incremental_consistent_uuid() + if (!old_language_key) old_language_key = new_language_key + if (key === `s${old_key}.${new_language_key}_exampleSentence` || has_multiple_sentence_regex_label(key)) { + supabase_sentence.sentence = { ...supabase_sentence.sentence, translation: { new: { ...supabase_sentence?.sentence?.translation?.new, [new_language_key]: row[key] } } } + } + } + if (key.includes('_exampleSentence')) { // this covers both vernacular and translated example sentences + const sentence_index: number = supabase_sentences.findIndex(sentence => sentence.sentence_id === supabase_sentence.sentence_id) + const sense_index: number = supabase_sentences.findIndex(sentence => sentence.sense_id === supabase_sentence.sense_id) + const sense_index_exists = sense_index !== -1 + const sentence_index_exists = sentence_index !== -1 + if (sense_index_exists && !has_multiple_sentence_regex_label(key)) { + supabase_sentences[sense_index] = { ...supabase_sentence } + } else if (sentence_index_exists) { +
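// a sentence with this id was already collected, so overwrite it with the accumulated copy rather than pushing a duplicate +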
supabase_sentences[sentence_index] = { ...supabase_sentence } + } else { + supabase_sentences.push({ ...supabase_sentence }) + } + } + old_language_key = new_language_key + if (key.includes('.partOfSpeech')) + supabase_sense.sense = { ...supabase_sense.sense, parts_of_speech: { new: [row[key]] } } + + if (key.includes('.semanticDomain')) + supabase_sense.sense = { ...supabase_sense.sense, semantic_domains: { new: [row[key]] } } + + if (key.includes('.nounClass')) + supabase_sense.sense = { ...supabase_sense.sense, noun_class: { new: row[key] } } + } + + if (sense_regex.test(key)) { + const index: number = supabase_senses.findIndex(sense => sense.sense_id === supabase_sense.sense_id) + const sense_index_exists = index !== -1 + if (sense_index_exists) { + supabase_senses[index] = { ...supabase_sense } + } else { + supabase_senses.push({ ...supabase_sense }) + } + } + + const semanticDomain_FOLLOWED_BY_OPTIONAL_DIGIT = /^semanticDomain\d*$/ // semanticDomain, semanticDomain2, semanticDomain<#>, but not semanticDomain_custom + if (semanticDomain_FOLLOWED_BY_OPTIONAL_DIGIT.test(key)) { + if (!firebase_entry.sdn) firebase_entry.sdn = [] + + firebase_entry.sdn.push(value.toString()) + } + } + + if (Object.keys(firebase_entry.xs).length === 0) + delete firebase_entry.xs + + // if (!dateStamp) return firebase_entry + + firebase_entry.ii = `v4-${dateStamp}` + firebase_entry.ca = timestamp as Timestamp + firebase_entry.ua = timestamp as Timestamp + + return { + firebase_entry, + supabase_senses, + supabase_sentences, + } +} + +export function returnArrayFromCommaSeparatedItems(string: string): string[] { + return string?.split(',').map(item => item.trim()) || [] +} diff --git a/packages/scripts/import/data/.gitignore b/packages/scripts/import/data/.gitignore index 114c21ffd..60b98686c 100644 --- a/packages/scripts/import/data/.gitignore +++ b/packages/scripts/import/data/.gitignore @@ -3,3 +3,4 @@ /* !.gitignore !example-v4/ +!example-v4-senses/ diff --git a/packages/scripts/import/data/example-v4-senses/example-v4-senses.csv b/packages/scripts/import/data/example-v4-senses/example-v4-senses.csv new file mode 100644 index 000000000..7e0a52d99 --- /dev/null +++ b/packages/scripts/import/data/example-v4-senses/example-v4-senses.csv @@ -0,0 +1,7 @@ +lexeme,variant,es_gloss,partOfSpeech ,vernacular_exampleSentence ,es_exampleSentence ,s2.es_gloss,s2.partOfSpeech,s2.default_vernacular_exampleSentence,s2.es_exampleSentence,s3.es_gloss,s3.partOfSpeech,s3.default_vernacular_exampleSentence,s3.es_exampleSentence,s4.es_gloss,s4.partOfSpeech,s4.default_vernacular_exampleSentence,s4.es_exampleSentence,notes +kꞌahkꞌal,kꞌajkꞌal,sol,n,Lokꞌix tal kꞌahkꞌal,Ya salió el sol,fiebre,n,Ay ta kꞌahkꞌal te chꞌin alale,El niño tiene fiebre,día,n,Cheb kꞌahkꞌal ya x-aꞌtejotik,Trabajaremos dos días,calor,n,Toyol kꞌahkꞌal ya kaꞌiy,Siento mucho calor,16/jul./2019. 
Bachajon +kꞌaal,kꞌahkꞌal,sol,n,Jaꞌnix lek-a lokꞌix tel kꞌaal,"Que bueno, ya salió el sol",fiebre,n,Ay bayal skꞌaal te chꞌin x-Ixchele,Mi hijita Ixchel tiene mucha fiebre,día,n,"""Bajtꞌix kꞌaal mamtik, yorailix ichꞌ lewa""","Ya transcurrió el día mi estimado señor, es momento de tomar un descanso",calor,n,Toyol kꞌaal ya jkaꞌiy,Siento mucho calor,26/dic./2020 +kꞌajkꞌal,kꞌahkꞌal,sol,n,,,día,n,,,calor,n,,,fiebre,n,,,14/dic./2019 +kꞌajkꞌ,kꞌahkꞌ,fuego,n,Tilix kuꞌun-i kꞌajkꞌi,Ya hice el fuego,bravo,adj,Lom kꞌajkꞌ te mamal jkaxlane,El mestizo es muy bravo,fiebre,n,Tsakbil ta kꞌajkꞌ te alale,El bebé tiene mucha fiebre,caliente,adj,"El café está caliente, tómalo despacio","Kꞌajkꞌ te kajpele, kꞌume xa awuchꞌ",23/sep./2023 +jun,,libro,n,¿Beluk apas? - Yakalon ta skꞌoponel jun,¿Qué haces? - Estoy leyendo un libro,cuaderno,n,La jta ta kitsel te june,Alcancé a rayar mi cuaderno,documento,n,Maꞌme xa awochꞌ te ajune,No vayas a arrugar tu documento,papel,n,Zoe rompió el papel,La schꞌiꞌ jun te Zoe,26/sep./2023 +jeꞌel,makal,abierto,adj,Jeꞌel jilel stiꞌ jna,La puerta de mi casa quedó abierta,abrir,,Jeꞌa tel tebuk i tiꞌnai ay bayal kꞌaal,"""Abre un poco la puerta, hace mucho calor""",,,,,,,,,08/abr./2019 \ No newline at end of file diff --git a/packages/scripts/import/import-media.ts b/packages/scripts/import/import-media.ts index 8f2c64f04..034f93a4b 100644 --- a/packages/scripts/import/import-media.ts +++ b/packages/scripts/import/import-media.ts @@ -1,31 +1,32 @@ -import { join, dirname } from 'path'; -import { fileURLToPath } from 'url'; -const __dirname = dirname(fileURLToPath(import.meta.url)); +import { dirname, join } from 'node:path' +import { fileURLToPath } from 'node:url' -import * as fs from 'fs'; -import { environment, storage, timestamp } from '../config.js'; -import { getImageServingUrl } from './getImageServingUrl.js'; -import { GoalDatabasePhoto } from '@living-dictionaries/types/photo.interface.js'; +import * as fs from 'node:fs' +import type { GoalDatabasePhoto } from '@living-dictionaries/types/photo.interface.js' +import { environment, storage, timestamp } from '../config-firebase.js' +import { getImageServingUrl } from './getImageServingUrl.js' -const fileBucket = `talking-dictionaries-${environment == 'prod' ? 'alpha' : 'dev'}.appspot.com`; +const __dirname = dirname(fileURLToPath(import.meta.url)) + +const fileBucket = `talking-dictionaries-${environment === 'prod' ? 
'alpha' : 'dev'}.appspot.com` export async function uploadAudioFile( audioFileName: string, entryId: string, dictionaryId: string, - dry = false + dry = false, ): Promise<string> { - const audioDir = join(__dirname, `data/${dictionaryId}/audio`); - const audioFilePath = join(audioDir, audioFileName); + const audioDir = join(__dirname, `data/${dictionaryId}/audio`) + const audioFilePath = join(audioDir, audioFileName) if (!fs.existsSync(audioFilePath)) { - console.log(`>> Missing audio file: ${audioFileName}`); - return null; + console.log(`>> Missing audio file: ${audioFileName}`) + return null } try { - const fileTypeSuffix = audioFileName.match(/\.[0-9a-z]+$/i)[0]; - const uploadedAudioPath = `${dictionaryId}/audio/${entryId}_${new Date().getTime()}${fileTypeSuffix}`; + const [fileTypeSuffix] = audioFileName.match(/\.[0-9a-z]+$/i) + const uploadedAudioPath = `${dictionaryId}/audio/${entryId}_${new Date().getTime()}${fileTypeSuffix}` if (!dry) { await storage.bucket(fileBucket).upload(audioFilePath, { @@ -33,14 +34,11 @@ export async function uploadAudioFile( metadata: { originalFileName: audioFileName, }, - }); + }) } - return uploadedAudioPath; + return uploadedAudioPath } catch (err) { - console.log( - `!!! Not adding audio ${audioFileName} as the server had trouble uploading it. Double-check the file to see if there is a problem with it or perhaps there is code/server/network-connection problem. Error: ${err}` - ); - return null; + throw new Error(`Not adding audio ${audioFileName} as the server had trouble uploading it. Double-check the file to see if there is a problem with it or perhaps there is a code/server/network-connection problem. Error: ${err}`) } } @@ -48,36 +46,34 @@ export async function uploadImageFile( imageFileName: string, entryId: string, dictionaryId: string, - dry = false + dry = false, ): Promise<GoalDatabasePhoto> { - const imageDir = join(__dirname, `data/${dictionaryId}/images`); - const imageFilePath = join(imageDir, imageFileName); + const imageDir = join(__dirname, `data/${dictionaryId}/images`) + const imageFilePath = join(imageDir, imageFileName) if (!fs.existsSync(imageFilePath)) { - console.log(`>> Missing image file: ${imageFileName}`); - return null; + console.log(`>> Missing image file: ${imageFileName}`) + return null } try { - const fileTypeSuffix = imageFileName.match(/\.[0-9a-z]+$/i)[0]; - const storagePath = `${dictionaryId}/images/${entryId}_${new Date().getTime()}${fileTypeSuffix}`; + const [fileTypeSuffix] = imageFileName.match(/\.[0-9a-z]+$/i) + const storagePath = `${dictionaryId}/images/${entryId}_${new Date().getTime()}${fileTypeSuffix}` if (dry) - return { path: storagePath, gcs: 'no-path-bc-dry-run' }; - + return { path: storagePath, gcs: 'no-path-bc-dry-run' } await storage.bucket(fileBucket).upload(imageFilePath, { destination: storagePath, metadata: { originalFileName: imageFileName, }, - }); + }) - let gcsPath; + let gcsPath try { - gcsPath = await getImageServingUrl(storagePath, environment); + gcsPath = await getImageServingUrl(storagePath, environment) } catch (err) { - console.log(`!!! Error getting image serving URL: ${err}`); - gcsPath = ''; + throw new Error(`!!! Error getting image serving URL: ${err}`) } return { @@ -85,11 +81,8 @@ export async function uploadImageFile( gcs: gcsPath, ts: timestamp, // cr: // not yet included in import template - }; + } } catch (err) { - console.log( - `!!! Not adding image ${imageFileName} as the server had trouble digesting it.
Double-check the file to see if it is just a corrupted jpg (as some are) or if the file is good and perhaps there is code/server/network-connection problem. Error: ${err}` - ); - return null; + throw new Error(`!!! Not adding image ${imageFileName} as the server had trouble digesting it. Double-check the file to see if it is just a corrupted jpg (as some are) or if the file is good and perhaps there is a code/server/network-connection problem. Error: ${err}`) } } diff --git a/packages/scripts/import/import-to-firebase-supabase.ts b/packages/scripts/import/import-to-firebase-supabase.ts new file mode 100644 index 000000000..913819e7f --- /dev/null +++ b/packages/scripts/import/import-to-firebase-supabase.ts @@ -0,0 +1,207 @@ +import { readFileSync } from 'node:fs' +import { randomUUID } from 'node:crypto' +import type { ActualDatabaseEntry, ContentUpdateRequestBody, ISpeaker } from '@living-dictionaries/types' +import type { Timestamp } from 'firebase/firestore' +import { db, environment, timestamp } from '../config-firebase.js' +import { uploadAudioFile, uploadImageFile } from './import-media.js' +import { parseCSVFrom } from './parse-csv.js' +import { post_request } from './post-request.js' +import { convert_row_to_objects_for_databases } from './convert_row_to_objects_for_databases.js' +import type { Row } from './row.type' + +const supabase_content_update_endpoint = 'http://localhost:3041/api/db/content-update' +const developer_in_charge_supabase_uid = '12345678-abcd-efab-cdef-123456789013' // in Supabase diego@livingtongues.org -> Diego Córdova Nieto; +const developer_in_charge_firebase_uid = 'qkTzJXH24Xfc57cZJRityS6OTn52' // diego@livingtongues.org -> Diego Córdova Nieto; +type unique_speakers = Record<string, string> +const different_speakers: unique_speakers = {} + +export async function importFromSpreadsheet(dictionaryId: string, dry = false) { + const dateStamp = Date.now() + + const file = readFileSync(`./import/data/${dictionaryId}/${dictionaryId}.csv`, 'utf8') + const rows = parseCSVFrom(file) + const entries = await importEntries(dictionaryId, rows, dateStamp, dry) + + console.log( + `Finished ${dry ? 'emulating' : 'importing'} ${entries.length} entries to ${environment === 'dev' ?
'http://localhost:3041/' : 'livingdictionaries.app/' + }${dictionaryId} in ${(Date.now() - dateStamp) / 1000} seconds`, + ) + console.log('') // line break + return entries +} + +export async function importEntries( + dictionary_id: string, + rows: Row[], + dateStamp: number, + dry = false, +): Promise<ActualDatabaseEntry[]> { + const firebase_entries: ActualDatabaseEntry[] = [] + let entryCount = 0 + let batchCount = 0 + let batch = db.batch() + const colRef = db.collection(`dictionaries/${dictionary_id}/words`) + + const speaker_snapshots = (await db.collection('speakers').where('contributingTo', 'array-contains', dictionary_id).get()).docs + const speakers = speaker_snapshots.map((snap) => { + return { id: snap.id, ...(snap.data() as ISpeaker) } + }) + + for (const row of rows) { + if (!row.lexeme || row.lexeme === '(word/phrase)') + continue + + if (!dry && batchCount === 200) { + console.log('Committing batch of entries ending with: ', entryCount) + await batch.commit() + batch = db.batch() + batchCount = 0 + } + + const universal_entry_id = colRef.doc().id + + const { firebase_entry, supabase_senses, supabase_sentences } = convert_row_to_objects_for_databases({ row, dateStamp, timestamp }) + + for (const { sense, sense_id } of supabase_senses) { + await update_sense({ entry_id: universal_entry_id, dictionary_id, sense, sense_id, dry }) + } + for (const { sentence, sentence_id, sense_id } of supabase_sentences) { + await update_sentence({ entry_id: universal_entry_id, dictionary_id, sentence, sense_id, sentence_id, dry }) + } + + if (row.photoFile) { + const pf = await uploadImageFile(row.photoFile, universal_entry_id, dictionary_id, dry) + if (pf) firebase_entry.pf = pf + } + + if (row.soundFile) { + const audioFilePath = await uploadAudioFile(row.soundFile, universal_entry_id, dictionary_id, dry) + firebase_entry.sf = { + path: audioFilePath, + ts: Date.now(), + } + + if (row.speakerName) { + const speaker: ISpeaker = speakers.find(speaker => speaker.displayName === row.speakerName) + if (speaker) { + firebase_entry.sf.sp = speaker.id + } else { + const new_speaker: ISpeaker = { + displayName: row.speakerName, + birthplace: row.speakerHometown || '', + decade: Number.parseInt(row.speakerAge), + gender: row.speakerGender as 'm' | 'f' | 'o', + contributingTo: [dictionary_id], + createdAt: timestamp as Timestamp, + createdBy: developer_in_charge_firebase_uid, + updatedAt: timestamp as Timestamp, + updatedBy: developer_in_charge_firebase_uid, + } + const new_speaker_id = await db.collection('speakers').add(new_speaker).then(ref => ref.id) + firebase_entry.sf.sp = new_speaker_id + speakers.push({ id: new_speaker_id, ...new_speaker }) + } + } + } + + firebase_entries.push(firebase_entry) + batch.create(colRef.doc(universal_entry_id), firebase_entry) + batchCount++ + entryCount++ + } + + console.log(`Committing final batch of entries ending with: ${entryCount}`) + if (!dry) await batch.commit() + return firebase_entries +} + +export async function update_sense({ + entry_id, + dictionary_id, + sense, + sense_id, + dry, +}: { + entry_id: string + dictionary_id: string + sense: ContentUpdateRequestBody['change']['sense'] + sense_id: string + dry: boolean +}) { + if (dry) return console.log({ dry_sense: sense }) + + const { error } = await post_request(supabase_content_update_endpoint, { + id: randomUUID(), + auth_token: null, + user_id_from_local: developer_in_charge_supabase_uid, + dictionary_id, + entry_id, + timestamp: new Date().toISOString(), + sense_id, + table: 'senses', + change: { + sense, + }, + import_id:
null, // TODO: add this - should match the one used in firebase entries + }) + + if (error) { + console.error('Error inserting into Supabase: ', error) + throw error + } + + return true +} + +export async function update_sentence({ + entry_id, + dictionary_id, + sentence, + sense_id, + sentence_id, + dry, +}: { + entry_id: string + dictionary_id: string + sentence: ContentUpdateRequestBody['change']['sentence'] + sense_id: string + sentence_id: string + dry: boolean +}) { + if (dry) return console.log({ dry_sentence: sentence }) + + const { error } = await post_request(supabase_content_update_endpoint, { + id: randomUUID(), + auth_token: null, + user_id_from_local: developer_in_charge_supabase_uid, + dictionary_id, + entry_id, + timestamp: new Date().toISOString(), + sense_id, + sentence_id, + table: 'sentences', + change: { + sentence, + }, + import_id: null, // TODO: add this - should match the one used in firebase entries + }) + + if (error) { + console.error('Error inserting into Supabase: ', error) + throw error + } + + return true +} + +// Current flow: (out of date - needs updating) +// Use Firebase to import entry as is already written (import-spreadsheet-v4.ts) including 1st sense, but check the import data for additional senses. If so then do the below flow at that point using a simple function call. +// use that entry id to add additional senses to Supabase via entry_updates (seen in routes\api\db\change\entry\+server.ts and lib\supabase\change\sense.ts) - one update for ps, one for gloss +// add example sentence to new table (Jacob will create, so it doesn't exist yet) +// add another entry_update to connect that example sentence id to the sense + +// Future Supabase-only flow - ignore for now +// Import entry into imports table, after which a trigger edge function will create the entry, get the entry id +// use that entry id to add senses via entry_updates +// add example sentence to new table (doesn't exist yet) +// add entry_update to connect that example sentence to the sense diff --git a/packages/scripts/import/import.ts b/packages/scripts/import/import.ts index 14b5c5c32..3473a9860 100644 --- a/packages/scripts/import/import.ts +++ b/packages/scripts/import/import.ts @@ -1,18 +1,28 @@ -import { program } from 'commander'; +import { program } from 'commander' +// @ts-expect-error +import detect from 'detect-port' +import { importFromSpreadsheet } from './import-to-firebase-supabase' + +await checkForDevServer() + +async function checkForDevServer() { + const port = await detect(3041) // will return 3041 if available, next available if it's not (so if 3041 is taken, it will return 3042, etc.)
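+ // anything above 3041 coming back therefore means the port is already occupied - i.e. the dev server is up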
+ const devServerRunning = port > 3041 + if (devServerRunning) return + throw new Error('SvelteKit dev server not detected - run `pnpm dev` before running this import script to ensure the endpoint functions that save to Supabase are available.') +} program // .version('0.0.1') .option('-e, --environment [dev/prod]', 'Firebase Project', 'dev') .option('--id ', 'Dictionary Id') .option('--dry', 'Only log values, do not upload data and media') - .parse(process.argv); - -import { importFromSpreadsheet } from './import-spreadsheet-v4.js'; + .parse(process.argv) -const dictionaryId = program.opts().id; -const {dry} = program.opts(); +const dictionaryId = program.opts().id +const { dry } = program.opts() if (dry) - console.log('Dry run, no data will be uploaded'); + console.log('Dry run, no data will be uploaded') -console.log(`Importing ${dictionaryId} to ${program.opts().environment}.`); -importFromSpreadsheet(dictionaryId, dry).then((entries) => console.log(entries)); +console.log(`Importing ${dictionaryId} to ${program.opts().environment}.`) +importFromSpreadsheet(dictionaryId, dry).then(entries => console.log(entries)) diff --git a/packages/scripts/import/convertJsonRowToEntryFormat.test.ts b/packages/scripts/import/old/convertJsonRowToEntryFormat.test.ts similarity index 83% rename from packages/scripts/import/convertJsonRowToEntryFormat.test.ts rename to packages/scripts/import/old/convertJsonRowToEntryFormat.test.ts index d8ea07b7a..e1fea8b5f 100644 --- a/packages/scripts/import/convertJsonRowToEntryFormat.test.ts +++ b/packages/scripts/import/old/convertJsonRowToEntryFormat.test.ts @@ -1,13 +1,10 @@ -import { - convertJsonRowToEntryFormat, - returnArrayFromCommaSeparatedItems, -} from './convertJsonRowToEntryFormat.js'; -import { readFileSync } from 'fs'; -import { parseCSVFrom } from './parse-csv.js'; +import { readFileSync } from 'node:fs' import path from 'node:path' +import { convertJsonRowToEntryFormat, returnArrayFromCommaSeparatedItems } from './convertJsonRowToEntryFormat.js' +import { parseCSVFrom } from './../parse-csv.js' -describe('convertJsonRowToEntryFormat', () => { - const fakeTimeStamp = 10101010; +describe.skip('convertJsonRowToEntryFormat', () => { + const fakeTimeStamp = 10101010 test('glosses', () => { const csv_rows_without_header: Record[] = [ @@ -15,8 +12,8 @@ describe('convertJsonRowToEntryFormat', () => { lexeme: 'dolphin', es_gloss: 'delfín', }, - ]; - const entries = csv_rows_without_header.map((row) => convertJsonRowToEntryFormat(row)); + ] + const entries = csv_rows_without_header.map(row => convertJsonRowToEntryFormat(row)) expect(entries).toMatchInlineSnapshot(` [ @@ -27,8 +24,8 @@ describe('convertJsonRowToEntryFormat', () => { "lx": "dolphin", }, ] - `); - }); + `) + }) test('example sentences', () => { const csv_rows_without_header: Record[] = [ @@ -36,8 +33,8 @@ describe('convertJsonRowToEntryFormat', () => { lexeme: 'dolphin', es_exampleSentence: 'el delfín nada en el océano.', }, - ]; - const entries = csv_rows_without_header.map((row) => convertJsonRowToEntryFormat(row)); + ] + const entries = csv_rows_without_header.map(row => convertJsonRowToEntryFormat(row)) expect(entries).toMatchInlineSnapshot(` [ @@ -49,8 +46,8 @@ describe('convertJsonRowToEntryFormat', () => { }, }, ] - `); - }); + `) + }) test('semantic domains', () => { const csv_rows_without_header: Record[] = [ @@ -60,8 +57,8 @@ describe('convertJsonRowToEntryFormat', () => { semanticDomain2: '1', semanticDomain_custom: 'the sea!', }, - ]; - const entries = 
csv_rows_without_header.map((row) => convertJsonRowToEntryFormat(row)); + ] + const entries = csv_rows_without_header.map(row => convertJsonRowToEntryFormat(row)) expect(entries).toMatchInlineSnapshot(` [ @@ -77,22 +74,22 @@ describe('convertJsonRowToEntryFormat', () => { ], }, ] - `); - }); + `) + }) test('high-level conversion from csv', async () => { - const dictionaryId = 'example-v4'; - const file = readFileSync(path.join(__dirname, `./data/${dictionaryId}/${dictionaryId}.csv`), 'utf8'); - const rows = parseCSVFrom(file); - const rowsWithoutHeader = removeHeaderRow(rows); - const entries = rowsWithoutHeader.map((row) => + const dictionaryId = 'example-v4' + const file = readFileSync(path.join(__dirname, `./data/${dictionaryId}/${dictionaryId}.csv`), 'utf8') + const rows = parseCSVFrom(file) + const rowsWithoutHeader = removeHeaderRow(rows) + const entries = rowsWithoutHeader.map(row => convertJsonRowToEntryFormat( row, fakeTimeStamp, - // eslint-disable-next-line no-undef - fakeTimeStamp as unknown as FirebaseFirestore.FieldValue - ) - ); + + fakeTimeStamp as unknown as FirebaseFirestore.FieldValue, + ), + ) expect(entries).toEqual([ { @@ -243,16 +240,16 @@ describe('convertJsonRowToEntryFormat', () => { lx: 'vale', ua: 10101010, }, - ]); - }); + ]) + }) test('does not duplicate vernacular', () => { const csv_rows_without_header: Record[] = [ { vernacular_exampleSentence: 'Hello world', }, - ]; - const entries = csv_rows_without_header.map((row) => convertJsonRowToEntryFormat(row)); + ] + const entries = csv_rows_without_header.map(row => convertJsonRowToEntryFormat(row)) expect(entries).toMatchInlineSnapshot(` [ @@ -264,22 +261,22 @@ describe('convertJsonRowToEntryFormat', () => { }, }, ] - `); - }); -}); + `) + }) +}) function removeHeaderRow(rows: any[]) { - return rows.splice(1); + return rows.splice(1) } describe('returnArrayFromCommaSeparatedItems', () => { test('splits two comma separated items into an array', () => { - expect(returnArrayFromCommaSeparatedItems('n,v')).toStrictEqual(['n', 'v']); - }); + expect(returnArrayFromCommaSeparatedItems('n,v')).toStrictEqual(['n', 'v']) + }) test('handles unusual comma spacing', () => { - expect(returnArrayFromCommaSeparatedItems('n, v ,adj')).toStrictEqual(['n', 'v', 'adj']); - }); + expect(returnArrayFromCommaSeparatedItems('n, v ,adj')).toStrictEqual(['n', 'v', 'adj']) + }) test('returns empty array from undefined', () => { - expect(returnArrayFromCommaSeparatedItems(undefined)).toStrictEqual([]); - }); -}); + expect(returnArrayFromCommaSeparatedItems(undefined)).toStrictEqual([]) + }) +}) diff --git a/packages/scripts/import/old/convertJsonRowToEntryFormat.ts b/packages/scripts/import/old/convertJsonRowToEntryFormat.ts new file mode 100644 index 000000000..114eb06a9 --- /dev/null +++ b/packages/scripts/import/old/convertJsonRowToEntryFormat.ts @@ -0,0 +1,153 @@ +import { randomUUID } from 'node:crypto' +import type { ActualDatabaseEntry } from '@living-dictionaries/types' +import type { Timestamp } from 'firebase/firestore' + +interface StandardData { + row: Record + dateStamp?: number + timestamp?: FirebaseFirestore.FieldValue +} + +interface SenseData { + entry_id: string + dictionary_id: string +} + +export function convertJsonRowToEntryFormat( + standard: StandardData, + senseData?: SenseData, +): ActualDatabaseEntry { + const { row, dateStamp, timestamp } = standard + const entry: ActualDatabaseEntry = { lx: row.lexeme, gl: {}, xs: {} } + const sense_regex = /^s\d+_/ + let glossObject: Record = {} + const exampleSentenceObject: 
Record = {} + const exampleSentenceTranslationObject: Record = {} + let sense_id = randomUUID() + let sentence_id = randomUUID() + let old_key = 2 + + if (row.phonetic) entry.ph = row.phonetic + if (row.morphology) entry.mr = row.morphology + if (row.interlinearization) entry.in = row.interlinearization + if (row.partOfSpeech) entry.ps = returnArrayFromCommaSeparatedItems(row.partOfSpeech) + if (row.dialects) entry.di = row.dialects.split(',').map(dialect => dialect.trim()) + if (row.variant) entry.va = row.variant + if (row.nounClass) entry.nc = row.nounClass + if (row.source) entry.sr = row.source.split('|') + if (row.pluralForm) entry.pl = row.pluralForm + if (row.scientificName) entry.scn = [row.scientificName] + if (row.semanticDomain_custom) entry.sd = [row.semanticDomain_custom] + if (row.ID) entry.ei = row.ID + + if (row.localOrthography) entry.lo1 = row.localOrthography + if (row.localOrthography2) entry.lo2 = row.localOrthography2 + if (row.localOrthography3) entry.lo3 = row.localOrthography3 + if (row.localOrthography4) entry.lo4 = row.localOrthography4 + if (row.localOrthography5) entry.lo5 = row.localOrthography5 + + if (row.notes) entry.nt = row.notes + + for (const [key, value] of Object.entries(row)) { + if (!value) continue + + // gloss fields are labeled using bcp47 language codes followed by '_gloss' (e.g. es_gloss, tpi_gloss) + if (key.includes('_gloss') && !sense_regex.test(key)) { + const [language] = key.split('_gloss') + entry.gl[language] = value + } + + if (key.includes('vernacular_exampleSentence')) { + entry.xs.vn = value + continue // to keep next block from also adding + } + + // example sentence fields are codes followed by '_exampleSentence' + if (key.includes('_exampleSentence')) { + const [language] = key.split('_exampleSentence') + entry.xs[language] = value + } + + if (senseData) { + console.log(`key: ${key}`) + if (key === 'lexeme') + console.log(`lexeme: ${value}`) + const { entry_id, dictionary_id } = senseData + if (sense_regex.test(key)) { + if (key.includes('_gloss')) { + let language_key = key.replace(sense_regex, '') + language_key = language_key.replace('_gloss', '') + console.log(`language key: ${language_key}`) + + if (key === `s${old_key}_${language_key}_gloss`) { + glossObject[language_key] = row[key] + } else { + old_key++ + sense_id = randomUUID() + glossObject = {} + glossObject[language_key] = row[key] + } + console.log(`old key: ${old_key}`) + console.log(`sense id: ${sense_id}`) + update_sense(entry_id, dictionary_id, { glosses: { new: glossObject } }, sense_id) + console.log(`gloss object: ${JSON.stringify(glossObject)}`) + } + + console.log(`sentence id before vernacular example sentence: ${sentence_id}`) + if (key.includes('_vn_ES')) { + let writing_system = key.replace(sense_regex, '') + writing_system = writing_system.replace('_vn_ES', '') + + if (key === `s${old_key}_${writing_system}_vn_ES`) { + sentence_id = randomUUID() + exampleSentenceObject[writing_system] = row[key] + update_sentence(entry_id, dictionary_id, { text: { new: exampleSentenceObject } }, sense_id, sentence_id) + } + } + console.log(`sentence id before translation example sentence: ${sentence_id}`) + if (key.includes('_GES')) { + let language_key = key.replace(sense_regex, '') + language_key = language_key.replace('_GES', '') + + exampleSentenceTranslationObject[language_key] = row[key] + // if (key === `s${old_key}_${language_key}_GES`) { + // console.log('Is it getting here at all??') + // } + update_sentence(entry_id, dictionary_id, { translation: { new: 
exampleSentenceTranslationObject } }, sense_id, sentence_id) + } + + console.log(`sense id before pos: ${sense_id}`) + if (key.includes('_partOfSpeech')) + update_sense(entry_id, dictionary_id, { parts_of_speech: { new: [row[key]] } }, sense_id) + + if (key.includes('_semanticDomains')) + update_sense(entry_id, dictionary_id, { semantic_domains: { new: [row[key]] } }, sense_id) + + if (key.includes('_nounClass')) + update_sense(entry_id, dictionary_id, { noun_class: { new: [row[key]] } }, sense_id) + } + } + + const semanticDomain_FOLLOWED_BY_OPTIONAL_DIGIT = /^semanticDomain\d*$/ // semanticDomain, semanticDomain2, semanticDomain<#>, but not semanticDomain_custom + if (semanticDomain_FOLLOWED_BY_OPTIONAL_DIGIT.test(key)) { + if (!entry.sdn) entry.sdn = [] + + entry.sdn.push(value.toString()) + } + } + + if (Object.keys(entry.xs).length === 0) + delete entry.xs + + if (!dateStamp) return entry + + entry.ii = `v4-${dateStamp}` + entry.ca = timestamp as Timestamp + entry.ua = timestamp as Timestamp + + return entry +} + +export function returnArrayFromCommaSeparatedItems(string: string): string[] { + return string?.split(',').map(item => item.trim()) || [] +} diff --git a/packages/scripts/import/import-spreadsheet-v4.ts b/packages/scripts/import/old/import-spreadsheet-v4.ts similarity index 51% rename from packages/scripts/import/import-spreadsheet-v4.ts rename to packages/scripts/import/old/import-spreadsheet-v4.ts index 649f7a053..ff36b30c4 100644 --- a/packages/scripts/import/import-spreadsheet-v4.ts +++ b/packages/scripts/import/old/import-spreadsheet-v4.ts @@ -1,101 +1,101 @@ -import type { ActualDatabaseEntry } from '@living-dictionaries/types'; -import { db, timestamp, environment } from '../config.js'; -import { uploadAudioFile, uploadImageFile } from './import-media.js'; -import { readFileSync } from 'fs'; -import { parseCSVFrom } from './parse-csv.js'; -import { convertJsonRowToEntryFormat } from './convertJsonRowToEntryFormat.js'; +import { readFileSync } from 'node:fs' +import type { ActualDatabaseEntry } from '@living-dictionaries/types' +import { db, environment, timestamp } from '../../config-firebase.js' +import { uploadAudioFile, uploadImageFile } from './../import-media.js' +import { parseCSVFrom } from './../parse-csv.js' +import { convertJsonRowToEntryFormat } from './../convertJsonRowToEntryFormat.js' -const developer_in_charge = 'qkTzJXH24Xfc57cZJRityS6OTn52'; // diego@livingtongues.org -> Diego Córdova Nieto; -type unique_speakers = Record; -const different_speakers: unique_speakers[] = []; +const developer_in_charge = 'qkTzJXH24Xfc57cZJRityS6OTn52' // diego@livingtongues.org -> Diego Córdova Nieto; +type unique_speakers = Record +const different_speakers: unique_speakers[] = [] export async function importFromSpreadsheet(dictionaryId: string, dry = false) { - const dateStamp = Date.now(); + const dateStamp = Date.now() - const file = readFileSync(`./import/data/${dictionaryId}/${dictionaryId}.csv`, 'utf8'); - const rows = parseCSVFrom(file); - const entries = await importEntriesToFirebase(dictionaryId, rows, dateStamp, dry); + const file = readFileSync(`./import/data/${dictionaryId}/${dictionaryId}.csv`, 'utf8') + const rows = parseCSVFrom(file) + const entries = await importEntriesToFirebase(dictionaryId, rows, dateStamp, dry) console.log( `Finished ${dry ? 'emulating' : 'importing'} ${entries.length} entries to ${ environment === 'dev' ? 
'http://localhost:3041/' : 'livingdictionaries.app/' - }${dictionaryId} in ${(Date.now() - dateStamp) / 1000} seconds` - ); - console.log(''); - return entries; + }${dictionaryId} in ${(Date.now() - dateStamp) / 1000} seconds`, + ) + console.log('') + return entries } export async function importEntriesToFirebase( dictionaryId: string, rows: any[], dateStamp: number, - dry = false + dry = false, ) { - const entries: ActualDatabaseEntry[] = []; - let entryCount = 0; - let batchCount = 0; - let batch = db.batch(); - const colRef = db.collection(`dictionaries/${dictionaryId}/words`); - const speakerRef = db.collection('speakers'); - const dictionarySpeakerSnapshot = await speakerRef.where('contributingTo', 'array-contains', dictionaryId).get(); - dictionarySpeakerSnapshot.docs.forEach((snap) => different_speakers.push({ [snap.data().displayName]: snap.id })); - let speakerId; + const entries: ActualDatabaseEntry[] = [] + let entryCount = 0 + let batchCount = 0 + let batch = db.batch() + const colRef = db.collection(`dictionaries/${dictionaryId}/words`) + const speakerRef = db.collection('speakers') + const dictionarySpeakerSnapshot = await speakerRef.where('contributingTo', 'array-contains', dictionaryId).get() + dictionarySpeakerSnapshot.docs.forEach(snap => different_speakers.push({ [snap.data().displayName]: snap.id })) + let speakerId for (const row of rows) { if (!row.lexeme || row.lexeme === '(word/phrase)') - continue; + continue if (!dry && batchCount === 200) { - console.log('Committing batch of entries ending with: ', entryCount); - await batch.commit(); - batch = db.batch(); - batchCount = 0; + console.log('Committing batch of entries ending with: ', entryCount) + await batch.commit() + batch = db.batch() + batchCount = 0 } - const entryId = colRef.doc().id; - const entry = convertJsonRowToEntryFormat(row, dateStamp, timestamp); + const entryId = colRef.doc().id + const entry = convertJsonRowToEntryFormat(row, dateStamp, timestamp) if (row.photoFile) { - const pf = await uploadImageFile(row.photoFile, entryId, dictionaryId, dry); - if (pf) entry.pf = pf; + const pf = await uploadImageFile(row.photoFile, entryId, dictionaryId, dry) + if (pf) entry.pf = pf } if (row.soundFile) { - speakerId = different_speakers.find(speaker => Object.keys(speaker).some(key => key === row.speakerName))?.[row.speakerName]; + speakerId = different_speakers.find(speaker => Object.keys(speaker).includes(row.speakerName))?.[row.speakerName] if (row.speakerName && !speakerId) { - speakerId = speakerRef.doc().id; - different_speakers.push({[row.speakerName]: speakerId}); + speakerId = speakerRef.doc().id + different_speakers.push({ [row.speakerName]: speakerId }) batch.create(speakerRef.doc(speakerId), { displayName: row.speakerName, birthplace: row.speakerHometown || '', - decade: parseInt(row.speakerAge) || '', + decade: Number.parseInt(row.speakerAge) || '', gender: row.speakerGender || '', contributingTo: [dictionaryId], createdAt: timestamp, createdBy: developer_in_charge, updatedAt: timestamp, updatedBy: developer_in_charge, - }); + }) } - const audioFilePath = await uploadAudioFile(row.soundFile, entryId, dictionaryId, dry); + const audioFilePath = await uploadAudioFile(row.soundFile, entryId, dictionaryId, dry) if (audioFilePath) { entry.sfs = [{ path: audioFilePath, ts: new Date().getTime(), - }]; + }] if (speakerId) - entry.sfs[0].sp = [speakerId]; + entry.sfs[0].sp = [speakerId] else - entry.sf.speakerName = row.speakerName; // Keep that if for some reason we need the speakername as text only 
again. + entry.sf.speakerName = row.speakerName // Keep this in case we ever need the speakerName as text only again. } } - entries.push(entry); - batch.create(colRef.doc(entryId), entry); - batchCount++; - entryCount++; + entries.push(entry) + batch.create(colRef.doc(entryId), entry) + batchCount++ + entryCount++ } - console.log(`Committing final batch of entries ending with: ${entryCount}`); - if (!dry) await batch.commit(); - return entries; + console.log(`Committing final batch of entries ending with: ${entryCount}`) + if (!dry) await batch.commit() + return entries } diff --git a/packages/scripts/import/old/import-spreadsheet.ts b/packages/scripts/import/old/import-spreadsheet.ts index 751fc0f8e..61e26ecfe 100644 --- a/packages/scripts/import/old/import-spreadsheet.ts +++ b/packages/scripts/import/old/import-spreadsheet.ts @@ -1,5 +1,5 @@ import * as fs from 'fs-extra'; -import { environment } from './config'; +import { environment } from './config-firebase'; import * as xlsx from 'xlsx'; import * as csv from 'csvtojson'; import { importSpreadsheetToFirebase } from './import-spreadsheet-to-firebase'; diff --git a/packages/scripts/import/parse-csv.ts b/packages/scripts/import/parse-csv.ts index 6682da0d8..3cc48ab00 100644 --- a/packages/scripts/import/parse-csv.ts +++ b/packages/scripts/import/parse-csv.ts @@ -1,13 +1,13 @@ -import { readFileSync } from 'fs'; -import { parse } from 'csv-parse/sync'; +import { readFileSync } from 'node:fs' import path from 'node:path' +import { parse } from 'csv-parse/sync' -export function parseCSVFrom(contents: string): Record<string, any>[] { +export function parseCSVFrom<T extends Record<string, any>>(contents: string): T[] { return parse(contents, { columns: true, skip_empty_lines: true, trim: true, - }); + }) } if (import.meta.vitest) { @@ -15,19 +15,19 @@ if (import.meta.vitest) { const csv = ` a ,the book ,c 1,2 , 3 -`; +` expect(parseCSVFrom(csv)).toEqual([ { 'a': '1', 'the book': '2', 'c': '3', }, - ]); - }); + ]) + }) test('parseCSV logs out example.csv as array', () => { const filepath = path.join(__dirname, './data/example-v4/example-v4.csv') - const file = readFileSync(filepath, 'utf8'); + const file = readFileSync(filepath, 'utf8') expect(parseCSVFrom(file)).toMatchInlineSnapshot(` [ { @@ -283,6 +283,6 @@ a ,the book ,c "vernacular_exampleSentence": "", }, ] - `); - }); + `) + }) } diff --git a/packages/scripts/import/post-request.ts b/packages/scripts/import/post-request.ts new file mode 100644 index 000000000..f1859011d --- /dev/null +++ b/packages/scripts/import/post-request.ts @@ -0,0 +1,47 @@ +import { ResponseCodes } from '@living-dictionaries/site/src/lib/constants' + +type Return<ExpectedResponse> = { + data: ExpectedResponse + error: null +} | { + data: null + error: { status: number, message: string } +} + +const default_headers: RequestInit['headers'] = { + 'content-type': 'application/json', +} + +export async function post_request<T extends Record<string, any>, ExpectedResponse extends Record<string, any> = any>(route: string, data: T, options?: { + fetch?: typeof fetch + headers?: RequestInit['headers'] +}): Promise<Return<ExpectedResponse>> { + const fetch_to_use = options?.fetch || fetch + + const response = await fetch_to_use(route, { + method: 'POST', + body: JSON.stringify(data), + headers: options?.headers || default_headers, + }) + + return handleResponse<ExpectedResponse>(response) +} + +async function handleResponse<ExpectedResponse extends Record<string, any>>(response: Response): Promise<Return<ExpectedResponse>> { + const { status } = response + if (status !== ResponseCodes.OK) { + const responseClone = response.clone() + try { + try { + const body = await response.json() + const error = { status, message: body.message || JSON.stringify(body) } + return { data: null, error } + } catch { + const textBody = await responseClone.text() + return { data: null, error: { status, message: textBody } } + } + } catch (err) { + return { data: null, error: { status, message: err.message } } + } + } + + const body = await response.json() as ExpectedResponse + return { data: body, error: null } +} diff --git a/packages/scripts/import/row.type.ts b/packages/scripts/import/row.type.ts new file mode 100644 index 000000000..94e0faf52 --- /dev/null +++ b/packages/scripts/import/row.type.ts @@ -0,0 +1,23 @@ +import type { Glossing_Languages } from '@living-dictionaries/site/src/lib/glosses/glossing-languages' + +export type Row = { + [key in (Entry_Fields | Sense_Fields | Sentence_Fields)]?: string; +} +// 's3.es_gloss': 'hi', +// 'semanticDomain4': '2.3', +// 's2.fr_exampleSentence.3': 'Bonjour docteur', +// 's4.default_vernacular_exampleSentence': 'foo bar', + +type Entry_Fields = `${String_Fields | Multiple_Fields | Translation_Fields}` + +type String_Fields = 'lexeme' | 'dialects' | 'ID' | 'soundFile' | 'speakerName' | 'scientificName' | 'speakerHometown' | 'speakerAge' | 'speakerGender' | 'notes' | 'source' | 'morphology' | 'interlinearization' | 'photoFile' | 'vernacular_exampleSentence' | 'pluralForm' | 'nounClass' | 'variant' | 'phonetic' | 'semanticDomain_custom' +type Fields_That_Can_Have_Multiple = 'localOrthography' | 'partOfSpeech' | 'semanticDomain' +type Multiple_Fields = `${Fields_That_Can_Have_Multiple}${Number_Suffix}` +type Number_Suffix = '' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' +type Translation_Fields = `${Glossing_Languages}_gloss` | `${Glossing_Languages}_exampleSentence` | `${Writing_Systems}_vernacular_exampleSentence` + +type Sense_Fields = `${Sense_Prefix}.${Entry_Fields}` // TODO: too broad +type Sense_Prefix = 's2' | 's3' | 's4' | 's5' | 's6' | 's7' | 's8' | 's9' + +type Sentence_Fields = `${Sense_Prefix}.${Entry_Fields}.${Number_Suffix}` // TODO: too broad +type Writing_Systems = 'default' // TODO improve Writing Systems field diff --git a/packages/scripts/migrate-to-supabase/auth.ts b/packages/scripts/migrate-to-supabase/auth.ts index bb2ec69ba..5ec6b2ef2 100644 --- a/packages/scripts/migrate-to-supabase/auth.ts +++ b/packages/scripts/migrate-to-supabase/auth.ts @@ -1,6 +1,6 @@ import type { UserRecord } from 'firebase-admin/auth' -import { auth } from '../config' -import { executeQuery } from './supabase-db' +import { auth } from '../config-firebase' +import { executeQuery } from '../config-supabase' import { write_users_insert } from './write-users-insert' migrate_users() diff --git a/packages/scripts/package.json b/packages/scripts/package.json index 2d5ff268e..db2f2cc68 100644 --- a/packages/scripts/package.json +++ b/packages/scripts/package.json @@ -21,6 +21,7 @@ "speakerRefactor": "tsx refactor/speaker-refactor.ts", "updateLocales": "tsx locales/update-locales.ts", "importDictionary": "tsx import/import.ts", + "importDictionary:dev": "tsx import/import.ts --id example-v4-senses", "addDictionariesToIndex:dev": "tsx algolia/addDictionariesToIndex.ts dev", "addDictionariesToIndex:prod": "tsx algolia/addDictionariesToIndex.js prod", "updateIndex": "tsx algolia/updateIndex.ts -e prod", @@ -32,12 +33,14 @@ "@living-dictionaries/functions": "workspace:^0.0.1", "@living-dictionaries/site": "workspace:^0.0.1", "@living-dictionaries/types": "^1.0.0", + "@supabase/supabase-js": "^2.38.4", "@types/node": "^18.11.18", "@types/pg": "^8.10.9", "algoliasearch": "^4.11.0", "commander": "^9.4.1", "csv-parse": "^5.3.0", "csvtojson": "^2.0.10", + "detect-port": "^1.6.1", "dotenv":
"^16.0.2", "firebase": "^10.9.0", "firebase-admin": "^12.0.0", diff --git a/packages/scripts/refactor/entry-refactor.ts b/packages/scripts/refactor/entry-refactor.ts index 5f3786912..a96ca9826 100644 --- a/packages/scripts/refactor/entry-refactor.ts +++ b/packages/scripts/refactor/entry-refactor.ts @@ -1,7 +1,6 @@ -/* eslint-disable unused-imports/no-unused-vars */ import type { ActualDatabaseEntry } from '@living-dictionaries/types' import { program } from 'commander' -import { db } from '../config' +import { db } from '../config-firebase' import { reverse_semantic_domains_mapping } from './reverse-semantic-domains-mapping' import { turn_dialect_strings_to_arrays } from './turn-dialects-to-arrays' diff --git a/packages/scripts/refactor/move-firestore-document.ts b/packages/scripts/refactor/move-firestore-document.ts index 8e34a2899..198a922f6 100644 --- a/packages/scripts/refactor/move-firestore-document.ts +++ b/packages/scripts/refactor/move-firestore-document.ts @@ -1,4 +1,4 @@ -import { db } from '../config'; +import { db } from '../config-firebase' // deleteDocRecursively(`dictionaries/sipu`); // copyDoc(`dictionaries/sipu`, `dictionaries/conestoga_language`, {}, true); @@ -12,93 +12,93 @@ import { db } from '../config'; export async function moveDoc( oldDocPath: string, newDocPath: string, - addData?: any + addData?: any, ): Promise { - const copied = await copyDoc(oldDocPath, newDocPath, addData, true); + const copied = await copyDoc(oldDocPath, newDocPath, addData, true) if (copied) { - await deleteDocRecursively(`${oldDocPath}`); - return true; + await deleteDocRecursively(`${oldDocPath}`) + return true } - throw new Error('Data was not copied properly to the target collection, please try again.'); + throw new Error('Data was not copied properly to the target collection, please try again.') } export async function copyDoc( oldDocPath: string, newDocPath: string, addData: any = {}, - recursive = false + recursive = false, ): Promise { - const docRef = db.doc(oldDocPath); + const docRef = db.doc(oldDocPath) const docData = await docRef .get() - .then((doc) => doc.exists && doc.data()) + .then(doc => doc.exists && doc.data()) .catch((error) => { - throw new Error(`Error reading document ${oldDocPath}: ${JSON.stringify(error)}`); - }); + throw new Error(`Error reading document ${oldDocPath}: ${JSON.stringify(error)}`) + }) if (docData) { await db .doc(newDocPath) .set({ ...docData, ...addData }) .catch((error) => { - throw new Error(`Error creating document ${newDocPath}: ${JSON.stringify(error)}`); - }); + throw new Error(`Error creating document ${newDocPath}: ${JSON.stringify(error)}`) + }) // if copying of the subcollections is needed if (recursive) { // subcollections - const subcollections = await docRef.listCollections(); + const subcollections = await docRef.listCollections() for await (const subcollectionRef of subcollections) { - const subcollectionPath = `${oldDocPath}/${subcollectionRef.id}`; + const subcollectionPath = `${oldDocPath}/${subcollectionRef.id}` await subcollectionRef .get() .then(async (snapshot) => { - const {docs} = snapshot; + const { docs } = snapshot for await (const doc of docs) { await copyDoc( `${subcollectionPath}/${doc.id}`, `${newDocPath}/${subcollectionRef.id}/${doc.id}`, - true - ); + true, + ) } }) .catch((error) => { throw new Error( - `Error reading subcollection ${subcollectionPath}: ${JSON.stringify(error)}` - ); - }); + `Error reading subcollection ${subcollectionPath}: ${JSON.stringify(error)}`, + ) + }) } } - return true; + return true } 
- return false; + return false } export async function deleteDocRecursively(docPath: string): Promise<boolean> { - const docRef = db.doc(docPath); + const docRef = db.doc(docPath) - const subcollections = await docRef.listCollections(); + const subcollections = await docRef.listCollections() for await (const subcollectionRef of subcollections) { await subcollectionRef .get() .then(async (snapshot) => { - const {docs} = snapshot; + const { docs } = snapshot for await (const doc of docs) - await deleteDocRecursively(`${docPath}/${subcollectionRef.id}/${doc.id}`); + await deleteDocRecursively(`${docPath}/${subcollectionRef.id}/${doc.id}`) - return true; + return true }) .catch((error) => { console.error( 'Error reading subcollection', `${docPath}/${subcollectionRef.id}`, - JSON.stringify(error) - ); - return false; - }); + JSON.stringify(error), ) + return false + }) } // when all subcollections are deleted, delete the document itself @@ -106,7 +106,7 @@ .delete() .then(() => true) .catch((error) => { - console.error('Error deleting document', docPath, JSON.stringify(error)); - return false; - }); + console.error('Error deleting document', docPath, JSON.stringify(error)) + return false + }) } diff --git a/packages/scripts/refactor/upload-old-dictionaries.ts b/packages/scripts/refactor/upload-old-dictionaries.ts index bd6ab43a7..2dc2dfc81 100644 --- a/packages/scripts/refactor/upload-old-dictionaries.ts +++ b/packages/scripts/refactor/upload-old-dictionaries.ts @@ -1,33 +1,31 @@ -import { GeoPoint } from 'firebase-admin/firestore'; -import { IDictionary } from '@living-dictionaries/types'; -import { db } from '../config'; +import { GeoPoint } from 'firebase-admin/firestore' +import type { IDictionary } from '@living-dictionaries/types' +import { db } from '../config-firebase' import { tdLocations } from './tdv1-dictionaries'; (() => { try { tdLocations.forEach(async (dictionary) => { if (dictionary.properties.icon === 'library-15') { - const dictionaryUrl = dictionary.properties.xlink.match( - /http:\/\/talkingdictionary.org\/(.+)/ - )[1]; + const [,dictionaryUrl] = dictionary.properties.xlink.match(/http:\/\/talkingdictionary.org\/(.+)/) const data: Partial<IDictionary> = { name: dictionary.properties.label, population: dictionary.properties.size, publishYear: dictionary.properties.date, coordinates: new GeoPoint( dictionary.geometry.coordinates[1], - dictionary.geometry.coordinates[0] + dictionary.geometry.coordinates[0], ), url: dictionary.properties.xlink, type: 'tdv1', - }; + } if (dictionary.properties.thumbnail) - data.thumbnail = dictionary.properties.thumbnail; + data.thumbnail = dictionary.properties.thumbnail - await db.doc(`dictionaries/tdv1-${dictionaryUrl}`).set(data); + await db.doc(`dictionaries/tdv1-${dictionaryUrl}`).set(data) } - }); + }) } catch (err) { - console.log(err); + console.log(err) } -})(); +})() diff --git a/packages/scripts/tsconfig.json b/packages/scripts/tsconfig.json index 3ffc22c75..f8bd28d09 100644 --- a/packages/scripts/tsconfig.json +++ b/packages/scripts/tsconfig.json @@ -4,30 +4,30 @@ // Remove if you want ts-node to do typechecking. "transpileOnly": true, "files": true, - "esm": true, + "esm": true // "compilerOptions": { - // compilerOptions specified here will override those declared below, - // but *only* in ts-node. Useful if you want ts-node and tsc to use - // different options with a single tsconfig.json.
+ // compilerOptions specified here will override those declared below, + // but *only* in ts-node. Useful if you want ts-node and tsc to use + // different options with a single tsconfig.json. // } }, "compilerOptions": { + "target": "esnext", + "lib": [ + "esnext", + "DOM" + ], + "module": "esnext", "moduleResolution": "node", - "module": "es2020", - "lib": [ - "es2020", - "DOM" - ], - "target": "es2020", - "strict": true, - "strictNullChecks": false, - "esModuleInterop": true, - "skipLibCheck": true, - "forceConsistentCasingInFileNames": true, "resolveJsonModule": true, "types": [ "vitest/globals", "vitest/importMeta" - ] + ], + "strict": true, + "strictNullChecks": false, + "esModuleInterop": true, + "forceConsistentCasingInFileNames": true, + "skipLibCheck": true } -} \ No newline at end of file +} diff --git a/packages/site/src/db-tests/update-sense.test.ts b/packages/site/src/db-tests/update-sense.test.ts index 5c88ea442..0a5bd9061 100644 --- a/packages/site/src/db-tests/update-sense.test.ts +++ b/packages/site/src/db-tests/update-sense.test.ts @@ -1,8 +1,8 @@ -import type { ContentUpdateRequestBody, ContentUpdateResponseBody } from '$api/db/content-update/+server'; -import { post_request } from '$lib/helpers/get-post-requests'; -import { first_entry_id, seeded_dictionary_id, seeded_user_id_1, seeded_user_id_2 } from '$lib/mocks/seed/tables'; -import { reset_db } from '$lib/mocks/seed/write-seed-and-reset-db'; -import { admin_supabase, anon_supabase, uuid_template } from './clients'; +import { admin_supabase, anon_supabase, uuid_template } from './clients' +import type { ContentUpdateRequestBody, ContentUpdateResponseBody } from '$api/db/content-update/+server' +import { post_request } from '$lib/helpers/get-post-requests' +import { first_entry_id, seeded_dictionary_id, seeded_user_id_1, seeded_user_id_2 } from '$lib/mocks/seed/tables' +import { reset_db } from '$lib/mocks/seed/write-seed-and-reset-db' const content_update_endpoint = 'http://localhost:3041/api/db/content-update' @@ -24,7 +24,7 @@ describe('sense operations', () => { const { error } = await post_request(content_update_endpoint, { id: incremental_consistent_uuid(), auth_token: null, - user_id_from_test: seeded_user_id_1, + user_id_from_local: seeded_user_id_1, dictionary_id: seeded_dictionary_id, entry_id: first_entry_id, sense_id: first_entry_first_sense_id, @@ -33,8 +33,8 @@ describe('sense operations', () => { sense: { noun_class: { new: '2', - } - } + }, + }, }, timestamp: new Date('2024-03-08T00:44:04.600392+00:00').toISOString(), }) @@ -60,7 +60,7 @@ describe('sense operations', () => { const { error } = await post_request(content_update_endpoint, { id: incremental_consistent_uuid(), auth_token: null, - user_id_from_test: seeded_user_id_2, + user_id_from_local: seeded_user_id_2, dictionary_id: seeded_dictionary_id, entry_id: first_entry_id, sense_id: first_entry_first_sense_id, @@ -69,8 +69,8 @@ describe('sense operations', () => { sense: { parts_of_speech: { new: ['n', 'v'], - } - } + }, + }, }, timestamp: new Date('2024-03-08T00:44:04.600392+00:00').toISOString(), }) @@ -93,7 +93,7 @@ describe('sense operations', () => { }, ] `) - }); + }) test('updated_by is set to the second user but created_by is left alone', async () => { const { data } = await admin_supabase.from('senses').select().eq('id', first_entry_first_sense_id).single() @@ -102,12 +102,11 @@ describe('sense operations', () => { }) }) - test('adds glosses field to second sense in first entry', async () => { const { error } = await 
post_request(content_update_endpoint, { id: incremental_consistent_uuid(), auth_token: null, - user_id_from_test: seeded_user_id_1, + user_id_from_local: seeded_user_id_1, dictionary_id: seeded_dictionary_id, entry_id: first_entry_id, sense_id: first_entry_second_sense_id, @@ -118,9 +117,9 @@ describe('sense operations', () => { new: { en: 'Hi', es: 'Hola', - } - } - } + }, + }, + }, }, timestamp: new Date('2024-03-08T00:44:04.600392+00:00').toISOString(), }) @@ -151,13 +150,13 @@ describe('sense operations', () => { }, ] `) - }); + }) test('add a third sense to first entry with a glosses field', async () => { const { error } = await post_request(content_update_endpoint, { id: incremental_consistent_uuid(), auth_token: null, - user_id_from_test: seeded_user_id_1, + user_id_from_local: seeded_user_id_1, dictionary_id: seeded_dictionary_id, entry_id: first_entry_id, sense_id: first_entry_third_sense_id, @@ -166,8 +165,8 @@ describe('sense operations', () => { sense: { semantic_domains: { new: ['1', '2'], - } - } + }, + }, }, timestamp: new Date('2024-03-08T00:44:04.600392+00:00').toISOString(), }) @@ -204,13 +203,13 @@ describe('sense operations', () => { }, ] `) - }); + }) test('delete the third sense from the first entry', async () => { const { error } = await post_request(content_update_endpoint, { id: incremental_consistent_uuid(), auth_token: null, - user_id_from_test: seeded_user_id_1, + user_id_from_local: seeded_user_id_1, dictionary_id: seeded_dictionary_id, entry_id: first_entry_id, sense_id: first_entry_third_sense_id, @@ -218,7 +217,7 @@ describe('sense operations', () => { change: { sense: { deleted: true, - } + }, }, timestamp: new Date('2024-03-08T00:44:04.600392+00:00').toISOString(), }) @@ -248,8 +247,8 @@ describe('sense operations', () => { }, ] `) - }); -}); + }) +}) describe('sense sentence operations', () => { const first_sentence_id = incremental_consistent_uuid() @@ -261,7 +260,7 @@ describe('sense sentence operations', () => { const { data, error } = await post_request(content_update_endpoint, { id: change_id, auth_token: null, - user_id_from_test: seeded_user_id_1, + user_id_from_local: seeded_user_id_1, dictionary_id: seeded_dictionary_id, sentence_id: first_sentence_id, sense_id: first_entry_first_sense_id, @@ -272,8 +271,8 @@ describe('sense sentence operations', () => { new: { lo1: 'abcd efgh ijkl', }, - } - } + }, + }, }, timestamp: new Date('2024-03-08T00:44:04.600392+00:00').toISOString(), }) @@ -294,6 +293,7 @@ describe('sense sentence operations', () => { "dictionary_id": "dictionary1", "entry_id": null, "id": "11111111-1111-1111-1111-111111111104", + "import_id": null, "photo_id": null, "sense_id": "11111111-1111-1111-1111-111111111100", "sentence_id": "11111111-1111-1111-1111-111111111103", @@ -304,7 +304,7 @@ describe('sense sentence operations', () => { "user_id": "12345678-abcd-efab-cdef-123456789012", "video_id": null, } - `); + `) }) test('change is in content_updates', async () => { @@ -326,6 +326,7 @@ describe('sense sentence operations', () => { "dictionary_id": "dictionary1", "entry_id": null, "id": "11111111-1111-1111-1111-111111111104", + "import_id": null, "photo_id": null, "sense_id": "11111111-1111-1111-1111-111111111100", "sentence_id": "11111111-1111-1111-1111-111111111103", @@ -385,8 +386,8 @@ describe('sense sentence operations', () => { }, ] `) - }); - }); + }) + }) describe('add translation to sentence', () => { const change_id = incremental_consistent_uuid() @@ -395,7 +396,7 @@ describe('sense sentence operations', () => { const { 
data, error } = await post_request(content_update_endpoint, { id: change_id, auth_token: null, - user_id_from_test: seeded_user_id_1, + user_id_from_local: seeded_user_id_1, dictionary_id: seeded_dictionary_id, sentence_id: first_sentence_id, sense_id: first_entry_first_sense_id, @@ -406,8 +407,8 @@ describe('sense sentence operations', () => { new: { en: 'I am hungry', }, - } - } + }, + }, }, timestamp: new Date('2024-03-09T00:44:04.600392+00:00').toISOString(), }) @@ -428,6 +429,7 @@ describe('sense sentence operations', () => { "dictionary_id": "dictionary1", "entry_id": null, "id": "11111111-1111-1111-1111-111111111105", + "import_id": null, "photo_id": null, "sense_id": "11111111-1111-1111-1111-111111111100", "sentence_id": "11111111-1111-1111-1111-111111111103", @@ -438,7 +440,7 @@ describe('sense sentence operations', () => { "user_id": "12345678-abcd-efab-cdef-123456789012", "video_id": null, } - `); + `) }) test('change is in content_updates', async () => { @@ -463,15 +465,15 @@ describe('sense sentence operations', () => { }, ] `) - }); - }); + }) + }) test('update sentence text updates just the text and leaves translation alone', async () => { const change_id = incremental_consistent_uuid() await post_request(content_update_endpoint, { id: change_id, auth_token: null, - user_id_from_test: seeded_user_id_1, + user_id_from_local: seeded_user_id_1, dictionary_id: seeded_dictionary_id, sentence_id: first_sentence_id, sense_id: first_entry_first_sense_id, @@ -484,9 +486,9 @@ describe('sense sentence operations', () => { }, old: { lo1: 'abcd efgh ijkl', - } - } - } + }, + }, + }, }, timestamp: new Date('2024-03-09T00:44:04.600392+00:00').toISOString(), }) @@ -506,13 +508,70 @@ describe('sense sentence operations', () => { }, ] `) - }); + }) + + test('add another translation to the same sentence', async () => { + const { data: { senses: old_senses } } = await anon_supabase.from('entries_view').select().eq('id', first_entry_id).single() + const change_id = incremental_consistent_uuid() + const { data, error } = await post_request(content_update_endpoint, { + id: change_id, + auth_token: null, + user_id_from_local: seeded_user_id_1, + dictionary_id: seeded_dictionary_id, + sentence_id: first_sentence_id, + sense_id: first_entry_first_sense_id, + table: 'sentences', + change: { + sentence: { + translation: { + new: { + ...old_senses[0].sentences[0].translation, + es: 'Estoy hambriento', + }, + }, + }, + }, + timestamp: new Date('2024-03-09T00:44:04.600392+00:00').toISOString(), + }) + + expect(error?.message).toBeFalsy() + expect(data.change).toMatchInlineSnapshot(` + { + "sentence": { + "translation": { + "new": { + "en": "I am hungry", + "es": "Estoy hambriento", + }, + }, + }, + } + `) + }) + + test('see changes in entries_view ', async () => { + const { data: { senses } } = await anon_supabase.from('entries_view').select().eq('id', first_entry_id).single() + expect(senses[0].sentences).toMatchInlineSnapshot(` + [ + { + "id": "11111111-1111-1111-1111-111111111103", + "text": { + "lo1": "abcd efgh", + }, + "translation": { + "en": "I am hungry", + "es": "Estoy hambriento", + }, + }, + ] + `) + }) test('remove sentence from sense', async () => { const { error } = await post_request(content_update_endpoint, { id: incremental_consistent_uuid(), auth_token: null, - user_id_from_test: seeded_user_id_1, + user_id_from_local: seeded_user_id_1, dictionary_id: seeded_dictionary_id, sentence_id: first_sentence_id, sense_id: first_entry_first_sense_id, @@ -520,7 +579,7 @@ describe('sense sentence 
operations', () => { change: { sentence: { removed_from_sense: true, - } + }, }, timestamp: new Date('2024-03-08T00:44:04.600392+00:00').toISOString(), }) @@ -540,7 +599,7 @@ describe('sense sentence operations', () => { } `) }) -}); +}) // test: add translation to pre-existing sentence without sense id // test: add a second sense to the same sense and make sure there are two sentences in that sense diff --git a/packages/site/src/lib/glosses/glossing-languages.ts b/packages/site/src/lib/glosses/glossing-languages.ts index 58cdbf71c..b5a81faf5 100644 --- a/packages/site/src/lib/glosses/glossing-languages.ts +++ b/packages/site/src/lib/glosses/glossing-languages.ts @@ -3,9 +3,10 @@ // internalName pulled from: https://keyman.com/keyboards > search for keyboard, select one with desktop and mobile web if possible, then copy Keyboard ID. Alternatively, all keyboards available can be seen at https://keyman.com/developer/keymanweb/keyboards // Latin script options: european, sil_euro_latin, basic_kbdus, us -import type { IGlossLanguages } from '@living-dictionaries/types'; -import glossing_languages_list from './glossing-languages-list.json'; -export const glossingLanguages: IGlossLanguages = glossing_languages_list; +import type { IGlossLanguages } from '@living-dictionaries/types' +import glossing_languages_list from './glossing-languages-list.json' + +export const glossingLanguages: IGlossLanguages = glossing_languages_list export const additionalKeyboards: IGlossLanguages = { 'srb-sora': { @@ -18,4 +19,6 @@ export const additionalKeyboards: IGlossLanguages = { internalName: 'basic_kbdolch', showKeyboard: true, }, -}; +} + +export type Glossing_Languages = keyof typeof glossing_languages_list diff --git a/packages/site/src/lib/mocks/seed/write-seed-and-reset-db.ts b/packages/site/src/lib/mocks/seed/write-seed-and-reset-db.ts index 19889d6ad..64bee8fca 100644 --- a/packages/site/src/lib/mocks/seed/write-seed-and-reset-db.ts +++ b/packages/site/src/lib/mocks/seed/write-seed-and-reset-db.ts @@ -1,5 +1,5 @@ +import { writeFileSync } from 'node:fs' import { execute_sql_query_on_db } from './postgres' -import { writeFileSync } from 'fs' import { sql_file_string } from './to-sql-string' import { users } from './tables' diff --git a/packages/site/src/lib/supabase/change/sense.ts b/packages/site/src/lib/supabase/change/sense.ts index faeb59cc0..527e05f5b 100644 --- a/packages/site/src/lib/supabase/change/sense.ts +++ b/packages/site/src/lib/supabase/change/sense.ts @@ -1,15 +1,15 @@ -import { authState } from 'sveltefirets'; -import { get } from 'svelte/store'; -import { page } from '$app/stores'; -import { invalidate } from '$app/navigation'; -import { ENTRY_UPDATED_LOAD_TRIGGER } from '$lib/dbOperations'; -import { post_request } from '$lib/helpers/get-post-requests'; -import type { ContentUpdateRequestBody, ContentUpdateResponseBody } from '$api/db/content-update/+server'; +import { authState } from 'sveltefirets' +import { get } from 'svelte/store' +import { page } from '$app/stores' +import { invalidate } from '$app/navigation' +import { ENTRY_UPDATED_LOAD_TRIGGER } from '$lib/dbOperations' +import { post_request } from '$lib/helpers/get-post-requests' +import type { ContentUpdateRequestBody, ContentUpdateResponseBody } from '$api/db/content-update/+server' -export async function update_sense({change, entry_id, sense_id }: {change: ContentUpdateRequestBody['change']['sense'], entry_id: string, sense_id: string }) { +export async function update_sense({ change, entry_id, sense_id }: { change: 
ContentUpdateRequestBody['change']['sense'], entry_id: string, sense_id: string }) { try { - const auth_state_user = get(authState); - const auth_token = await auth_state_user.getIdToken(); + const auth_state_user = get(authState) + const auth_token = await auth_state_user.getIdToken() const { params: { dictionaryId: dictionary_id } } = get(page) @@ -22,14 +22,14 @@ export async function update_sense({change, entry_id, sense_id }: {change: Conte table: 'senses', change: { sense: change }, timestamp: new Date().toISOString(), - }); + }) if (error) - throw new Error(error.message); + throw new Error(error.message) await invalidate(ENTRY_UPDATED_LOAD_TRIGGER) } catch (err) { - alert(err); - console.error(err); + alert(err) + console.error(err) } } diff --git a/packages/site/src/lib/supabase/generated.types.ts b/packages/site/src/lib/supabase/generated.types.ts index f5568c3db..140fdc6bb 100644 --- a/packages/site/src/lib/supabase/generated.types.ts +++ b/packages/site/src/lib/supabase/generated.types.ts @@ -131,6 +131,7 @@ export interface Database { dictionary_id: string entry_id: string | null id: string + import_id: string | null photo_id: string | null sense_id: string | null sentence_id: string | null @@ -147,6 +148,7 @@ export interface Database { dictionary_id: string entry_id?: string | null id: string + import_id?: string | null photo_id?: string | null sense_id?: string | null sentence_id?: string | null @@ -163,6 +165,7 @@ export interface Database { dictionary_id?: string entry_id?: string | null id?: string + import_id?: string | null photo_id?: string | null sense_id?: string | null sentence_id?: string | null @@ -1310,75 +1313,75 @@ export interface Database { export type Tables< PublicTableNameOrOptions extends - | keyof (Database['public']['Tables'] & Database['public']['Views']) - | { schema: keyof Database }, + | keyof (Database['public']['Tables'] & Database['public']['Views']) + | { schema: keyof Database }, TableName extends PublicTableNameOrOptions extends { schema: keyof Database } ? keyof (Database[PublicTableNameOrOptions['schema']]['Tables'] & - Database[PublicTableNameOrOptions['schema']]['Views']) + Database[PublicTableNameOrOptions['schema']]['Views']) : never = never, > = PublicTableNameOrOptions extends { schema: keyof Database } ? (Database[PublicTableNameOrOptions['schema']]['Tables'] & - Database[PublicTableNameOrOptions['schema']]['Views'])[TableName] extends { + Database[PublicTableNameOrOptions['schema']]['Views'])[TableName] extends { Row: infer R } - ? R - : never + ? R + : never : PublicTableNameOrOptions extends keyof (Database['public']['Tables'] & - Database['public']['Views']) + Database['public']['Views']) ? (Database['public']['Tables'] & - Database['public']['Views'])[PublicTableNameOrOptions] extends { + Database['public']['Views'])[PublicTableNameOrOptions] extends { Row: infer R } - ? R - : never + ? R + : never : never export type TablesInsert< PublicTableNameOrOptions extends - | keyof Database['public']['Tables'] - | { schema: keyof Database }, + | keyof Database['public']['Tables'] + | { schema: keyof Database }, TableName extends PublicTableNameOrOptions extends { schema: keyof Database } ? keyof Database[PublicTableNameOrOptions['schema']]['Tables'] : never = never, > = PublicTableNameOrOptions extends { schema: keyof Database } ? Database[PublicTableNameOrOptions['schema']]['Tables'][TableName] extends { - Insert: infer I - } + Insert: infer I + } ? I : never : PublicTableNameOrOptions extends keyof Database['public']['Tables'] ? 
Database['public']['Tables'][PublicTableNameOrOptions] extends { - Insert: infer I - } + Insert: infer I + } ? I : never : never export type TablesUpdate< PublicTableNameOrOptions extends - | keyof Database['public']['Tables'] - | { schema: keyof Database }, + | keyof Database['public']['Tables'] + | { schema: keyof Database }, TableName extends PublicTableNameOrOptions extends { schema: keyof Database } ? keyof Database[PublicTableNameOrOptions['schema']]['Tables'] : never = never, > = PublicTableNameOrOptions extends { schema: keyof Database } ? Database[PublicTableNameOrOptions['schema']]['Tables'][TableName] extends { - Update: infer U - } + Update: infer U + } ? U : never : PublicTableNameOrOptions extends keyof Database['public']['Tables'] ? Database['public']['Tables'][PublicTableNameOrOptions] extends { - Update: infer U - } + Update: infer U + } ? U : never : never export type Enums< PublicEnumNameOrOptions extends - | keyof Database['public']['Enums'] - | { schema: keyof Database }, + | keyof Database['public']['Enums'] + | { schema: keyof Database }, EnumName extends PublicEnumNameOrOptions extends { schema: keyof Database } ? keyof Database[PublicEnumNameOrOptions['schema']]['Enums'] : never = never, @@ -1387,4 +1390,3 @@ export type Enums< : PublicEnumNameOrOptions extends keyof Database['public']['Enums'] ? Database['public']['Enums'][PublicEnumNameOrOptions] : never - diff --git a/packages/site/src/routes/[dictionaryId]/entry/[entryId]/EntryField.svelte b/packages/site/src/routes/[dictionaryId]/entry/[entryId]/EntryField.svelte index f2f754b1b..cbe0f29ca 100644 --- a/packages/site/src/routes/[dictionaryId]/entry/[entryId]/EntryField.svelte +++ b/packages/site/src/routes/[dictionaryId]/entry/[entryId]/EntryField.svelte @@ -1,14 +1,14 @@ {#if value || can_edit} @@ -19,7 +19,7 @@ class:hover:bg-gray-100={can_edit} class:cursor-pointer={can_edit} class:order-2={!value}> - {#if field != 'lexeme'} + {#if field !== 'lexeme'}
{display}
{/if}
 export const POST: RequestHandler = async ({ request }) => {
   try {
-    const { id, user_id_from_test, auth_token, table, dictionary_id, audio_id, entry_id, photo_id, speaker_id, text_id, video_id, sentence_id, sense_id, change, timestamp } = await request.json() as ContentUpdateRequestBody
+    const { id, user_id_from_local, auth_token, table, dictionary_id, audio_id, entry_id, photo_id, speaker_id, text_id, video_id, sentence_id, sense_id, change, import_id, timestamp } = await request.json() as ContentUpdateRequestBody
 
     if (audio_id)
       throw new Error('audio_id change not implemented')
@@ -85,8 +28,9 @@ export const POST: RequestHandler = async ({ request }) => {
 
     const adminSupabase = getAdminSupabaseClient()
 
-    let user_id = user_id_from_test
-    if (!dev || !user_id_from_test) {
+    let user_id = user_id_from_local
+    const is_deployed = !dev
+    if (is_deployed || auth_token) {
       const decodedToken = await decodeToken(auth_token)
       if (!decodedToken?.uid)
         throw new Error('No user id found in token')
@@ -102,6 +46,9 @@ export const POST: RequestHandler = async ({ request }) => {
       user_id = data.id
     }
 
+    if (!user_id)
+      throw new Error('No user id found. Pass it into the user_id_from_local field or use a valid auth_token.')
+
     const { data: dictionary } = await adminSupabase.from('dictionaries').select().eq('id', dictionary_id).single()
     if (!dictionary) {
       const { error: add_dictionary_error } = await adminSupabase.from('dictionaries').insert({
@@ -198,6 +145,7 @@ export const POST: RequestHandler = async ({ request }) => {
         sense_id,
         timestamp,
         table,
+        import_id,
         change,
       })
       .select()
diff --git a/packages/types/gloss.interface.ts b/packages/types/gloss.interface.ts
index c42752125..765e2e8ac 100644
--- a/packages/types/gloss.interface.ts
+++ b/packages/types/gloss.interface.ts
@@ -1,4 +1,21 @@
-/* eslint-disable @typescript-eslint/consistent-indexed-object-style */
+/* eslint-disable ts/consistent-indexed-object-style */
 export interface MultiString {
-  [gloss_language_bcp: string]: string;
+  [language_bcp__or_writing_system_id: string]: string
 }
+
+// in entry.lexeme, sentence.text, entry.notes contexts, this would be different writing system ids:
+// {
+//   'pinyin-with-tone-number': 'ni3hao3', // used to be just plain lexeme field (lx)
+//   'pinyin-with-tone-mark': 'nǐhǎo', // used to be local_orthography_1 (lo1)
+//   'traditional': '你好',
+// }
+
+// Elsewhere in the app, we will map writing system ids to user-friendly names (maybe i18n) and other needs such as fonts/keyboards
+
+// in entry.gloss, sentence.translation contexts this would be different language bcp codes:
+// {
+//   "en": "hello",
+//   "es": "hola",
+//   "zh-TW": "你好",
+//   "zh-CN": "你好",
+// }
diff --git a/packages/types/index.ts b/packages/types/index.ts
index a968592ad..6963da974 100644
--- a/packages/types/index.ts
+++ b/packages/types/index.ts
@@ -20,3 +20,4 @@ export type { HelperRoles, IHelper } from './helper.interface'
 export type { AlgoliaEntry } from './entry.algolia.interface'
 export { type IPrintFields, StandardPrintFields } from './print-entry.interface'
 export { EntryFields, type EntryFieldValue, type i18nEntryFieldKey } from './entry-fields.enum'
+export type { ContentUpdateRequestBody } from './supabase/content-update.interface'
diff --git a/packages/types/supabase/content-update.interface.ts b/packages/types/supabase/content-update.interface.ts
new file mode 100644
index 000000000..908d05af5
--- /dev/null
+++ b/packages/types/supabase/content-update.interface.ts
@@ -0,0 +1,60 @@
+import type { MultiString } from '../.'
+
+export interface ContentUpdateRequestBody {
+  id: string // id of the change, a uuidv4 created on client to make things idempotent
+  user_id_from_local?: string
+  auth_token: string
+  dictionary_id: string
+  entry_id?: string
+  sense_id?: string
+  sentence_id?: string
+  text_id?: string
+  audio_id?: string
+  video_id?: string
+  photo_id?: string
+  speaker_id?: string
+  table: 'entries' | 'senses' | 'sentences' | 'senses_in_sentences' | 'texts' | 'audio' | 'video' | 'photo' | 'speakers' | 'audio_speakers' | 'video_speakers' | 'sense_videos' | 'sentence_videos' | 'sense_photos' | 'sentence_photos' // hand-copied from Database['public']['Enums']['content_tables'], so Supabase types may need to be brought into @living-dictionaries/types
+  change: {
+    sense?: {
+      glosses?: {
+        new: MultiString
+        old?: MultiString
+      }
+      definition?: {
+        new: MultiString
+        old?: MultiString
+      }
+      noun_class?: {
+        new: string
+        old?: string
+      }
+      parts_of_speech?: {
+        new: string[]
+        old?: string[]
+      }
+      semantic_domains?: {
+        new: string[]
+        old?: string[]
+      }
+      write_in_semantic_domains?: {
+        new: string[]
+        old?: string[]
+      }
+      deleted?: boolean
+    }
+    sentence?: {
+      text?: {
+        new: MultiString
+        old?: MultiString
+      }
+      translation?: {
+        new: MultiString
+        old?: MultiString
+      }
+      removed_from_sense?: boolean // currently also deletes the sentence - later when a sentence can be connected to multiple senses, use a deleted field to indicate the sentence is deleted everywhere
+      // deleted?: boolean;
+    }
+  }
+  import_id?: string
+  timestamp: string
+}
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index af6019eb4..f8a79ee9f 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -87,6 +87,9 @@ importers:
       '@living-dictionaries/types':
         specifier: ^1.0.0
         version: 1.0.0
+      '@supabase/supabase-js':
+        specifier: ^2.38.4
+        version: 2.39.7
       '@types/node':
         specifier: ^18.11.18
         version: 18.11.18
@@ -105,6 +108,9 @@ importers:
       csvtojson:
         specifier: ^2.0.10
         version: 2.0.10
+      detect-port:
+        specifier: ^1.6.1
+        version: 1.6.1
       dotenv:
         specifier: ^16.0.2
         version: 16.0.2
@@ -2325,9 +2331,6 @@ packages:
   '@types/node@20.11.0':
     resolution: {integrity: sha512-o9bjXmDNcF7GbM4CNQpmi+TutCgap/K3w1JyKgxAjqx41zp9qlIAVFi0IhCNsJcXolEqLWhbFbEeL0PvYm4pcQ==}
 
-  '@types/node@20.11.20':
-    resolution: {integrity: sha512-7/rR21OS+fq8IyHTgtLkDK949uzsa6n8BkziAKtPVpugIkO6D+/ooXMvzXxDnZrmtXVfjb1bKQafYpb8s89LOg==}
-
   '@types/normalize-package-data@2.4.4':
     resolution: {integrity: sha512-37i+OaWTh9qeK4LSHPsyRC7NahnGotNuZvjLSgcPzblpHB3rrCJxAOgI5gCdKm7coonsaX1Of0ILiTcnZjbfxA==}
 
@@ -2697,6 +2700,10 @@ packages:
     engines: {node: '>=0.4.0'}
     hasBin: true
 
+  address@1.2.2:
+    resolution: {integrity: sha512-4B/qKCfeE/ODUaAUpSwfzazo5x29WD4r3vXiWsB7I2mSDAihwEqKO+g8GELZUQSSAo5e1XTYh3ZVfLyxBc12nA==}
+    engines: {node: '>= 10.0.0'}
+
   agent-base@6.0.2:
     resolution: {integrity: sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==}
     engines: {node: '>= 6.0.0'}
@@ -3473,6 +3480,11 @@ packages:
     resolution: {integrity: sha512-reYkTUJAZb9gUuZ2RvVCNhVHdg62RHnJ7WJl8ftMi4diZ6NWlciOzQN88pUhSELEwflJht4oQDv0F0BMlwaYtA==}
     engines: {node: '>=8'}
 
+  detect-port@1.6.1:
+    resolution: {integrity: sha512-CmnVc+Hek2egPx1PeTFVta2W78xy2K/9Rkf6cC4T59S50tVnzKj+tnx5mmx5lwvCkujZ4uRrpRSuV+IVs3f90Q==}
+    engines: {node: '>= 4.0.0'}
+    hasBin: true
+
   devalue@4.3.2:
     resolution: {integrity: sha512-KqFl6pOgOW+Y6wJgu80rHpo2/3H07vr8ntR9rkkFIRETewbf5GaYYcakYfiKz89K+sLsuPkQIZaXDMjUObZwWg==}
 
   diff-sequences@29.6.3: {}
 
@@ -8572,7 +8584,7 @@ snapshots:
 
   '@grpc/grpc-js@1.8.14':
     dependencies:
'@grpc/proto-loader': 0.7.7 - '@types/node': 18.11.18 + '@types/node': 18.18.0 '@grpc/grpc-js@1.9.13': dependencies: @@ -10042,12 +10054,12 @@ snapshots: '@types/body-parser@1.19.2': dependencies: '@types/connect': 3.4.35 - '@types/node': 18.11.18 + '@types/node': 18.18.0 '@types/body-parser@1.19.5': dependencies: '@types/connect': 3.4.38 - '@types/node': 18.11.18 + '@types/node': 18.18.0 '@types/caseless@0.12.5': optional: true @@ -10205,11 +10217,11 @@ snapshots: '@types/connect@3.4.35': dependencies: - '@types/node': 18.11.18 + '@types/node': 18.18.0 '@types/connect@3.4.38': dependencies: - '@types/node': 18.11.18 + '@types/node': 18.18.0 '@types/cookie@0.6.0': {} @@ -10240,13 +10252,13 @@ snapshots: '@types/express-serve-static-core@4.17.33': dependencies: - '@types/node': 18.11.18 + '@types/node': 18.18.0 '@types/qs': 6.9.7 '@types/range-parser': 1.2.4 '@types/express-serve-static-core@4.17.41': dependencies: - '@types/node': 18.11.18 + '@types/node': 18.18.0 '@types/qs': 6.9.11 '@types/range-parser': 1.2.7 '@types/send': 0.17.4 @@ -10275,7 +10287,7 @@ snapshots: '@types/glob@8.1.0': dependencies: '@types/minimatch': 5.1.2 - '@types/node': 18.11.18 + '@types/node': 18.18.0 '@types/google.maps@3.48.3': {} @@ -10295,7 +10307,7 @@ snapshots: '@types/jsonwebtoken@9.0.5': dependencies: - '@types/node': 18.11.18 + '@types/node': 18.18.0 '@types/linkify-it@3.0.2': {} @@ -10339,10 +10351,6 @@ snapshots: dependencies: undici-types: 5.26.5 - '@types/node@20.11.20': - dependencies: - undici-types: 5.26.5 - '@types/normalize-package-data@2.4.4': {} '@types/parse5@6.0.3': {} @@ -10370,7 +10378,7 @@ snapshots: '@types/request@2.48.12': dependencies: '@types/caseless': 0.12.5 - '@types/node': 20.11.0 + '@types/node': 18.18.0 '@types/tough-cookie': 4.0.5 form-data: 2.5.1 optional: true @@ -10378,7 +10386,7 @@ snapshots: '@types/rimraf@3.0.2': dependencies: '@types/glob': 8.1.0 - '@types/node': 18.11.18 + '@types/node': 18.18.0 '@types/semver@7.5.3': {} @@ -10387,18 +10395,18 @@ snapshots: '@types/send@0.17.4': dependencies: '@types/mime': 1.3.5 - '@types/node': 18.11.18 + '@types/node': 18.18.0 '@types/serve-static@1.15.1': dependencies: '@types/mime': 3.0.1 - '@types/node': 18.11.18 + '@types/node': 18.18.0 '@types/serve-static@1.15.5': dependencies: '@types/http-errors': 2.0.4 '@types/mime': 3.0.4 - '@types/node': 18.11.18 + '@types/node': 18.18.0 '@types/tough-cookie@4.0.5': optional: true @@ -10407,7 +10415,7 @@ snapshots: '@types/ws@8.5.10': dependencies: - '@types/node': 20.11.20 + '@types/node': 18.18.0 '@types/yoga-layout@1.9.2': {} @@ -10894,6 +10902,8 @@ snapshots: acorn@8.11.3: {} + address@1.2.2: {} + agent-base@6.0.2: dependencies: debug: 4.3.4 @@ -11707,6 +11717,13 @@ snapshots: detect-indent@6.1.0: {} + detect-port@1.6.1: + dependencies: + address: 1.2.2 + debug: 4.3.4 + transitivePeerDependencies: + - supports-color + devalue@4.3.2: {} diff-sequences@29.6.3: {} @@ -14959,7 +14976,7 @@ snapshots: '@protobufjs/path': 1.1.2 '@protobufjs/pool': 1.1.0 '@protobufjs/utf8': 1.1.0 - '@types/node': 20.11.0 + '@types/node': 18.18.0 long: 5.2.3 protobufjs@7.2.5: @@ -14974,7 +14991,7 @@ snapshots: '@protobufjs/path': 1.1.2 '@protobufjs/pool': 1.1.0 '@protobufjs/utf8': 1.1.0 - '@types/node': 18.11.18 + '@types/node': 18.18.0 long: 5.2.3 proxy-addr@2.0.7: diff --git a/supabase/ideas/manager_policy.sql b/supabase/ideas/manager_policy.sql new file mode 100644 index 000000000..2aa81e0ad --- /dev/null +++ b/supabase/ideas/manager_policy.sql @@ -0,0 +1,5 @@ +CREATE POLICY manager_policy +ON imports 
+FOR SELECT
+USING
+  (is_manager(current_setting('auth.uid')::uuid, dictionary_id));
diff --git a/supabase/migrations/20240225012557_updates.sql b/supabase/migrations/20240225012557_updates.sql
index f5b25011a..fbccafe28 100644
--- a/supabase/migrations/20240225012557_updates.sql
+++ b/supabase/migrations/20240225012557_updates.sql
@@ -13,8 +13,9 @@ CREATE TABLE content_updates (
   photo_id uuid REFERENCES photos,
   speaker_id uuid REFERENCES speakers,
   "table" content_tables NOT NULL,
-  change jsonb NOT NULL, -- includes import_id
+  change jsonb NOT NULL,
   "timestamp" timestamp with time zone NOT NULL DEFAULT now()
+  -- import_id uuid -- added later
 );
 
 ALTER TABLE content_updates ENABLE ROW LEVEL SECURITY;
diff --git a/supabase/migrations/20240322012208_import-id.sql b/supabase/migrations/20240322012208_import-id.sql
new file mode 100644
index 000000000..ff6ce70c4
--- /dev/null
+++ b/supabase/migrations/20240322012208_import-id.sql
@@ -0,0 +1,2 @@
+ALTER TABLE content_updates
+ADD COLUMN import_id uuid;
\ No newline at end of file
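
A minimal usage sketch of the content-update request this patch introduces, assembled from the db-tests and types above. The endpoint URL, the post_request helper, and the seeded ids are taken from this diff; using crypto.randomUUID() as the uuidv4 source, the placeholder sense id, and the verification read at the end are assumptions, not part of the patch:

import { post_request } from '$lib/helpers/get-post-requests'
import { first_entry_id, seeded_dictionary_id, seeded_user_id_1 } from '$lib/mocks/seed/tables'
import { anon_supabase } from './clients'

// The change id is minted on the client so retries stay idempotent:
// posting the same id twice cannot create two content_updates rows.
const { error } = await post_request('http://localhost:3041/api/db/content-update', {
  id: crypto.randomUUID(), // assumption: any client-side uuidv4 generator works here
  auth_token: null, // on the local dev server, user_id_from_local stands in for a decoded token
  user_id_from_local: seeded_user_id_1,
  dictionary_id: seeded_dictionary_id,
  entry_id: first_entry_id,
  sense_id: '11111111-1111-1111-1111-111111111100', // a seeded sense id from the tests
  table: 'senses',
  change: {
    sense: {
      glosses: { new: { en: 'hello', es: 'hola' } },
    },
  },
  // import_id is only set when the change originates from a spreadsheet import
  timestamp: new Date().toISOString(),
})
if (error)
  throw new Error(error.message)

// Read the change back the way the tests do, through entries_view:
const { data: entry } = await anon_supabase.from('entries_view').select().eq('id', first_entry_id).single()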