Imports can have additional senses (#404)
* use the same column names for all other example sentences even with multiple senses

* allow multiple sentences linked to the same sense

* check for dev server when importing entries

* type the import row everywhere; add a speaker right away when a new speaker is encountered
---------

Co-authored-by: livingtongues <[email protected]>
Co-authored-by: Jacob Bowdoin <[email protected]>
3 people authored Jul 11, 2024
1 parent 4d584dc commit 3f4890a
Showing 47 changed files with 2,832 additions and 641 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/lint.yml
@@ -21,9 +21,11 @@ jobs:
       - run: pnpm install
 
       - name: Run ESLint on changed files
-        uses: tj-actions/eslint-changed-files@v24
+        uses: tj-actions/eslint-changed-files@v25
         with:
           config_path: eslint.config.js
+          # escape_paths: false - needed if using SvelteKit parenthesis in routes
+          level: error
           file_extensions: |
             **/*.ts
             **/*.js
2 changes: 2 additions & 0 deletions .vscode/settings.json
@@ -91,6 +91,8 @@
"a11y-no-noninteractive-element-interactions": "ignore",
},

"comments.visible": false,

"deno.enable": true,
"deno.lint": true,
"deno.enablePaths": [
82 changes: 41 additions & 41 deletions FLEx.model.ts
@@ -1,10 +1,10 @@
 interface Entry {
-  id: string;
-  lexemeForm: MultiString; // thanks for this inspiration - we started with just a lexeme string field, and then people asked for more orthographies and we made them second-class citizens as optional alternate orthographies. This accomplishes the same purpose as the multi-string here, but it's not as elegant and has pain points. For example, once someone decided they wanted to make an alternate orthography the main orthography, but they couldn't. So I don't like our current model and will use a MultiString after our migration.
-  citationForm: MultiString; // Am I correct that citation form is a convention from a world where print is the only medium? We don't have this field. In my opinion, if you have a lexeme that is important enough to add and gloss, like "radii", and the citation form is "radius", then in a digital dictionary these belong as two separate entries with a relationship from radii to radius. Not sure what the relationship would be called, but something like "child-citation", indicating that the "radii" entry is really an offshoot of the base word "radius". At the end of the day we do have a very simple print view, so print conventions are still in view, but in our world they are second-class citizens; web usage with easy bouncing between entries via links is first-class. However, we don't have a system for relationships yet. That will be a further-down-the-road benefit of our migration, in that it will be easy to indicate relationships between entries. For now we do have a few additional fields users can use to add some basic info, like a "plural_form" field and a deprecated "variant" field. I don't really like these, but we have them at the moment.
-  literalMeaning: MultiString; // What is this field for? We have nothing like it. Meaning is based on sense, and you already have gloss and definition fields there.
-  senses: Sense[];
-  note: MultiString; // our notes field is just a string - is this going to cause grief when importing FLEx data? Is this designed this way so people can write notes in whatever writing system they like and have fonts applied appropriately?
+  id: string
+  lexemeForm: MultiString // thanks for this inspiration - we started with just a lexeme string field, and then people asked for more orthographies and we made them second-class citizens as optional alternate orthographies. This accomplishes the same purpose as the multi-string here, but it's not as elegant and has pain points. For example, once someone decided they wanted to make an alternate orthography the main orthography, but they couldn't. So I don't like our current model and will use a MultiString after our migration.
+  citationForm: MultiString // Am I correct that citation form is a convention from a world where print is the only medium? We don't have this field. In my opinion, if you have a lexeme that is important enough to add and gloss, like "radii", and the citation form is "radius", then in a digital dictionary these belong as two separate entries with a relationship from radii to radius. Not sure what the relationship would be called, but something like "child-citation", indicating that the "radii" entry is really an offshoot of the base word "radius". At the end of the day we do have a very simple print view, so print conventions are still in view, but in our world they are second-class citizens; web usage with easy bouncing between entries via links is first-class. However, we don't have a system for relationships yet. That will be a further-down-the-road benefit of our migration, in that it will be easy to indicate relationships between entries. For now we do have a few additional fields users can use to add some basic info, like a "plural_form" field and a deprecated "variant" field. I don't really like these, but we have them at the moment.
+  literalMeaning: MultiString // What is this field for? We have nothing like it. Meaning is based on sense, and you already have gloss and definition fields there.
+  senses: Sense[]
+  note: MultiString // our notes field is just a string - we are going to move to using MultiString to allow for different analysis writing systems. Needed when importing FLEx data.
   // Additional fields we have
   // phonetic?: string;
   // morphology?: string;
@@ -16,63 +16,63 @@ interface Entry {
 }
 
 interface Sense {
-  id: string;
-  gloss: MultiString;
-  definition: MultiString; // we have this field used in our first dictionary, but we don't show the field when it is empty (ie - we don't encourage its use and just use glosses, but that could change)
-  partOfSpeech: string; // in our system this is an array, because some entries serve as multiple parts of speech; we have a specific set which are keys that are translated in the UI (eg. "n" -> "noun" in English / "sustantivo" in Spanish)
-  semanticDomain: string[]; // we have a specific set which are keys that are translated in the UI (it's a majorly simplified system modeled after SemDom with some adjustments). A universal set of domains is nice for cross-linguistic work but doesn't always fit the semantic categories of a language, so future growth in our semantic domains field could go a lot of different directions depending on needs, like accepting different systems (ie - SemDom) or letting a dictionary itself set up custom domains. We also plan to introduce tags, which would be multi-purpose for many different applications and may negate the need for a dictionary to create its own domains.
+  id: string
+  gloss: MultiString
+  definition: MultiString // we have this field used in our first dictionary, but we don't show the field when it is empty (ie - we don't encourage its use and just use glosses, but that could change)
+  partOfSpeech: string // in our system this is an array, because some entries serve as multiple parts of speech; we have a specific set which are keys that are translated in the UI (eg. "n" -> "noun" in English / "sustantivo" in Spanish)
+  semanticDomain: string[] // we have a specific set which are keys that are translated in the UI (it's a majorly simplified system modeled after SemDom with some adjustments). A universal set of domains is nice for cross-linguistic work but doesn't always fit the semantic categories of a language, so future growth in our semantic domains field could go a lot of different directions depending on needs, like accepting different systems (ie - SemDom) or letting a dictionary itself set up custom domains. We also plan to introduce tags, which would be multi-purpose for many different applications and may negate the need for a dictionary to create its own domains.
   // write_in_semantic_domains?: string[] // used to support legacy dictionaries, and obviously not translated. We show these and let users delete them and swap them out for the new system, but we don't allow editing or adding.
-  exampleSentences: ExampleSentence[];
+  exampleSentences: ExampleSentence[]
   // noun_class?: string; additional field we have
 }
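
The translated-keys approach described in the partOfSpeech and semanticDomain comments could look like this minimal sketch; the map contents and helper name are illustrative, not the actual Living Dictionaries code (only the "n" -> "noun"/"sustantivo" pair comes from the comment above):

// Hypothetical sketch of the translated-keys idea: parts of speech are stored
// as stable keys and rendered per UI language.
const partOfSpeechLabels: Record<string, Record<string, string>> = {
  n: { en: 'noun', es: 'sustantivo' },
  v: { en: 'verb', es: 'verbo' }, // the "v" pair is an assumed addition
}

function labelPartOfSpeech(key: string, uiLanguage: string): string {
  return partOfSpeechLabels[key]?.[uiLanguage] ?? key // fall back to the raw key
}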

 interface ExampleSentence { // upgrading these to be first-class citizens called Sentence
-  id: string;
-  sentence: MultiString;
-  translation: MultiString;
-  reference: string; // further fields like this haven't been thought through yet, but there's room to grow
+  id: string
+  sentence: MultiString
+  translation: MultiString
+  reference: string // further fields like this haven't been thought through yet, but there's room to grow
 }

 interface MultiString {
-  values: Record<WritingSystemId, string>; // Our current use of something that's like MultiString doesn't nest values underneath a "values" key, but it works the same way. It's just Record<bcp_string, string>, as in `gloss: { "en": "dog", "es": "perro" }` - is there a good reason to nest under "values" besides leaving room for adding notes or something in the future? What is the reason for the "values" key? As I expand our use of this MultiString idea, I'd like to know more about your experience here.
+  values: Record<WritingSystemId, string> // Our current use of something that's like MultiString doesn't nest values underneath a "values" key, but it works the same way. It's just Record<bcp_string, string>, as in `gloss: { "en": "dog", "es": "perro" }` - is there a good reason to nest under "values" besides leaving room for adding notes or something in the future? What is the reason for the "values" key? As I expand our use of this MultiString idea, I'd like to know more about your experience here.
 }
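
For comparison, a minimal sketch of the two shapes discussed in the values comment - the flat record used today versus the values-nested MultiString (field contents taken from the "dog"/"perro" example above):

// Flat shape currently in use: just a record keyed by writing system.
type FlatMultiString = Record<string, string>
const flatGloss: FlatMultiString = { en: 'dog', es: 'perro' }

// Nested shape from the FLEx model: the wrapper object leaves room for
// sibling metadata (notes, etc.) to be added later without a breaking change.
interface NestedMultiString {
  values: Record<string, string>
}
const nestedGloss: NestedMultiString = { values: { en: 'dog', es: 'perro' } }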

 interface WritingSystem {
-  id: WritingSystemId;
-  name: string;
-  abbreviation: string;
-  font: string;
+  id: WritingSystemId
+  name: string
+  abbreviation: string
+  font: string
 }
 
 interface WritingSystems {
-  analysis: WritingSystem[]; // let's pretend I'm studying a Native American language. This could be English and Spanish for example...
-  vernacular: WritingSystem[]; // and this might be Latin script and a native script?
+  analysis: WritingSystem[] // let's pretend I'm studying a Native American language. This could be English and Spanish for example...
+  vernacular: WritingSystem[] // and this might be Latin script and a native script?
 }
 
-type WritingSystemId = string;
+type WritingSystemId = string

 export interface ILexboxApiHub {
-  GetWritingSystems(): Promise<WritingSystems>;
-  GetEntries(options: QueryOptions): Promise<Entry[]>;
-  SearchEntries(query: string, options: QueryOptions): Promise<Entry[]>;
-  GetEntry(id: string): Promise<Entry>;
-  CreateEntry(entry: Entry): Promise<Entry>;
-  UpdateEntry(id: string, update: JsonOperation[]): Promise<Entry>;
-  DeleteEntry(id: string): Promise<void>;
-  CreateSense(entryId: string, sense: Sense): Promise<Sense>;
-  UpdateSense(entryId: string, senseId: string, update: JsonOperation[]): Promise<Sense>;
-  DeleteSense(entryId: string, senseId: string): Promise<void>;
-  CreateExampleSentence(entryId: string, senseId: string, exampleSentence: ExampleSentence): Promise<ExampleSentence>;
-  UpdateExampleSentence(entryId: string, senseId: string, exampleSentenceId: string, update: JsonOperation[]): Promise<ExampleSentence>;
-  DeleteExampleSentence(entryId: string, senseId: string, exampleSentenceId: string): Promise<void>;
+  GetWritingSystems: () => Promise<WritingSystems>
+  GetEntries: (options: QueryOptions) => Promise<Entry[]>
+  SearchEntries: (query: string, options: QueryOptions) => Promise<Entry[]>
+  GetEntry: (id: string) => Promise<Entry>
+  CreateEntry: (entry: Entry) => Promise<Entry>
+  UpdateEntry: (id: string, update: JsonOperation[]) => Promise<Entry>
+  DeleteEntry: (id: string) => Promise<void>
+  CreateSense: (entryId: string, sense: Sense) => Promise<Sense>
+  UpdateSense: (entryId: string, senseId: string, update: JsonOperation[]) => Promise<Sense>
+  DeleteSense: (entryId: string, senseId: string) => Promise<void>
+  CreateExampleSentence: (entryId: string, senseId: string, exampleSentence: ExampleSentence) => Promise<ExampleSentence>
+  UpdateExampleSentence: (entryId: string, senseId: string, exampleSentenceId: string, update: JsonOperation[]) => Promise<ExampleSentence>
+  DeleteExampleSentence: (entryId: string, senseId: string, exampleSentenceId: string) => Promise<void>
 }

 interface QueryOptions {
-  order: string;
-  count: number;
-  offset: number;
+  order: string
+  count: number
+  offset: number
 }
 
 interface JsonOperation {
-  do_no_know_yet: string;
+  do_no_know_yet: string
 }
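
As a usage illustration, a client could page through entries with the hub interface above. This is a sketch under assumptions: an available ILexboxApiHub implementation, an "order by lexemeForm" convention, and the page size are all hypothetical.

// Illustrative only: paging through entries via the ILexboxApiHub defined above.
async function listAllEntries(api: ILexboxApiHub): Promise<Entry[]> {
  const pageSize = 100 // hypothetical page size
  const all: Entry[] = []
  for (let offset = 0; ; offset += pageSize) {
    const page = await api.GetEntries({ order: 'lexemeForm', count: pageSize, offset })
    all.push(...page)
    if (page.length < pageSize)
      break // last page reached
  }
  return all
}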
4 changes: 3 additions & 1 deletion eslint.config.js
@@ -50,7 +50,6 @@ export default antfu(
     files: ['**/*.test.ts'],
     rules: {
       'test/consistent-test-it': ['error', { fn: 'test' }],
-      'test/no-commented-out-tests': 'error',
       'test/no-disabled-tests': 'error',
       'test/consistent-test-filename': 'error',
       'test/expect-expect': 'error',
@@ -71,6 +70,7 @@ export default antfu(
       'test/prefer-to-have-length': 'error',
       'test/valid-describe-callback': 'error',
       'test/valid-expect': 'error',
+      'test/no-commented-out-tests': 'warn',
     },
   },
   {
@@ -87,6 +87,8 @@ export default antfu(
       'no-console': 'off',
       'ts/no-unused-vars': 'off',
       'ts/no-var-requires': 'off',
+      'node/prefer-global/process': 'off',
+      'unused-imports/no-unused-vars': 'off',
     },
   },
   {
1 change: 1 addition & 0 deletions packages/scripts/.gitignore
@@ -2,3 +2,4 @@ logs
 service-account*
 .env
 sheets-viewer-SA.json
+.env.supabase
50 changes: 24 additions & 26 deletions packages/scripts/algolia/addDictionariesToIndex.ts
@@ -1,41 +1,39 @@
-import { db } from '../config';
-import { updateIndex } from './algolia';
-import { ActualDatabaseEntry } from '@living-dictionaries/types';
+import type { ActualDatabaseEntry } from '@living-dictionaries/types'
+import * as prepare from '@living-dictionaries/functions/src/algolia/prepareDataForIndex'
+import { db } from '../config-firebase'
+import { updateIndex } from './algolia'
 
 // import { prepareDataForIndex } from '@living-dictionaries/functions/src/algolia/prepareDataForIndex';
-import * as prepare from '@living-dictionaries/functions/src/algolia/prepareDataForIndex';
-// @ts-ignore
+// @ts-expect-error
 const prepareDataForIndex = prepare.default
-  .prepareDataForIndex as typeof import('@living-dictionaries/functions/src/algolia/prepareDataForIndex').prepareDataForIndex; // b/c file is declared to be commonjs by its package.json
+  .prepareDataForIndex as typeof import('@living-dictionaries/functions/src/algolia/prepareDataForIndex').prepareDataForIndex // b/c file is declared to be commonjs by its package.json
 
-const indexAllDictionaries = async () => {
-  const dictionariesSnapshot = await db.collection(`dictionaries`).get();
-  const dictionaryIds = dictionariesSnapshot.docs.map((doc) => doc.id);
-  console.log(dictionaryIds);
-  process.stdout.write(dictionaryIds + '\n');
+async function indexAllDictionaries() {
+  const dictionariesSnapshot = await db.collection(`dictionaries`).get()
+  const dictionaryIds = dictionariesSnapshot.docs.map(doc => doc.id)
+  console.log(dictionaryIds)
+  process.stdout.write(`${dictionaryIds}\n`)
 
   for (const dictionaryId of dictionaryIds)
-    await indexDictionary(dictionaryId);
-
-};
+    await indexDictionary(dictionaryId)
+}
 
 async function indexDictionary(dictionaryId: string) {
-  const entriesSnapshot = await db.collection(`dictionaries/${dictionaryId}/words`).get();
-  const entries = await prepareEntriesFromSnapshot(entriesSnapshot, dictionaryId);
-  await updateIndex(entries);
+  const entriesSnapshot = await db.collection(`dictionaries/${dictionaryId}/words`).get()
+  const entries = await prepareEntriesFromSnapshot(entriesSnapshot, dictionaryId)
+  await updateIndex(entries)
 }
 
+// eslint-disable-next-line no-undef
 async function prepareEntriesFromSnapshot(entriesSnapshot: FirebaseFirestore.QuerySnapshot<FirebaseFirestore.DocumentData>, dictionaryId: string) {
   const entryPromises = entriesSnapshot.docs.map(async (doc) => {
-    const dbEntry = doc.data() as ActualDatabaseEntry;
-    const algoliaEntry = await prepareDataForIndex(dbEntry, dictionaryId, db);
-    console.log({ dbEntry, algoliaEntry});
-    return { ...algoliaEntry, objectID: doc.id };
-  });
+    const dbEntry = doc.data() as ActualDatabaseEntry
+    const algoliaEntry = await prepareDataForIndex(dbEntry, dictionaryId, db)
+    console.log({ dbEntry, algoliaEntry })
+    return { ...algoliaEntry, objectID: doc.id }
+  })
 
-  const entries = await Promise.all(entryPromises);
-  return entries;
+  const entries = await Promise.all(entryPromises)
+  return entries
 }
 
 // indexAllDictionaries();
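
The prepare.default cast above is a common workaround for consuming a CommonJS module from ESM-style code; a minimal sketch of the same pattern, with hypothetical package and function names:

// Sketch of the CJS interop pattern used above (names are hypothetical).
// When a package's package.json declares it CommonJS, a namespace import can
// surface module.exports under `default`, so the function is dug out and re-typed:
import * as cjs from 'some-commonjs-package'

const realFunction = cjs.default
  .realFunction as typeof import('some-commonjs-package').realFunction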
31 changes: 15 additions & 16 deletions packages/scripts/algolia/algolia.ts
@@ -1,32 +1,31 @@
-import algoliasearch from 'algoliasearch';
-import { projectId } from '../config';
-import { adminKey } from './algolia-admin-key.json';
-import { AlgoliaEntry } from '@living-dictionaries/types';
+import algoliasearch from 'algoliasearch'
+import type { AlgoliaEntry } from '@living-dictionaries/types'
+import { projectId } from '../config-firebase'
+import { adminKey } from './algolia-admin-key.json'
 
-const ALGOLIA_APP_ID = 'XCVBAYSYXD';
+const ALGOLIA_APP_ID = 'XCVBAYSYXD'
 
-export const client = algoliasearch(ALGOLIA_APP_ID, adminKey);
+export const client = algoliasearch(ALGOLIA_APP_ID, adminKey)
 
 const index = client.initIndex(
-  projectId === 'talking-dictionaries-dev' ? 'entries_dev' : 'entries_prod'
-);
+  projectId === 'talking-dictionaries-dev' ? 'entries_dev' : 'entries_prod',
+)
 
-const MAX_CHUNK_SIZE = 3000;
+const MAX_CHUNK_SIZE = 3000
 // https://www.algolia.com/doc/api-reference/api-methods/add-objects/#examples
 // if forced to iterate instead of save all at once, take note of the rate limiting at 5000 backlogged requests https://www.algolia.com/doc/faq/indexing/is-there-a-rate-limit/
 
 export async function updateIndex(entries: AlgoliaEntry[]) {
   try {
     for (let startOfChunkIndex = 0; startOfChunkIndex < entries.length; startOfChunkIndex += MAX_CHUNK_SIZE) {
-      const endOfChunk = startOfChunkIndex + MAX_CHUNK_SIZE;
-      const chunk = entries.slice(startOfChunkIndex, endOfChunk);
-      console.log({ startOfChunkIndex, endOfChunk, CHUNK_SIZE: MAX_CHUNK_SIZE, chunkLength: chunk.length });
+      const endOfChunk = startOfChunkIndex + MAX_CHUNK_SIZE
+      const chunk = entries.slice(startOfChunkIndex, endOfChunk)
+      console.log({ startOfChunkIndex, endOfChunk, CHUNK_SIZE: MAX_CHUNK_SIZE, chunkLength: chunk.length })
 
-      const { objectIDs } = await index.saveObjects(chunk);
-      console.log(`Entries indexed: ${objectIDs.length}`);
+      const { objectIDs } = await index.saveObjects(chunk)
+      console.log(`Entries indexed: ${objectIDs.length}`)
     }
   } catch (err) {
-    console.log(err);
+    console.log(err)
   }
-
 }
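
The chunking loop in updateIndex generalizes beyond Algolia; a standalone sketch of the same slicing logic (the helper name is illustrative):

// Generic form of the batching used in updateIndex: slice a large array into
// MAX_CHUNK_SIZE pieces and process them sequentially to respect request limits.
const MAX_CHUNK_SIZE = 3000

async function processInChunks<T>(items: T[], processChunk: (chunk: T[]) => Promise<void>) {
  for (let start = 0; start < items.length; start += MAX_CHUNK_SIZE) {
    const chunk = items.slice(start, start + MAX_CHUNK_SIZE)
    await processChunk(chunk) // e.g. chunk => index.saveObjects(chunk)
  }
}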