
Commit f9283f0

Replace data submodule by directory
1 parent 5b8ba92 commit f9283f0
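The data git submodule is replaced by a plain data/ directory. src/scripts/build-data.ts now creates data/toadua/ if needed, caches the downloaded dump at data/toadua/dump.json (now git-ignored), and merges glosses and frames into a single data/toadua/toadua.json keyed by headword; a new data/unofficial.json lists glosses for unofficial words. src/gloss.ts, src/toadua.ts, src/tree.ts and src/bot/bot.ts are updated to read the merged file, with src/toadua.ts exposing it through a lazily loaded toadua() accessor.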


10 files changed, +94 -28 lines


.gitignore (+3)

@@ -4,5 +4,8 @@ output.*
 /*.png
 dist
 
+# Downloaded by src/scripts/build-data.ts
+data/toadua/dump.json
+
 # Generated from src/toaq.generic.ne by grammar-preprocessor.ts
 src/toaq.ne

.gitmodules (-3)

@@ -1,6 +1,3 @@
 [submodule "dictionary"]
 	path = dictionary
 	url = https://github.com/toaq/dictionary
-[submodule "data"]
-	path = data
-	url = https://github.com/toaq/data

data (-1)

This file was deleted.

data/toadua/toadua.json (+1)

Large diffs are not rendered by default.

data/unofficial.json (+55)

@@ -0,0 +1,55 @@
+[
+  { "type": "conjunction", "toaq": "búru", "gloss": "not.and" },
+  { "type": "conjunction", "toaq": "éru", "gloss": "and*" },
+  { "type": "conjunction", "toaq": "hújı", "gloss": "more.than" },
+  { "type": "conjunction", "toaq": "húq", "gloss": "or.rather" },
+  { "type": "conjunction", "toaq": "kújı", "gloss": "less.than" },
+  { "type": "conjunction", "toaq": "róbu", "gloss": "iff" },
+  { "type": "conjunction", "toaq": "rúbıe", "gloss": "and.then" },
+  { "type": "conjunction", "toaq": "rúbu", "gloss": "and.not" },
+  { "type": "conjunction", "toaq": "tóm", "gloss": "or.in.other.words" },
+  { "type": "determiner", "toaq": "cúaq", "gloss": "the.concept" },
+  { "type": "determiner", "toaq": "étu", "gloss": "each*" },
+  { "type": "determiner", "toaq": "lího", "gloss": "etc" },
+  { "type": "determiner", "toaq": "péı", "gloss": "the.number" },
+  { "type": "focus particle", "toaq": "cúom", "gloss": "damn" },
+  { "type": "focus particle", "toaq": "séu", "gloss": "if.nothing.else" },
+  { "type": "focus particle", "toaq": "shúq", "gloss": "merely" },
+  { "type": "illocution", "toaq": "daha", "gloss": "just.kidding" },
+  { "type": "illocution", "toaq": "hom", "gloss": "let's" },
+  { "type": "illocution", "toaq": "nheo", "gloss": "I.agree" },
+  { "type": "illocution", "toaq": "", "gloss": "IMPERATIVE" },
+  { "type": "illocution", "toaq": "", "gloss": "isn't.it?" },
+  { "type": "illocution", "toaq": "zoe", "gloss": "they.say" },
+  { "type": "interjection", "toaq": "aha", "gloss": "haha" },
+  { "type": "interjection", "toaq": "ahu", "gloss": "awoo" },
+  { "type": "interjection", "toaq": "baıbaı", "gloss": "bye.bye" },
+  { "type": "interjection", "toaq": "ehe", "gloss": "ehe" },
+  { "type": "interjection", "toaq": "em", "gloss": "um" },
+  { "type": "interjection", "toaq": "ıo", "gloss": "yeah" },
+  { "type": "interjection", "toaq": "je", "gloss": "new.topic" },
+  { "type": "interjection", "toaq": "jıkı", "gloss": "you're.welcome" },
+  { "type": "interjection", "toaq": "junı", "gloss": "goodbye" },
+  { "type": "interjection", "toaq": "koıka", "gloss": "farewell" },
+  { "type": "interjection", "toaq": "mıja", "gloss": "I.agree" },
+  { "type": "interjection", "toaq": "muana", "gloss": "for.example" },
+  { "type": "interjection", "toaq": "nhó", "gloss": "right?" },
+  { "type": "interjection", "toaq": "noga", "gloss": "so.true" },
+  { "type": "interjection", "toaq": "oka", "gloss": "goodbye" },
+  { "type": "interjection", "toaq": "reına", "gloss": "farewell" },
+  { "type": "interjection", "toaq": "hıba", "gloss": "come.again?" },
+  { "type": "interjection", "toaq": "sıanha", "gloss": "never.mind" },
+  { "type": "interjection", "toaq": "suena", "gloss": "please" },
+  { "type": "modality", "toaq": "ıaq", "gloss": "no.matter" },
+  { "type": "modality", "toaq": "junaı", "gloss": "given.that" },
+  { "type": "modality", "toaq": "noeq", "gloss": "despite" },
+  { "type": "prefix", "toaq": "ce-", "gloss": "of.concept" },
+  { "type": "prefix", "toaq": "e-", "gloss": "event.of" },
+  { "type": "prefix", "toaq": "hao-", "gloss": "INTR" },
+  { "type": "pronoun", "toaq": "áhoq", "gloss": "you.VULGAR" },
+  { "type": "pronoun", "toaq": "kóm", "gloss": "it" },
+  { "type": "pronoun", "toaq": "", "gloss": "it" },
+  { "type": "pronoun", "toaq": "súqneoq", "gloss": "you.POLITE" },
+  { "type": "tense", "toaq": "ınaı", "gloss": "NOW.ABS" },
+  { "type": "tense", "toaq": "tuom", "gloss": "default.tense" }
+]

src/bot/bot.ts (+2 -3)

@@ -16,7 +16,7 @@ import { pngGlossSentence } from '../png-gloss';
 import { toEnglish } from '../english/tree';
 import { denotationRenderText } from '../tree/place';
 
-const toaduaEntries = [...toadua.values()];
+const toaduaEntries = [...Object.values(toadua())];
 
 export class KunaBot {
   constructor(private client: Client) {}
@@ -117,7 +117,6 @@ export class KunaBot {
     const newEntry = _.sample(
       toaduaEntries.filter(
         entry =>
-          entry.scope === 'en' &&
          !entry.head.includes(' ') &&
          !entries.some(previous => entry.user === previous.user),
      ),
@@ -168,7 +167,7 @@ export class KunaBot {
     const mode = interaction.options.getString('mode', false) ?? 'official';
     const author = interaction.options.getString('author', false);
     const ok = this.quizFilter(mode, author);
-    const candidates = toaduaEntries.filter(e => e.scope === 'en' && ok(e));
+    const candidates = toaduaEntries.filter(e => ok(e));
     if (candidates.length < r + p) {
       await interaction.reply('Not enough words!');
       return;
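The entry.scope === 'en' checks dropped above are redundant now: src/scripts/build-data.ts already skips non-English results (result['scope'] !== 'en') before writing toadua.json, so every entry the bot reads is English-scoped.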

src/gloss.ts (+4 -3)

@@ -1,16 +1,17 @@
 import { Entry, dictionary } from './dictionary';
 import { bare, clean, splitPrefixes, tone } from './tokenize';
 import { Tone } from './types';
-import toaduaGlossesJson from '../data/toadua/glosses.json';
+import toaduaGlossesJson from '../data/toadua/toadua.json';
 
 interface Gloss {
   toaq: string;
   english: string;
 }
 
 let toaduaGlosses = new Map();
-for (const [word, gloss] of Object.entries(toaduaGlossesJson)) {
-  toaduaGlosses.set(word.toLowerCase(), gloss.replace(/\s+/g, '.'));
+for (const [word, e] of Object.entries(toaduaGlossesJson)) {
+  if ('gloss' in e)
+    toaduaGlosses.set(word.toLowerCase(), e.gloss.replace(/\s+/g, '.'));
 }
 
 const words = [...toaduaGlosses.keys()]
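Unlike the old glosses.json, toadua.json maps each headword to a full entry object whose gloss field is optional, hence the 'gloss' in e guard. A minimal sketch of the whitespace-to-dot step, using a placeholder gloss value:

// Hypothetical gloss text taken from a toadua.json entry:
const gloss = 'be a cat';
// The loop above stores it with whitespace collapsed to dots:
const dotted = gloss.replace(/\s+/g, '.'); // => 'be.a.cat'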

src/scripts/build-data.ts (+14 -11)

@@ -26,6 +26,9 @@ async function fetchToadua(): Promise<{ results: any[] }> {
 }
 
 async function readToadua(): Promise<{ results: any[] }> {
+  if (!fs.existsSync('data/toadua')) {
+    fs.mkdirSync('data/toadua');
+  }
   if (!fs.existsSync('data/toadua/dump.json')) {
     const toadua = await fetchToadua();
     fs.writeFileSync('data/toadua/dump.json', JSON.stringify(toadua));
@@ -58,23 +61,23 @@ readToadua().then(({ results }) => {
   // Sort by ascending score so that higher scoring entries get processed later and overwrite earlier ones.
   results.sort((a, b) => a.score - b.score);
 
-  let glosses: Record<string, string> = {};
-  let frames: Record<string, string> = {};
+  let entries: Record<string, any> = {};
 
-  for (let entry of results) {
-    if (entry['scope'] !== 'en') continue;
-    if (entry['score'] < 0) continue;
-    const head = entry['head'];
+  for (let result of results) {
+    if (result['scope'] !== 'en') continue;
+    if (result['score'] < 0) continue;
+    const head = result['head'];
     if (head.length >= 30) continue;
-    const body = entry['body'];
+    const body = result['body'];
     const gloss = makeGloss(body);
+    let entry: any = { body, head, user: result.user, score: result.score };
     if (gloss && gloss.length <= 25) {
-      glosses[head] = gloss;
+      entry.gloss = gloss;
     }
     const frame = guessFrameFromDefinition(body);
-    if (frame) frames[head] = frame;
+    if (frame) entry.frame = frame;
+    entries[head] = entry;
   }
 
-  fs.writeFileSync('data/toadua/glosses.json', JSON.stringify(glosses));
-  fs.writeFileSync('data/toadua/frames.json', JSON.stringify(frames));
+  fs.writeFileSync('data/toadua/toadua.json', JSON.stringify(entries));
 });
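A minimal sketch of the merged format the script above writes to data/toadua/toadua.json; the headword, definition body, and author below are hypothetical placeholders:

import type { ToaduaEntry } from '../toadua';

// Each headword maps to its raw Toadua fields plus the optionally derived gloss/frame.
const example: Record<string, ToaduaEntry> = {
  exampleword: {
    head: 'exampleword',       // hypothetical headword
    body: '▯ is an example.',  // hypothetical definition body
    user: 'example-user',      // hypothetical author
    score: 3,
    gloss: 'example',          // set only when makeGloss() yields a gloss of 25 chars or fewer
    frame: 'c',                // set only when guessFrameFromDefinition() finds a frame
  },
};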

src/toadua.ts (+13 -5)

@@ -1,14 +1,22 @@
-import toaduaDump from '../data/toadua/dump.json';
+import * as fs from 'fs';
+import path from 'path';
 
 export interface ToaduaEntry {
   head: string;
   body: string;
   user: string;
-  scope: string;
   score: number;
+  gloss?: string;
+  frame?: string;
 }
 
-export const toadua = new Map<string, ToaduaEntry>();
-for (const e of toaduaDump as ToaduaEntry[]) {
-  toadua.set(e.head, e);
+let _toadua: Record<string, ToaduaEntry>;
+
+export function toadua(): Record<string, ToaduaEntry> {
+  return (_toadua ??= JSON.parse(
+    fs.readFileSync(
+      path.resolve(__dirname, '../data/toadua/toadua.json'),
+      'utf8',
+    ),
+  ));
 }
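Call sites now go through this lazy accessor instead of importing a prebuilt Map. A brief usage sketch (the headword 'exampleword' is a placeholder):

import { toadua } from './toadua';

// The first call reads and parses data/toadua/toadua.json; later calls reuse the cached object.
const entries = toadua();

// Single-word lookup, as src/tree.ts does for verb frames:
const frame = entries['exampleword']?.frame ?? '?';

// Whole-dictionary iteration, as src/bot/bot.ts does for quizzes:
const toaduaEntries = [...Object.values(entries)];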

src/tree.ts (+2 -2)

@@ -1,7 +1,7 @@
 import { dictionary, Entry } from './dictionary';
 import { Impossible } from './error';
-import { guessFrameUsingToadua } from './frame';
 import { getFrame } from './serial';
+import { toadua } from './toadua';
 import { bare, clean, repairTones, ToaqToken, tone } from './tokenize';
 import { Tone } from './types';
 
@@ -287,7 +287,7 @@ export function makeWord([token]: [ToaqToken]): Word {
       gloss_abbreviation: lemmaForm,
       pronominal_class: 'ta',
       distribution: 'd',
-      frame: guessFrameUsingToadua(lemmaForm),
+      frame: toadua()[lemmaForm]?.frame ?? '?',
       english: '',
       subject: 'free',
     },
