Skip to content

Commit d4f37c8

Browse files
committedFeb 11, 2025
[Ai-written] Reading local spreadsheet
take target language as a command line argument using google translate improve command line refactor
1 parent e0654c1 commit d4f37c8

10 files changed

+1613
-0
lines changed
 

‎.gitignore

+177
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
# Based on https://raw.githubusercontent.com/github/gitignore/main/Node.gitignore
2+
3+
# Logs
4+
5+
logs
6+
*.log
7+
npm-debug.log*
8+
yarn-debug.log*
9+
yarn-error.log*
10+
lerna-debug.log*
11+
.pnpm-debug.log*
12+
13+
# Caches
14+
15+
.cache
16+
17+
# Diagnostic reports (https://nodejs.org/api/report.html)
18+
19+
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
20+
21+
# Runtime data
22+
23+
pids
24+
*.pid
25+
*.seed
26+
*.pid.lock
27+
28+
# Directory for instrumented libs generated by jscoverage/JSCover
29+
30+
lib-cov
31+
32+
# Coverage directory used by tools like istanbul
33+
34+
coverage
35+
*.lcov
36+
37+
# nyc test coverage
38+
39+
.nyc_output
40+
41+
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
42+
43+
.grunt
44+
45+
# Bower dependency directory (https://bower.io/)
46+
47+
bower_components
48+
49+
# node-waf configuration
50+
51+
.lock-wscript
52+
53+
# Compiled binary addons (https://nodejs.org/api/addons.html)
54+
55+
build/Release
56+
57+
# Dependency directories
58+
59+
node_modules/
60+
jspm_packages/
61+
62+
# Snowpack dependency directory (https://snowpack.dev/)
63+
64+
web_modules/
65+
66+
# TypeScript cache
67+
68+
*.tsbuildinfo
69+
70+
# Optional npm cache directory
71+
72+
.npm
73+
74+
# Optional eslint cache
75+
76+
.eslintcache
77+
78+
# Optional stylelint cache
79+
80+
.stylelintcache
81+
82+
# Microbundle cache
83+
84+
.rpt2_cache/
85+
.rts2_cache_cjs/
86+
.rts2_cache_es/
87+
.rts2_cache_umd/
88+
89+
# Optional REPL history
90+
91+
.node_repl_history
92+
93+
# Output of 'npm pack'
94+
95+
*.tgz
96+
97+
# Yarn Integrity file
98+
99+
.yarn-integrity
100+
101+
# dotenv environment variable files
102+
103+
.env
104+
.env.development.local
105+
.env.test.local
106+
.env.production.local
107+
.env.local
108+
109+
# parcel-bundler cache (https://parceljs.org/)
110+
111+
.parcel-cache
112+
113+
# Next.js build output
114+
115+
.next
116+
out
117+
118+
# Nuxt.js build / generate output
119+
120+
.nuxt
121+
dist
122+
123+
# Gatsby files
124+
125+
# Uncomment the "public" line below if your project uses Gatsby and not Next.js
126+
127+
# https://nextjs.org/blog/next-9-1#public-directory-support
128+
129+
# public
130+
131+
# vuepress build output
132+
133+
.vuepress/dist
134+
135+
# vuepress v2.x temp and cache directory
136+
137+
.temp
138+
139+
# Docusaurus cache and generated files
140+
141+
.docusaurus
142+
143+
# Serverless directories
144+
145+
.serverless/
146+
147+
# FuseBox cache
148+
149+
.fusebox/
150+
151+
# DynamoDB Local files
152+
153+
.dynamodb/
154+
155+
# TernJS port file
156+
157+
.tern-port
158+
159+
# Stores VSCode versions used for testing VSCode extensions
160+
161+
.vscode-test
162+
163+
# yarn v2
164+
165+
.yarn/cache
166+
.yarn/unplugged
167+
.yarn/build-state.yml
168+
.yarn/install-state.gz
169+
.pnp.*
170+
171+
# IntelliJ based IDEs
172+
.idea
173+
174+
# Finder (MacOS) folder config
175+
.DS_Store
176+
177+
*.xlsx

‎.vscode/settings.json

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"editor.formatOnSave": true,
3+
"editor.defaultFormatter": "vscode.typescript-language-features"
4+
}

‎bun.lockb

41.6 KB
Binary file not shown.

‎index.ts

+123
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
#!/usr/bin/env bun
2+
import * as XLSX from 'xlsx';
3+
import { unlink } from 'node:fs/promises';
4+
import { existsSync } from 'node:fs';
5+
import { translateToLanguage } from './translate';
6+
import { Command } from 'commander';
7+
import { createRequire } from 'module';
8+
import { resolve, basename, dirname } from 'node:path';
9+
10+
// package.json is loaded through a CommonJS-style require so the CLI can report
// its own version (passed to commander's `.version()` in main).
const require = createRequire(import.meta.url);
// `require` returns `any`; pin the one field we read so `version` is a string.
const { version }: { version: string } = require('./package.json');
12+
13+
/**
 * One data row of the worksheet as produced by `XLSX.utils.sheet_to_json`:
 * keys are column headers such as "[en]" or "[fr-x-ai-gt]".
 */
interface Row {
    /** English source text for this row. */
    "[en]": string;
    /** Any other column; values are not inspected, so keep them opaque. */
    [key: string]: unknown;
}
17+
18+
/**
 * Builds the default output file name by inserting `-{target}` before the
 * `.xlsx` extension (matched case-insensitively). If the input name has some
 * other extension, the suffix is appended instead, so the result can never be
 * identical to the input name.
 */
function buildDefaultOutputName(inputBasename: string, target: string): string {
    const suffix = `-${target}.xlsx`;
    const xlsxExtension = /\.xlsx$/i;
    return xlsxExtension.test(inputBasename)
        // Function replacer: keeps any "$" in the target from being read as a replacement pattern.
        ? inputBasename.replace(xlsxExtension, () => suffix)
        : `${inputBasename}${suffix}`;
}

/**
 * Returns every column name used by at least one row, in first-seen order.
 * `sheet_to_json` leaves out empty cells, so the first row alone may not
 * mention every column of the sheet.
 */
function collectHeaders(rows: readonly Row[]): string[] {
    const seen = new Set<string>();
    for (const row of rows) {
        for (const key of Object.keys(row)) {
            seen.add(key);
        }
    }
    return [...seen];
}

/**
 * CLI entry point.
 *
 * Parses the command line, reads the "BloomBook" sheet of the input workbook,
 * translates every non-empty "[en]" cell via `translateToLanguage`, stores the
 * results in the `[<target>]` column (inserted right after "[en]" when new),
 * and writes a new workbook containing that sheet to the output path.
 *
 * Exits with status 1 when the input file or sheet is missing, when the output
 * path equals the input path, or when the target column already exists and
 * `--retranslate` was not given.
 *
 * @throws Error when the translator returns a different number of texts than it was given.
 */
async function main(): Promise<void> {
    const program = new Command();

    // NOTE(review): the --target help text talks about what happens when the option
    // is not provided, but a default ('fr-x-ai-gt') is always supplied — confirm intent.
    program
        .name('bloom-translate-spreadsheet')
        .description('Translates Bloom spreadsheet content to different languages')
        .argument('<inputPath>', 'Input Excel file path')
        .option('-o, --output <path>', 'Output Excel file path (default: {input-filename}-{language}.xlsx)')
        .option('--target <tag>', 'BCP47 language code with model. If this is not provided, the program will look in the input spreadsheet for columns to translate. If it is specified, then the program will either add or re-use a column with the given tag. Example: fr-x-ai-gt would add or reuse a French column translated with Google Translate.', 'fr-x-ai-gt')
        .option('--retranslate', 'If this is provided, then columns will be replaced if they already exist. Otherwise, the program will not re-translate.')
        .version(version)
        .addHelpText('after', [
            '',
            'Example:',
            '$ bloom-translate-spreadsheet foo.xlsx',
            '$ bloom-translate-spreadsheet foo.xlsx --target es-x-ai-gt -o foo-with-spanish.xlsx',
            '$ bloom-translate-spreadsheet foo.xlsx --target fr-x-ai-gt --retranslate',
        ].join('\n'));

    program.parse();

    const options = program.opts<{ output?: string; target: string; retranslate?: boolean }>();
    const targetLangAndModel = options.target;
    const shouldRetranslate = Boolean(options.retranslate);
    const inputPath = resolve(program.args[0]);
    const sheetName = 'BloomBook';

    // The column name is the full language code, including the model.
    const columnName = `[${targetLangAndModel}]`;

    // The default output lands in the current directory, next to where the tool is run.
    const defaultOutputPath = resolve(
        process.cwd(),
        buildDefaultOutputName(basename(inputPath), targetLangAndModel)
    );
    const outputPath = options.output ? resolve(options.output) : defaultOutputPath;

    if (!existsSync(inputPath)) {
        console.error(`Input file not found: ${inputPath}`);
        process.exit(1);
    }

    // The output workbook only contains the translated sheet, so writing it over
    // the input would lose data. Refuse instead of clobbering the source file.
    if (outputPath === inputPath) {
        console.error(`Output path must be different from the input path: ${inputPath}`);
        process.exit(1);
    }

    const workbook = XLSX.readFile(inputPath);

    if (!workbook.SheetNames.includes(sheetName)) {
        console.error(`Sheet "${sheetName}" not found in workbook. Available sheets: ${workbook.SheetNames.join(', ')}`);
        process.exit(1);
    }

    const inputSheet = XLSX.utils.sheet_to_json<Row>(workbook.Sheets[sheetName]);

    // Look at all rows, not just the first: a column that is empty in the first
    // row would otherwise go unnoticed and be overwritten without --retranslate.
    const headers = collectHeaders(inputSheet);
    const columnAlreadyExists = headers.includes(columnName);

    if (!shouldRetranslate && columnAlreadyExists) {
        console.error(`Column ${columnName} already exists in the spreadsheet. Use --retranslate flag to overwrite.`);
        process.exit(1);
    }

    // A new target column goes right after the [en] column.
    const newHeaders = [...headers];
    const enIndex = headers.indexOf('[en]');
    if (enIndex !== -1 && !columnAlreadyExists) {
        newHeaders.splice(enIndex + 1, 0, columnName);
    }

    // Only rows with English text are sent for translation.
    const textsToTranslate = inputSheet
        .map(row => row['[en]'])
        .filter(text => text);

    let translations: string[] = [];
    if (textsToTranslate.length > 0) {
        translations = await translateToLanguage(textsToTranslate, targetLangAndModel);
    } else {
        console.warn(`No text found in the [en] column of sheet "${sheetName}"; nothing to translate.`);
    }

    // The results are matched back to rows by position, so a count mismatch
    // would silently shift translations onto the wrong rows.
    if (translations.length !== textsToTranslate.length) {
        throw new Error(`Expected ${textsToTranslate.length} translations but received ${translations.length}.`);
    }

    // Same truthiness test as the filter above, so the indexes line up.
    let translationIndex = 0;
    for (const row of inputSheet) {
        if (row['[en]']) {
            row[columnName] = translations[translationIndex++];
        }
    }

    const newWorkbook = XLSX.utils.book_new();
    const newSheet = XLSX.utils.json_to_sheet(inputSheet, { header: newHeaders });
    XLSX.utils.book_append_sheet(newWorkbook, newSheet, sheetName);

    // writeFile replaces an existing file, and only does so once everything above
    // has succeeded, so a failed run leaves any previous output untouched.
    XLSX.writeFile(newWorkbook, outputPath);
    console.log(`Translated spreadsheet saved to: ${outputPath}`);
}
122+
123+
// Report any failure and make sure the process ends with a non-zero exit status,
// so shells and scripts can tell that the translation did not complete.
main().catch((error: unknown) => {
    console.error(error);
    process.exitCode = 1;
});

‎package-lock.json

+1,198
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎package.json

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{
2+
"name": "bloom-translate-spreadsheet",
3+
"version": "0.0.1",
4+
"module": "index.ts",
5+
"type": "module",
6+
"scripts": {
7+
"start": "bun run index.ts"
8+
},
9+
"devDependencies": {
10+
"@types/bun": "latest"
11+
},
12+
"peerDependencies": {
13+
"typescript": "^5.0.0"
14+
},
15+
"dependencies": {
16+
"@google-cloud/translate": "^8.5.0",
17+
"commander": "^13.1.0",
18+
"xlsx": "^0.18.5"
19+
}
20+
}

‎src/googleTranslate.ts

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import { v2 } from '@google-cloud/translate';
2+
3+
/** Largest number of strings sent to the Cloud Translation v2 API in one request. */
const MAX_TEXTS_PER_REQUEST = 128;

/**
 * Translates English texts with the Google Cloud Translation v2 API.
 *
 * The texts are sent in batches of at most `MAX_TEXTS_PER_REQUEST`, one request
 * after another, and the results are returned in the same order as the input.
 *
 * @param englishTexts Texts to translate; an empty array returns `[]` without contacting the API.
 * @param targetLang Language code passed to the API as the translation target.
 * @param serviceAccountEmail `client_email` of the service account credentials.
 * @param serviceAccountPrivateKey `private_key` of the service account credentials.
 * @returns The translated texts, one per input text.
 * @throws Rethrows whatever the API client throws, after logging it.
 */
export async function translateWithGoogleTranslate(
    englishTexts: string[],
    targetLang: string,
    serviceAccountEmail: string,
    serviceAccountPrivateKey: string
): Promise<string[]> {
    // Nothing to do; also avoids sending a request with no text.
    if (englishTexts.length === 0) {
        return [];
    }

    const translate = new v2.Translate({
        credentials: {
            client_email: serviceAccountEmail,
            private_key: serviceAccountPrivateKey
        }
    });

    const results: string[] = [];
    try {
        // Sequential on purpose: keeps the output order trivially aligned with the input.
        for (let start = 0; start < englishTexts.length; start += MAX_TEXTS_PER_REQUEST) {
            const batch = englishTexts.slice(start, start + MAX_TEXTS_PER_REQUEST);
            const [translations] = await translate.translate(batch, targetLang);
            // The client returns a single string for a single-string request; normalise to an array.
            const batchResults = Array.isArray(translations) ? translations : [translations];
            results.push(...batchResults);
        }
    } catch (error) {
        console.error('Translation error:', error);
        throw error;
    }
    return results;
}

‎test-data/moon-and-cap.xlsx

25.9 KB
Binary file not shown.

‎translate.ts

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import { v2 } from '@google-cloud/translate';
2+
import { translateWithGoogleTranslate } from './src/googleTranslate';
3+
4+
/** Translation back-ends that can be selected through the language tag. */
type TranslationModel = 'gt' | 'acts2' | 'piglatin';

// Checked in this order, so "acts2" wins over "gt" if a tag somehow names both.
const KNOWN_MODELS: readonly TranslationModel[] = ['acts2', 'gt', 'piglatin'];

/**
 * Extracts the translation model from a language tag of the form
 * "<language>-x-ai-<model>", e.g. "fr-x-ai-gt".
 *
 * Only whole subtags after the private-use marker "x" are considered, so a
 * region such as the "GT" in "es-GT" is not mistaken for the "gt" model.
 * Matching is case-insensitive.
 *
 * @returns The model, or `null` when the tag names no known model.
 */
export function parseModelFromLanguageCode(langCode: string): TranslationModel | null {
    const subtags = langCode.toLowerCase().split('-');
    const privateUseStart = subtags.indexOf('x');
    if (privateUseStart === -1) {
        return null;
    }
    const privateSubtags = subtags.slice(privateUseStart + 1);
    return KNOWN_MODELS.find(model => privateSubtags.includes(model)) ?? null;
}

/**
 * Translates English texts using the model named in `targetCode`.
 *
 * - "gt": Google Translate; needs BLOOM_GOOGLE_TRANSLATION_SERVICE_ACCOUNT_EMAIL and
 *   BLOOM_GOOGLE_TRANSLATION_SERVICE_PRIVATE_KEY in the environment. The part of
 *   the tag before "-x-" is used as the target language.
 * - "acts2": placeholder that prefixes each text with "[Acts2 pretend] ".
 * - "piglatin": moves the first character of each space-separated word to the end and adds "ay".
 *
 * @param englishTexts Texts to translate.
 * @param targetCode Language tag including the model, e.g. "fr-x-ai-gt".
 * @returns One translated text per input text, in the same order.
 * @throws Error when a required environment variable is missing or the tag names no known model.
 */
export async function translateToLanguage(englishTexts: string[], targetCode: string): Promise<string[]> {
    const model = parseModelFromLanguageCode(targetCode);

    switch (model) {
        case 'gt': {
            const accountEmail = process.env.BLOOM_GOOGLE_TRANSLATION_SERVICE_ACCOUNT_EMAIL;
            const privateKey = process.env.BLOOM_GOOGLE_TRANSLATION_SERVICE_PRIVATE_KEY;
            if (!accountEmail) {
                throw new Error('Translating with Google requires the environment variable BLOOM_GOOGLE_TRANSLATION_SERVICE_ACCOUNT_EMAIL. After setting it (and also BLOOM_GOOGLE_TRANSLATION_SERVICE_PRIVATE_KEY), you may have to restart your terminal.');
            }
            if (!privateKey) {
                throw new Error('Translating with Google requires the environment variable BLOOM_GOOGLE_TRANSLATION_SERVICE_PRIVATE_KEY. After setting it (and also BLOOM_GOOGLE_TRANSLATION_SERVICE_ACCOUNT_EMAIL), you may have to restart your terminal.');
            }

            // Google only understands the language part, not the private-use model suffix.
            const language = targetCode.split(/-x-/i)[0];
            // The key is stored with literal "\n" sequences; turn them back into real newlines.
            const normalizedKey = privateKey.replace(/\\n/g, '\n');
            return await translateWithGoogleTranslate(englishTexts, language, accountEmail, normalizedKey);
        }
        case 'acts2':
            return englishTexts.map(text => `[Acts2 pretend] ${text}`);
        case 'piglatin':
            return englishTexts.map(text =>
                text
                    .split(' ')
                    // Empty pieces come from consecutive spaces; leave them alone.
                    .map(word => (word.length === 0 ? word : `${word.slice(1)}${word[0]}ay`))
                    .join(' ')
            );
        default:
            throw new Error(`Unknown translation model in language code "${targetCode}"`);
    }
}

‎tsconfig.json

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
{
2+
"compilerOptions": {
3+
// Enable latest features
4+
"lib": ["ESNext", "DOM"],
5+
"target": "ESNext",
6+
"module": "ESNext",
7+
"moduleDetection": "force",
8+
"jsx": "react-jsx",
9+
"allowJs": true,
10+
11+
// Bundler mode
12+
"moduleResolution": "bundler",
13+
"allowImportingTsExtensions": true,
14+
"verbatimModuleSyntax": true,
15+
"noEmit": true,
16+
17+
// Best practices
18+
"strict": true,
19+
"skipLibCheck": true,
20+
"noFallthroughCasesInSwitch": true,
21+
22+
// Some stricter flags (disabled by default)
23+
"noUnusedLocals": false,
24+
"noUnusedParameters": false,
25+
"noPropertyAccessFromIndexSignature": false
26+
}
27+
}

0 commit comments

Comments
 (0)
Please sign in to comment.