Skip to content

Commit

Permalink
Update media via s3 scripts (#535)
Browse files Browse the repository at this point in the history
* update import script to upload media using S3

Co-authored-by: jacob-8 <[email protected]>
  • Loading branch information
Danble and jacob-8 authored Feb 21, 2025
1 parent 05dcc14 commit a846308
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 42 deletions.
16 changes: 14 additions & 2 deletions packages/scripts/config-supabase.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { createClient } from '@supabase/supabase-js'
import type { Database } from '@living-dictionaries/types'
import * as dotenv from 'dotenv'
import './record-logs'
import { S3Client } from '@aws-sdk/client-s3'

program
.option('-e, --environment [dev/prod]', 'Firebase/Supabase Project', 'dev')
Expand All @@ -14,9 +15,10 @@ export const environment = program.opts().environment === 'prod' ? 'prod' : 'dev
console.log(`Supabase running on ${environment}`)

if (environment === 'dev') {
dotenv.config({ path: '../site/.env.development' })
dotenv.config({ path: '../site/.env.development' }) // Supabase local service key
dotenv.config({ path: '../site/.env.local' }) // for dev cloud storage bucket
} else {
dotenv.config({ path: '.env.production.supabase' })
dotenv.config({ path: '../site/.env.production.local' }) // Supabase production service key and cloud storage bucket
}

export const admin_supabase = createClient<Database>(process.env.PUBLIC_SUPABASE_API_URL, process.env.SUPABASE_SERVICE_ROLE_KEY)
Expand Down Expand Up @@ -72,3 +74,13 @@ class DB {
}

export const postgres = new DB()

// S3-compatible client pointed at Google Cloud Storage's interoperability
// endpoint, so GCS can be addressed with the AWS SDK (PutObjectCommand etc.).
// Credentials are HMAC keys loaded from the env files configured above
// (.env.local for dev, .env.production.local for prod).
// NOTE(review): no runtime check that the two env vars are actually set —
// an S3Client built with undefined credentials fails only at first request.
export const GCLOUD_MEDIA_BUCKET_S3 = new S3Client({
  region: 'us', // presumably a multi-region GCS location — confirm against the bucket's settings
  endpoint: `https://storage.googleapis.com`,
  credentials: {
    accessKeyId: process.env.GCLOUD_MEDIA_BUCKET_ACCESS_KEY_ID,
    secretAccessKey: process.env.GCLOUD_MEDIA_BUCKET_SECRET_ACCESS_KEY,
  },
})
// Media bucket name, selected by the `environment` CLI flag parsed above:
// prod -> talking-dictionaries-alpha.appspot.com, otherwise the dev bucket.
export const storage_bucket = `talking-dictionaries-${environment === 'prod' ? 'alpha' : 'dev'}.appspot.com`
7 changes: 3 additions & 4 deletions packages/scripts/import/generate-sql-statements.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
import { randomUUID } from 'node:crypto'
import type { MultiString, TablesInsert } from '@living-dictionaries/types'
import { diego_ld_user_id } from '../config-supabase'
import { diego_ld_user_id } from '../constants'
import type { Number_Suffix, Row, Sense_Prefix } from './row.type'
import { sql_file_string } from './to-sql-string'
import { millisecond_incrementing_timestamp } from './incrementing-timestamp'

export interface Upload_Operations {
upload_photo: (filepath: string, entry_id: string) => Promise<
{ storage_path: string, serving_url: string, error: null } | { storage_path: null, serving_url: null, error: string }
{ storage_path: string, serving_url: string, error?: null } | { storage_path?: null, serving_url?: null, error: string }
>
upload_audio: (filepath: string, entry_id: string) => Promise<{ storage_path: string, error: null } | { storage_path: null, error: string }>
// upload_video: (filepath: string) => Promise<{ storage_path: string, error: null } | { storage_path: null, error: string }>
upload_audio: (filepath: string, entry_id: string) => Promise<{ storage_path: string, error?: null } | { storage_path?: null, error: string }>
}

export async function generate_sql_statements({
Expand Down
7 changes: 3 additions & 4 deletions packages/scripts/import/getImageServingUrl.ts
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
import fetch from 'node-fetch'
import { projectId } from '../config-firebase'

import 'dotenv/config' // see https://github.com/motdotla/dotenv#how-do-i-use-dotenv-with-import

export async function getImageServingUrl(imageStoragePath: string, environment: string) {
export async function getImageServingUrl(bucket_and_storage_path: string) {
if (!process.env.ProcessImageUrl)
throw new Error('Missing ProcessImageUrl, is it in your uncommitted .env file?')

try {
const imageServingUrlEndpoint = `${process.env.ProcessImageUrl}/${projectId}.appspot.com/${imageStoragePath}`
const imageServingUrlEndpoint = `${process.env.ProcessImageUrl}/${bucket_and_storage_path}`
const res = await fetch(imageServingUrlEndpoint)
const imageServingUrl = await res.text()
return imageServingUrl.replace('http://lh3.googleusercontent.com/', '')
} catch (error) {
console.log(`Error getting serving url for ${imageStoragePath} on ${environment}`)
console.log(`Error getting serving url for ${bucket_and_storage_path}`)
// @ts-ignore
throw new Error(error)
}
Expand Down
71 changes: 49 additions & 22 deletions packages/scripts/import/import-media.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import { dirname, join } from 'node:path'
import { fileURLToPath } from 'node:url'

import * as fs from 'node:fs'
import { environment, storage } from '../config-firebase.js'
import { getImageServingUrl } from './getImageServingUrl.js'

const __dirname = dirname(fileURLToPath(import.meta.url))
import { PutObjectCommand } from '@aws-sdk/client-s3'
import { GCLOUD_MEDIA_BUCKET_S3, storage_bucket } from '../config-supabase'
import { getImageServingUrl } from './getImageServingUrl'

const fileBucket = `talking-dictionaries-${environment === 'prod' ? 'alpha' : 'dev'}.appspot.com`
const __dirname = dirname(fileURLToPath(import.meta.url))

export async function upload_audio_to_gcs({
filepath,
Expand All @@ -30,16 +29,30 @@ export async function upload_audio_to_gcs({
}

try {
const [fileTypeSuffix] = filepath.match(/\.[0-9a-z]+$/i)
const storage_path = `${dictionary_id}/audio/${entry_id}_${new Date().getTime()}${fileTypeSuffix}`
const extension = filepath.split('.').pop()
const storage_path = `${dictionary_id}/audio/${entry_id}_${new Date().getTime()}.${extension}`

if (live) {
await storage.bucket(fileBucket).upload(audioFilePath, {
destination: storage_path,
metadata: {
originalFileName: filepath,
},
})
const fileStream = fs.createReadStream(audioFilePath)

const mimeTypes: Record<string, string> = {
mp3: 'audio/mpeg',
wav: 'audio/wav',
ogg: 'audio/ogg',
m4a: 'audio/mp4',
aac: 'audio/aac',
flac: 'audio/flac',
wma: 'audio/x-ms-wma',
}

const file_type = mimeTypes[extension] || 'application/octet-stream'

await GCLOUD_MEDIA_BUCKET_S3.send(new PutObjectCommand({
Bucket: storage_bucket,
Key: storage_path,
Body: fileStream,
ContentType: file_type,
}))
}
return {
storage_path,
Expand Down Expand Up @@ -71,27 +84,41 @@ export async function upload_photo_to_gcs({
}
}

const [fileTypeSuffix] = filepath.match(/\.[0-9a-z]+$/i)
const storage_path = `${dictionary_id}/images/${entry_id}_${new Date().getTime()}${fileTypeSuffix}`
const extension = filepath.split('.').pop()
const storage_path = `${dictionary_id}/images/${entry_id}_${new Date().getTime()}.${extension}`

if (!live)
return { storage_path, serving_url: 'no-serving_url-bc-dry-run' }

try {
await storage.bucket(fileBucket).upload(imageFilePath, {
destination: storage_path,
metadata: {
originalFileName: filepath,
},
})
const fileStream = fs.createReadStream(imageFilePath)

const mimeTypes: Record<string, string> = {
jpg: 'image/jpeg',
jpeg: 'image/jpeg',
png: 'image/png',
gif: 'image/gif',
webp: 'image/webp',
svg: 'image/svg+xml',
}

const file_type = mimeTypes[extension] || 'application/octet-stream'

await GCLOUD_MEDIA_BUCKET_S3.send(new PutObjectCommand({
Bucket: storage_bucket,
Key: storage_path,
Body: fileStream,
ContentType: file_type,
}))
} catch (err) {
return {
error: `!!! Trouble uploading ${filepath}. Double-check the file to see if it is just a corrupted jpg (as some are) or if the file is good and perhaps there is code/server/network-connection problem. Error: ${err}`,
}
}

try {
const serving_url = await getImageServingUrl(storage_path, environment)
const bucket_and_storage_path = `${storage_bucket}/${storage_path}`
const serving_url = await getImageServingUrl(bucket_and_storage_path)
return {
storage_path,
serving_url,
Expand Down
12 changes: 2 additions & 10 deletions packages/scripts/import/import.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ async function import_from_spreadsheet({ dictionary_id, live }: { dictionary_id:
const file = readFileSync(`./import/data/${dictionary_id}/${dictionary_id}.csv`, 'utf8')
const rows = parseCSVFrom<Row>(file)
if (rows[0].lexeme.includes('word/phrase')) rows.shift() // remove header row
await import_data({ dictionary_id, rows, import_id, live, upload_operations: { upload_photo, upload_audio } })
await import_data({ dictionary_id, rows, import_id, live, upload_operations: { upload_audio, upload_photo } })

console.log(
`Finished ${live ? 'importing' : 'emulating'} ${rows.length} entries to ${environment === 'dev' ? 'http://localhost:3041/' : 'livingdictionaries.app/'
Expand All @@ -43,13 +43,5 @@ async function upload_photo(filepath: string, entry_id: string) {
}

async function upload_audio(filepath: string, entry_id: string) {
const storage_path = await upload_audio_to_gcs({ dictionary_id, filepath, entry_id, live })
return { storage_path }
return await upload_audio_to_gcs({ dictionary_id, filepath, entry_id, live })
}

// async function upload_video(filepath: string) {
// // TODO
// console.log({ dictionary_id })
// await new Promise(resolve => setTimeout(resolve, 0))
// return { storage_path: filepath }
// }
3 changes: 3 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit a846308

Please sign in to comment.