From a73056abde1c3f5e401dd42e681101396086c9a2 Mon Sep 17 00:00:00 2001 From: YR Chen Date: Wed, 24 Jul 2024 03:07:57 +0800 Subject: [PATCH] winget-source: sync metadata using V2 index (#120) * winget-source: sync metadata required by V2 index * winget-source: extract common logic of syncing * update * add success log * winget-source: trim trailing spaces * winget-source: fix unexpected `undefined` in sync * winget-source: bump package version --- winget-source/package-lock.json | 18 ++++- winget-source/package.json | 5 +- winget-source/sync-repo.js | 127 +++++++++++++++++++++----------- winget-source/utilities.js | 125 +++++++++++++++++++++++-------- 4 files changed, 195 insertions(+), 80 deletions(-) diff --git a/winget-source/package-lock.json b/winget-source/package-lock.json index dcf8987..1df0cdc 100644 --- a/winget-source/package-lock.json +++ b/winget-source/package-lock.json @@ -1,12 +1,12 @@ { "name": "@ustcmirror/winget-source", - "version": "1.1.0", + "version": "1.2.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@ustcmirror/winget-source", - "version": "1.1.0", + "version": "1.2.0", "license": "MIT", "dependencies": { "async": "^3.2.4", @@ -15,7 +15,8 @@ "node-fetch": "^3.3.1", "promised-sqlite3": "^2.1.0", "sqlite3": "^5.1.5", - "winston": "^3.8.2" + "winston": "^3.8.2", + "yaml": "^2.4.5" } }, "node_modules/@colors/colors": { @@ -1747,6 +1748,17 @@ "version": "4.0.0", "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==" + }, + "node_modules/yaml": { + "version": "2.4.5", + "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.4.5.tgz", + "integrity": "sha512-aBx2bnqDzVOyNKfsysjA2ms5ZlnjSAW2eG3/L5G/CSujfjLJTJsEw1bGw8kCf04KodQWk1pxlGnZ56CRxiawmg==", + "bin": { + "yaml": "bin.mjs" + }, + "engines": { + "node": ">= 14" + } } } } diff --git a/winget-source/package.json b/winget-source/package.json index 84d9dbe..5017925 100644 --- a/winget-source/package.json +++ b/winget-source/package.json @@ -1,6 +1,6 @@ { "name": "@ustcmirror/winget-source", - "version": "1.1.0", + "version": "1.2.0", "description": "Sync with pre-indexed WinGet source repository.", "main": "sync-repo.js", "author": "YR Chen ", @@ -13,7 +13,8 @@ "node-fetch": "^3.3.1", "promised-sqlite3": "^2.1.0", "sqlite3": "^5.1.5", - "winston": "^3.8.2" + "winston": "^3.8.2", + "yaml": "^2.4.5" }, "scripts": { "start": "node sync-repo.js" diff --git a/winget-source/sync-repo.js b/winget-source/sync-repo.js index 920b464..5f205da 100644 --- a/winget-source/sync-repo.js +++ b/winget-source/sync-repo.js @@ -3,72 +3,111 @@ import async from 'async' import { rm } from 'fs/promises' import { AsyncDatabase } from 'promised-sqlite3' -import { EX_IOERR, EX_OK, EX_SOFTWARE, EX_TEMPFAIL, EX_UNAVAILABLE } from './sysexits.js' +import { EX_IOERR, EX_SOFTWARE, EX_TEMPFAIL, EX_UNAVAILABLE } from './sysexits.js' import { + buildManifestURIs, + buildManifestURIsFromPackageMetadata, + buildPackageMetadataURIs, buildPathpartMap, - buildURIList, + cacheFileWithURI, exitWithCode, extractDatabaseFromBundle, - getLocalPath, makeTempDirectory, - saveFile, setupEnvironment, - syncFile + syncFile, } from './utilities.js' -const sourceV1Filename = 'source.msix'; -const sourceV2Filename = 'source2.msix'; - -// set up configs and temp directory const { parallelLimit, remote, sqlite3, winston } = setupEnvironment(); -const tempDirectory = await makeTempDirectory('winget-repo-'); - -winston.info(`start syncing with ${remote}`); - -try { - // download V1 index package to buffer - const [indexBuffer, modifiedDate, updated] = await syncFile(sourceV1Filename, true, false); - if (!updated) { - winston.info(`nothing to sync from ${remote}`); - exitWithCode(EX_OK); - } - assert(indexBuffer !== null, "Failed to get the source index buffer!"); - // unpack, extract and load index database +/** + * Sync with the official WinGet repository index. + * + * @param {number} version WinGet index version to sync. + * @param {(db: AsyncDatabase) => Promise} handler Handler function that reads the index database and syncs necessary files. + * + * @returns {Promise} Fulfills with `undefined` upon success. + */ +async function syncIndex(version, handler) { + const tempDirectory = await makeTempDirectory('winget-repo-'); + const sourceFilename = version > 1 ? `source${version}.msix` : 'source.msix'; try { - const databaseFilePath = await extractDatabaseFromBundle(indexBuffer, tempDirectory); - const rawDatabase = new sqlite3.Database(databaseFilePath, sqlite3.OPEN_READONLY); + // download index package to buffer + const [indexBuffer, modifiedDate, updated] = await syncFile(sourceFilename, true, false); + if (!updated) { + winston.info(`skip syncing version ${version} from ${remote}`); + return; + } + assert(indexBuffer !== null, "Failed to get the source index buffer!"); - // read manifest URIs from index database + // unpack, extract and load index database try { - const db = new AsyncDatabase(rawDatabase) - const pathparts = buildPathpartMap(await db.all('SELECT * FROM pathparts')); - const uris = buildURIList(await db.all('SELECT pathpart FROM manifest ORDER BY rowid DESC'), pathparts); - await db.close() - - // sync latest manifests in parallel + const databaseFilePath = await extractDatabaseFromBundle(indexBuffer, tempDirectory); + const database = new sqlite3.Database(databaseFilePath, sqlite3.OPEN_READONLY); try { - await async.eachLimit(uris, parallelLimit, async (uri) => await syncFile(uri, false)); + // sync files with handler + const asyncDatabase = new AsyncDatabase(database); + await handler(asyncDatabase); + await asyncDatabase.close(); } catch (error) { - exitWithCode(EX_TEMPFAIL, error); + exitWithCode(EX_SOFTWARE, error); } } catch (error) { - exitWithCode(EX_SOFTWARE, error); + exitWithCode(EX_IOERR, error); } + + // update index package + await cacheFileWithURI(sourceFilename, indexBuffer, modifiedDate); } catch (error) { - exitWithCode(EX_IOERR, error); + try { + await rm(tempDirectory, { recursive: true }); + } finally { + exitWithCode(EX_UNAVAILABLE, error); + } } - - // update index packages - await saveFile(getLocalPath(sourceV1Filename), indexBuffer, modifiedDate); - await syncFile(sourceV2Filename, true); -} catch (error) { - exitWithCode(EX_UNAVAILABLE, error); + winston.info(`successfully synced version ${version} from ${remote}`); + await rm(tempDirectory, { recursive: true }); } -winston.info(`successfully synced with ${remote}`); +winston.info(`start syncing with ${remote}`); + +await syncIndex(2, async (db) => { + try { + const packageURIs = buildPackageMetadataURIs(await db.all('SELECT id, hash FROM packages')); + try { + // sync latest package metadata in parallel + const manifestURIs = await async.concatLimit(packageURIs, parallelLimit, async (uri) => { + const [metadataBuffer] = await syncFile(uri, false); + try { + return metadataBuffer ? await buildManifestURIsFromPackageMetadata(metadataBuffer) : []; + } catch (error) { + exitWithCode(EX_SOFTWARE, error); + } + }); + // sync latest manifests in parallel + await async.eachLimit(manifestURIs, parallelLimit, async (uri) => await syncFile(uri, false)); + } catch (error) { + exitWithCode(EX_TEMPFAIL, error); + } + } catch (error) { + exitWithCode(EX_SOFTWARE, error); + } +}); -// clean up temp directory -await rm(tempDirectory, { recursive: true }); +await syncIndex(1, async (db) => { + try { + const pathparts = buildPathpartMap(await db.all('SELECT * FROM pathparts')); + const uris = buildManifestURIs(await db.all('SELECT pathpart FROM manifest ORDER BY rowid DESC'), pathparts); + // sync latest manifests in parallel + try { + await async.eachLimit(uris, parallelLimit, async (uri) => await syncFile(uri, false)); + } catch (error) { + exitWithCode(EX_TEMPFAIL, error); + } + } catch (error) { + exitWithCode(EX_SOFTWARE, error); + } +}); + +winston.info(`successfully synced with ${remote}`); diff --git a/winget-source/utilities.js b/winget-source/utilities.js index 4bba127..b47ae97 100644 --- a/winget-source/utilities.js +++ b/winget-source/utilities.js @@ -7,10 +7,13 @@ import path from 'path' import process from 'process' import sqlite3 from 'sqlite3' import winston from 'winston' +import YAML from 'yaml' +import Zlib from 'zlib' import { existsSync } from 'fs' import { mkdir, mkdtemp, readFile, stat, utimes, writeFile } from 'fs/promises' import { isIP } from 'net' +import { promisify } from 'util' import { EX_IOERR, EX_USAGE } from './sysexits.js' @@ -49,6 +52,47 @@ const debugMode = process.env.DEBUG === 'true'; /** Local IP address to be bound to HTTPS requests. */ const localAddress = process.env.BIND_ADDRESS; +/** Decompress a deflated stream asynchronously. */ +const inflateRaw = promisify(Zlib.inflateRaw); + +/** + * Get the local sync path of a manifest. + * + * @param {string} uri Manifest URI. + * + * @returns {string} Expected local path of the manifest file. + */ +function getLocalPath(uri) { + return path.join(local, uri); +} + +/** + * Get the remote URL of a manifest. + * + * @param {string} uri Manifest URI. + * + * @returns {URL} Remote URL to get the manifest from. + */ +function getRemoteURL(uri) { + const remoteURL = new URL(remote); + remoteURL.pathname = path.posix.join(remoteURL.pathname, uri); + return remoteURL; +} + +/** + * Decompress a MSZIP-compressed buffer. + * + * @param {Buffer} buffer Compressed buffer using MSZIP. + * + * @returns {Buffer} The decompressed buffer. + */ +async function decompressMSZIP(buffer) { + if (buffer.toString('ascii', 28, 30) != 'CK') { + throw new Error('Invalid MSZIP format'); + } + return await inflateRaw(buffer.subarray(30)); +} + /** * Get last modified date from HTTP response headers. * @@ -97,6 +141,19 @@ function resolvePathpart(id, pathparts) { return path.posix.join(resolvePathpart(pathpart.parent, pathparts), pathpart.pathpart); } +/** + * Resolve manifest URIs against package metadata. + * + * Reference: https://github.com/microsoft/winget-cli/blob/master/src/AppInstallerCommonCore/PackageVersionDataManifest.cpp + * + * @param {{ sV: string, vD: { v: string, rP: string | undefined, s256H: string | undefined }[], [key: string]: any }} metadata The parsed package metadata object. + * + * @returns {string[]} URIs resolved from the given metadata. + */ +function resolvePackageManifestURIs(metadata) { + return metadata.vD.map((version) => version.rP).filter(Boolean); +} + /** * Set up the default `winston` logger instance. */ @@ -141,10 +198,23 @@ export function buildPathpartMap(rows) { * * @returns {string[]} Manifest URIs to sync. */ -export function buildURIList(rows, pathparts) { +export function buildManifestURIs(rows, pathparts) { return rows.map(row => resolvePathpart(row.pathpart, pathparts)); } +/** + * Build a list of all package metadata URIs from database query. + * + * @param {{ id: string, hash: Buffer, [key: string]: string }[]} rows Rows returned by the query. + * + * @returns {string[]} Package metadata URIs to sync. + */ +export function buildPackageMetadataURIs(rows) { + return rows.map(row => + path.posix.join('packages', row.id, row.hash.toString('hex').slice(0, 8), 'versionData.mszyml') + ); +} + /** * Exit with given status with error logging. * @@ -161,6 +231,22 @@ export function exitWithCode(code = 0, error = undefined) { process.exit(code); } +/** + * Build a list of all manifest URIs from compressed package metadata. + * + * Reference: https://github.com/kyz/libmspack/blob/master/libmspack/mspack/mszipd.c + * + * @param {fs.PathLike | Buffer} mszymlMetadata Path or buffer of the MSZYML metadata file. + * + * @returns {Promise} Manifest URIs to sync. + */ +export async function buildManifestURIsFromPackageMetadata(mszymlMetadata) { + const compressedBuffer = Buffer.isBuffer(mszymlMetadata) ? mszymlMetadata : await readFile(mszymlMetadata); + const buffer = await decompressMSZIP(compressedBuffer); + const metadata = YAML.parse(buffer.toString()); + return resolvePackageManifestURIs(metadata); +} + /** * Extract database file from the source bundle. * @@ -170,7 +256,7 @@ export function exitWithCode(code = 0, error = undefined) { * @returns {Promise} Path of the extracted `index.db` file. */ export async function extractDatabaseFromBundle(msixFile, directory) { - const bundle = (msixFile instanceof Buffer) ? msixFile : await readFile(msixFile); + const bundle = Buffer.isBuffer(msixFile) ? msixFile : await readFile(msixFile); const zip = await JSZip.loadAsync(bundle); const buffer = await zip.file(path.posix.join('Public', 'index.db')).async('Uint8Array'); const destination = path.join(directory, 'index.db'); @@ -178,30 +264,6 @@ export async function extractDatabaseFromBundle(msixFile, directory) { return destination; } -/** - * Get the local sync path of a manifest. - * - * @param {string} uri Manifest URI. - * - * @returns {string} Expected local path of the manifest file. - */ -export function getLocalPath(uri) { - return path.join(local, uri); -} - -/** - * Get the remote URL of a manifest. - * - * @param {string} uri Manifest URI. - * - * @returns {URL} Remote URL to get the manifest from. - */ -export function getRemoteURL(uri) { - const remoteURL = new URL(remote); - remoteURL.pathname = path.posix.join(remoteURL.pathname, uri); - return remoteURL; -} - /** * Create a unique temporary directory with given prefix. * @@ -242,15 +304,16 @@ export function setupEnvironment() { } /** - * Save a file with specific modified date. + * Cache a file with specific modified date. * - * @param {string} path File path to write to. + * @param {string} uri File URI to cache. * @param {Buffer} buffer Whether to save the file to disk. * @param {Date | null | undefined} modifiedAt Modified date of the file, if applicable. * - * @returns {Promise} Fulfills with `undefined` with upon success. + * @returns {Promise} Fulfills with `undefined` upon success. */ -export async function saveFile(path, buffer, modifiedAt) { +export async function cacheFileWithURI(uri, buffer, modifiedAt) { + const path = getLocalPath(uri); await writeFile(path, buffer); if (modifiedAt) { await utimes(path, modifiedAt, modifiedAt); @@ -292,7 +355,7 @@ export async function syncFile(uri, update = true, save = true) { const buffer = Buffer.from(arrayBuffer); const lastModified = getLastModifiedDate(response); if (save) { - await saveFile(localPath, buffer, lastModified); + await cacheFileWithURI(uri, buffer, lastModified); } return [buffer, lastModified ?? null, true]; }