Merge pull request #2685 from odota/readblob
make blobstore the primary
howardchung authored Dec 22, 2023
2 parents ef42ae2 + eada04f commit 3e00171
Showing 121 changed files with 37,320 additions and 4,876 deletions.
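The PR title ("make blobstore the primary") points at a read-path change: the match blob store becomes the first source consulted, with the older stores as fallback. Not all of the files implementing this are rendered below, so the following is only a rough sketch of that pattern under stated assumptions — readMatch is an invented wrapper name, not a function from this commit, built on the tryReadArchivedMatch and getMatchDataFromCassandra helpers that do appear in the dev scripts further down:

// Illustrative only: readMatch is a hypothetical wrapper, not part of this commit.
// Idea sketched here: prefer the blob/archive copy of a match, fall back to Cassandra.
import { tryReadArchivedMatch } from '../store/getArchivedData.js';
import { getMatchDataFromCassandra } from '../store/queries.js';

async function readMatch(matchId: number) {
  // Consult the blob/archive store first (assumption based on the PR title)
  const archived = await tryReadArchivedMatch(String(matchId));
  if (archived) {
    return archived;
  }
  // Fall back to the legacy Cassandra row if no blob exists
  return getMatchDataFromCassandra(matchId);
}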
16 changes: 0 additions & 16 deletions .github/dependabot.yml
@@ -5,19 +5,3 @@ updates:
schedule:
interval: daily
open-pull-requests-limit: 10
ignore:
- dependency-name: stripe
versions:
- 8.132.0
- 8.133.0
- 8.134.0
- 8.135.0
- 8.136.0
- 8.137.0
- 8.138.0
- 8.139.0
- 8.140.0
- 8.141.0
- 8.142.0
- 8.143.0
- 8.145.0
3 changes: 2 additions & 1 deletion .gitignore
@@ -25,4 +25,5 @@ docker-compose.override.yml
STEAM_ACCOUNT_DATA*.txt
.DS_Store
.vscode
build
build
.nyc_output
1 change: 1 addition & 0 deletions .prettierignore
@@ -0,0 +1 @@
json
2 changes: 1 addition & 1 deletion Dockerfile
@@ -1,4 +1,4 @@
FROM node:21-bullseye
FROM node:20

ENV NPM_CONFIG_LOGLEVEL warn

82 changes: 41 additions & 41 deletions config.js → config.ts
@@ -1,29 +1,28 @@
/**
* File managing configuration for the application
* */
require('dotenv').config();
import 'dotenv/config';

const defaults = {
STEAM_API_KEY: '', // for API reqs, in worker
STEAM_USER: '', // for getting replay salt/profile data, in retriever
STEAM_PASS: '',
STEAM_PASS: '', // for getting replay salt/profile data, in retriever
ROLE: '', // for specifying the file that should be run when entry point is invoked
GROUP: '', // for specifying the group of apps that should be run when entry point is invoked
START_SEQ_NUM: '', // truthy: use sequence number stored in redis, else: use approximate value from live API
PROVIDER: '', // The cloud provider used by the application (determines how environment data is downloaded)
STEAM_ACCOUNT_DATA: '', // The URL to read Steam account data from
NODE_ENV: 'development',
PORT: '', // Default port to use by services often set by the system
FRONTEND_PORT: '5000',
RETRIEVER_PORT: '5100',
PARSER_PORT: '5200',
PROXY_PORT: '5300',
FRONTEND_PORT: '5000', // Port to run the webserver/API on
RETRIEVER_PORT: '5100', // Port to run the Steam GC retriever on
PARSER_PORT: '5200', // Port to run the parser service on. Note: This is the JS service that processes jobs, not the Java server that actually parses replays (PARSER_HOST)
PROXY_PORT: '5300', // Port to run the Steam API proxy on
ROOT_URL: 'http://localhost:5000', // base url to redirect to after steam oauth login
RETRIEVER_HOST: 'localhost:5100', // Comma separated list of retriever hosts (access to Dota 2 GC data)
GCDATA_RETRIEVER_HOST: '', // Comma separated list of retriever hosts dedicated for gcdata job
PARSER_HOST: 'http://localhost:5600', // host of the parse server
PARSER_HOST: 'http://localhost:5600', // host of the Java parse server
UI_HOST: '', // The host of the UI, target of /logout and /return
PROXY_URLS: '', // comma separated list of proxy urls to use
STEAM_API_HOST: 'api.steampowered.com', // comma separated list of hosts to fetch Steam API data from
POSTGRES_URL: 'postgresql://postgres:postgres@localhost/yasp', // connection string for PostgreSQL
READONLY_POSTGRES_URL: 'postgresql://readonly:readonly@localhost/yasp', // readonly connection string for PostgreSQL
@@ -34,46 +33,47 @@ const defaults = {
RETRIEVER_SECRET: '', // string to use as shared secret with retriever/parser
SESSION_SECRET: 'secret to encrypt cookies with', // string to encrypt cookies
COOKIE_DOMAIN: '', // domain to use for the cookie. Use e.g. '.opendota.com' to share cookie across subdomains
UNTRACK_DAYS: 30, // The number of days a user is tracked for after every visit
GOAL: 5, // The cheese goal
DEFAULT_DELAY: 1000, // delay between API requests
SCANNER_DELAY: 2000, // delay for scanner API requests (stricter rate limit)
MMR_PARALLELISM: 10, // Number of simultaneous MMR requests to make (per retriever)
PARSER_PARALLELISM: 1, // Number of simultaneous parse jobs to run (per parser)
BENCHMARK_RETENTION_MINUTES: 60, // minutes in block to retain benchmark data for percentile
GCDATA_PERCENT: 0, // percent of inserted matches to randomly queue for GC data
SCANNER_PERCENT: 100, // percent of matches to insert from scanner
PUBLIC_SAMPLE_PERCENT: 10, // percent of public matches to sample in DB
SCENARIOS_SAMPLE_PERCENT: 100, // percent of parsed matches to sample for scenarios
BENCHMARKS_SAMPLE_PERCENT: 100, // percent of parsed matches to sample for benchmarks
UNTRACK_DAYS: '30', // The number of days a user is tracked for after every visit
MMR_PARALLELISM: '1', // Number of simultaneous MMR requests to make (per retriever)
PARSER_PARALLELISM: '1', // Number of simultaneous parse jobs to run (per parser)
FULLHISTORY_PARALLELISM: '1', // Number of simultaneous fullhistory (player refresh) jobs to process
GCDATA_PARALLELISM: '1', // Number of simultaneous GC match details requests to make (per retriever)
BENCHMARK_RETENTION_MINUTES: '60', // minutes in block to retain benchmark data for percentile
GCDATA_PERCENT: '0', // percent of inserted matches to randomly queue for GC data
SCANNER_PERCENT: '100', // percent of matches to insert from scanner
PUBLIC_SAMPLE_PERCENT: '10', // percent of public matches to sample in DB
SCENARIOS_SAMPLE_PERCENT: '100', // percent of parsed matches to sample for scenarios
BENCHMARKS_SAMPLE_PERCENT: '100', // percent of parsed matches to sample for benchmarks
ENABLE_MATCH_CACHE: '', // set to enable caching matches in Redis
ENABLE_PLAYER_CACHE: '', // enable/disable player aggregation caching
ENABLE_RANDOM_MMR_UPDATE: '', // set to request MMR updates after ranked matches
MAXIMUM_AGE_SCENARIOS_ROWS: 4, // maximum allowed age of scenarios rows in weeks
MATCH_CACHE_SECONDS: 60, // number of seconds to cache matches
PLAYER_CACHE_SECONDS: 1800, // number of seconds to cache player aggregations
SCANNER_PLAYER_PERCENT: 100, // percent of matches from scanner to insert player account IDs for (discover new player account IDs)
ENABLE_RETRIEVER_ADVANCED_AUTH: '', // set to enable retriever two-factor and SteamGuard authentication,
MAXIMUM_AGE_SCENARIOS_ROWS: '4', // maximum allowed age of scenarios rows in weeks
MATCH_CACHE_SECONDS: '60', // number of seconds to cache matches
PLAYER_CACHE_SECONDS: '1800', // number of seconds to cache player aggregations
SCANNER_PLAYER_PERCENT: '100', // percent of matches from scanner to insert player account IDs for (discover new player account IDs)
ENABLE_API_LIMIT: '', // if truthy, API calls after exceeding API_FREE_LIMIT are blocked
API_FREE_LIMIT: 50000, // number of api requests per month before 429 is returned. If using an API key, calls over this are charged.
API_BILLING_UNIT: 100, // how many calls is equivalent to a unit of calls e.g. 100 calls per $0.01.
API_KEY_PER_MIN_LIMIT: 300, // Rate limit per minute if using an API key
NO_API_KEY_PER_MIN_LIMIT: 60, // Rate limit per minute if not using an API key
API_FREE_LIMIT: '50000', // number of api requests per month before 429 is returned. If using an API key, calls over this are charged.
API_BILLING_UNIT: '100', // how many calls is equivalent to a unit of calls e.g. 100 calls per $0.01.
API_KEY_PER_MIN_LIMIT: '300', // Rate limit per minute if using an API key
NO_API_KEY_PER_MIN_LIMIT: '60', // Rate limit per minute if not using an API key
ADMIN_ACCOUNT_IDS: '', // Whitelisted, comma separated account IDs to access /admin* routes
BACKUP_RETRIEVER_PERCENT: 0, // percent of replay salts to fetch from backup data source
GCDATA_PARALLELISM: 1, // Number of simultaneous GC match details requests to make (per retriever)
STRIPE_SECRET: 'rk_test_gRqwhv4xqv0a1olp8kk8fZ94', // for stripe payment processing (kept on server)
STRIPE_API_PLAN: 'plan_CgLthOgwrDgz2K', // plan id for stripe metering
ES_SEARCH_PERCENT: 0, // % of users to roll out elasticsearch to
ENABLE_MATCH_ARCHIVE: '', // Allow reading/writing parsed match blobs to S3 storage
MATCH_ARCHIVE_S3_KEY_ID: '', // S3-compatible key ID to archive parsed match blobs
MATCH_ARCHIVE_S3_KEY_SECRET: '', // S3-compatible key secret to archive parsed match blobs
MATCH_ARCHIVE_S3_ENDPOINT: '', // S3-compatible endpoint to archive parsed match blobs
MATCH_ARCHIVE_S3_BUCKET: 'opendota', // name of the S3 bucket to archive parsed match blobs
ENABLE_PLAYER_ARCHIVE: '', // Allow reading/writing player match blobs to S3 storage
PLAYER_ARCHIVE_S3_KEY_ID: '', // S3-compatible key ID to archive player match blobs
PLAYER_ARCHIVE_S3_KEY_SECRET: '', // S3-compatible key secret to archive player match blobs
PLAYER_ARCHIVE_S3_ENDPOINT: '', // S3-compatible endpoint to archive player match blobs
PLAYER_ARCHIVE_S3_BUCKET: 'opendota-players', // name of the S3 bucket to archive player match blobs
DISABLE_REPARSE: '', // Disable reparsing matches that are already parsed
DISABLE_REGCDATA: '', // Disable refetching new GC data on every request (cache it)
DISABLE_REAPI: '', // Disable refetching new API data on every request
API_KEY_GEN_THRESHOLD: '0', // Account ID requirement (delta from max) for generating API keys
};
// ensure that process.env has all values in defaults, but prefer the process.env value
Object.keys(defaults).forEach((key) => {
process.env[key] = key in process.env ? process.env[key] : defaults[key];
});
if (process.env.NODE_ENV === 'development') {
// force PORT to null in development so we can run multiple web services without conflict
process.env.PORT = '';
@@ -83,15 +83,15 @@ if (process.env.NODE_ENV === 'test') {
process.env.POSTGRES_URL = process.env.POSTGRES_URL + '_test';
process.env.CASSANDRA_URL = process.env.CASSANDRA_URL + '_test';
process.env.SCYLLA_URL = process.env.SCYLLA_URL + '_test';
process.env.REDIS_URL = process.env.REDIS_URL.slice(0, -1) + '1';
process.env.REDIS_URL = process.env.REDIS_URL?.slice(0, -1) + '1';
process.env.SESSION_SECRET = 'testsecretvalue';
process.env.ENABLE_MATCH_CACHE = 1;
process.env.FRONTEND_PORT = 5001;
process.env.PARSER_PORT = 5201;
process.env.FRONTEND_PORT = '5001';
process.env.PARSER_PORT = '5201';
}

// Export the combined values
module.exports = {
export const config = {
...defaults,
...process.env,
};
export default config;
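The config.js → config.ts conversion also switches the module from CommonJS (module.exports) to named and default ES exports, and normalizes every default to a string. A minimal sketch of a consumer after this change — the file name and the specific keys read here are illustrative, not taken from the diff, and numeric settings are parsed by the caller:

// Illustrative consumer of the converted config module (not a file in this PR)
import config from './config';

// Defaults are now strings, so numeric settings get parsed explicitly
const frontendPort = Number(config.FRONTEND_PORT ?? '5000');
const retrieverHosts = (config.RETRIEVER_HOST ?? '').split(','); // comma separated list
console.log(`frontend on ${frontendPort}, ${retrieverHosts.length} retriever host(s)`);

The dev scripts touched in this PR use the dynamic form instead, e.g. const { config } = await import('../config.js'), as shown in dev/keyTest.mts below.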
22 changes: 11 additions & 11 deletions dev/archiveTest.mts
@@ -1,28 +1,28 @@
import { archivePut } from '../store/archive.js';
import {
getArchivedMatch,
getMatchData,
getPlayerMatchData,
} from '../store/queries.js';
const { Archive } = await import('../store/archive.js');
const { tryReadArchivedMatch } = await import('../store/getArchivedData.js');
const { getMatchDataFromCassandra, getPlayerMatchData } = await import(
'../store/queries.js'
);

// Read some match data
const match = {
...(await getMatchData('7465883253', 'cassandra')),
players: await getPlayerMatchData('7465883253'),
...(await getMatchDataFromCassandra(7465883253)),
players: await getPlayerMatchData(7465883253),
};
const blob = Buffer.from(JSON.stringify(match));

const archive = new Archive('match');
// Archive it
const putRes = await archivePut(match.match_id?.toString() ?? '', blob);
const putRes = await archive.archivePut(match.match_id?.toString() ?? '', blob);
console.log(putRes);

// Read it back
const readBack = await getArchivedMatch(match.match_id?.toString() ?? '');
const readBack = await tryReadArchivedMatch(match.match_id?.toString() ?? '');

console.log(JSON.stringify(match).length, JSON.stringify(readBack).length);

// Verify we get back null for invalid match id
const nullMatch = await getArchivedMatch('123');
const nullMatch = await tryReadArchivedMatch('123');
console.log(nullMatch);

// Confirm API returns the same data whether we used the archive or not
3 changes: 2 additions & 1 deletion dev/checkAccounts.ts
@@ -1,7 +1,8 @@
import fs from 'fs';
import Steam from 'steam';
import { EOL } from 'os';
const accountData = fs.readFileSync('./STEAM_ACCOUNT_DATA_BAD.txt', 'utf8');
const accountArray = accountData.split(require('os').EOL);
const accountArray = accountData.split(EOL);

let index = Number(process.argv[2]) || -1;
index += 1;
2 changes: 1 addition & 1 deletion dev/convertSpec.ts
@@ -1,4 +1,4 @@
import fs from 'fs';
import spec from '../routes/spec.js';
import spec from '../routes/spec';

fs.writeFileSync('./spec.json', JSON.stringify(spec, null, 2), 'utf-8');
21 changes: 8 additions & 13 deletions dev/findProMatches.mts
@@ -1,17 +1,13 @@
import { insertMatchPromise } from '../store/queries.js';
import dbImport from '../store/db.js';
import { generateJob, getDataPromise } from '../util/utility.js';

const db = dbImport.default;

// const leagueUrl = generateJob('api_leagues', {}).url;
const { insertMatch } = await import('../store/insert.js');
const { db } = await import('../store/db.js');
const { generateJob, getSteamAPIData } = await import('../util/utility.js');

async function getPage(url: string, leagueid: number) {
const data: any = await getDataPromise(url);
const data: any = await getSteamAPIData(url);
console.log(
leagueid,
data.result.total_results,
data.result.results_remaining
data.result.results_remaining,
);
for (let i = 0; i < data.result.matches.length; i++) {
const match = data.results.matches[i];
@@ -20,13 +16,12 @@ async function getPage(url: string, leagueid: number) {
match_id: match.match_id,
});
const { url } = job;
const body: any = await getDataPromise({
const body: any = await getSteamAPIData({
url,
delay: 200,
});
if (body.result) {
const match = body.result;
await insertMatchPromise(match, { type: 'api', skipParse: true });
await insertMatch(match, { type: 'api' });
}
}
if (data.result.results_remaining) {
@@ -51,7 +46,7 @@ leagueIds.forEach(async (leagueid: number) => {
const { url } = generateJob('api_history', {
leagueid,
});
return await getPage(url, leagueid);
return getPage(url, leagueid);
});
process.exit(0);
// From API
5 changes: 2 additions & 3 deletions dev/generateFakeRatings.mts
@@ -1,5 +1,4 @@
import dbImport from '../store/db.js';
const db = dbImport.default;
const { db } = await import('../store/db.js');

function randByCentralLimitTheorem() {
let v = 0;
@@ -12,7 +11,7 @@ function randByCentralLimitTheorem() {
function gaussianRandom(mean: number, std: number) {
if (mean === undefined || std === undefined) {
throw new Error(
'Gaussian random needs 2 arguments (mean, standard deviation)'
'Gaussian random needs 2 arguments (mean, standard deviation)',
);
}
return randByCentralLimitTheorem() * std + mean;
4 changes: 2 additions & 2 deletions dev/keyTest.mts
@@ -1,13 +1,13 @@
import axios from 'axios';
import config from '../config.js';
const { config } = await import('../config.js');

const output: string[] = [];

const arr = config.STEAM_API_KEY.split(',');
for (let i = 0; i < arr.length; i++) {
const key = arr[i];
const resp = await axios.get(
`http://api.steampowered.com/IDOTA2Match_570/GetMatchHistory/V001/?key=${key}`
`http://api.steampowered.com/IDOTA2Match_570/GetMatchHistory/V001/?key=${key}`,
);
console.log(key, resp.status);
if (resp.status !== 200) {
7 changes: 0 additions & 7 deletions dev/lobby.ts
@@ -1,11 +1,4 @@
/*
const config = require('./config');
const Steam = require('steam');
const Dota2 = require('dota2');
const utility = require('./utility');
const async = require('async');
const convert64To32 = utility.convert64to32;
const express = require('express');
const app = express();
const users = config.STEAM_USER.split(',');
const passes = config.STEAM_PASS.split(',');
5 changes: 2 additions & 3 deletions dev/metaParse.mts
@@ -1,9 +1,8 @@
import fs from 'fs';
import meta from '../store/getMeta.js';
const { getMetaFromUrl } = meta;
const { getMetaFromUrl } = await import('../store/getMeta.js');

const message = await getMetaFromUrl(
'http://replay117.valve.net/570/7468445438_1951738768.meta.bz2'
'http://replay117.valve.net/570/7468445438_1951738768.meta.bz2',
);
// Stats: Original bzip2, 77kb, unzipped, 113kb, parsed JSON 816kb
// fs.writeFileSync(
22 changes: 22 additions & 0 deletions dev/playerCachesArchive.mts
@@ -0,0 +1,22 @@
import fs from 'fs';
const { getPlayerMatchesWithMetadata } = await import(
'../store/queries.js'
);
const { doArchivePlayerMatches } = await import('../store/getArchivedData.js');

// Write player blob to archive
await doArchivePlayerMatches('88367253');

// Read it back
// await getArchivedPlayerMatches('88367253');

// Check the combined getPlayerMatches results
const readBack = await getPlayerMatchesWithMetadata('88367253', {
project: [],
projectAll: true,
});
console.log(readBack[1]);

// There shouldn't be any duplicate match IDs
// The data should be the same
fs.writeFileSync('./build/88367253,json', JSON.stringify(readBack[0], null, 2));
18 changes: 5 additions & 13 deletions dev/reParse.ts
@@ -1,27 +1,19 @@
// Issues reparse requests for all matches in postgres that aren't parsed
import db from '../store/db';
import { insertMatchPromise } from '../store/queries';
import { getDataPromise, generateJob } from '../util/utility';
import queue from '../store/queue';

async function start() {
const matches = await db.raw(
'select match_id from matches where version IS NULL'
'select match_id from matches where version IS NULL',
);
console.log(matches.rows.length);
for (let i = 0; i < matches.rows.length; i++) {
const input = matches.rows[i];
// match id request, get data from API
const body: any = await getDataPromise(
generateJob('api_details', input).url
await queue.addReliableJob(
{ name: 'parse', data: { match_id: input.match_id } },
{},
);
// match details response
const match = body.result;
const job = await insertMatchPromise(match, {
type: 'api',
attempts: 1,
priority: 1,
forceParse: true,
});
}
}
start();