Merge pull request #2685 from odota/readblob
make blobstore the primary
howardchung authored Dec 22, 2023
2 parents ef42ae2 + eada04f commit 3e00171
Showing 121 changed files with 37,320 additions and 4,876 deletions.
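The PR title ("make blobstore the primary") points at a read-path change: the match blob store becomes the first source consulted, with the older stores as fallback. Not all of the files implementing this are rendered below, so the following is only a rough sketch of that pattern under stated assumptions — readMatch is an invented wrapper name, not a function from this commit, built on the tryReadArchivedMatch and getMatchDataFromCassandra helpers that do appear in the dev scripts further down:

// Illustrative only: readMatch is a hypothetical wrapper, not part of this commit.
// Idea sketched here: prefer the blob/archive copy of a match, fall back to Cassandra.
import { tryReadArchivedMatch } from '../store/getArchivedData.js';
import { getMatchDataFromCassandra } from '../store/queries.js';

async function readMatch(matchId: number) {
  // Consult the blob/archive store first (assumption based on the PR title)
  const archived = await tryReadArchivedMatch(String(matchId));
  if (archived) {
    return archived;
  }
  // Fall back to the legacy Cassandra row if no blob exists
  return getMatchDataFromCassandra(matchId);
}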
16 changes: 0 additions & 16 deletions .github/dependabot.yml
@@ -5,19 +5,3 @@ updates:
schedule:
interval: daily
open-pull-requests-limit: 10
ignore:
- dependency-name: stripe
versions:
- 8.132.0
- 8.133.0
- 8.134.0
- 8.135.0
- 8.136.0
- 8.137.0
- 8.138.0
- 8.139.0
- 8.140.0
- 8.141.0
- 8.142.0
- 8.143.0
- 8.145.0
3 changes: 2 additions & 1 deletion .gitignore
@@ -25,4 +25,5 @@ docker-compose.override.yml
STEAM_ACCOUNT_DATA*.txt
.DS_Store
.vscode
build
build
.nyc_output
1 change: 1 addition & 0 deletions .prettierignore
@@ -0,0 +1 @@
json
2 changes: 1 addition & 1 deletion Dockerfile
@@ -1,4 +1,4 @@
FROM node:21-bullseye
FROM node:20

ENV NPM_CONFIG_LOGLEVEL warn

82 changes: 41 additions & 41 deletions config.js → config.ts
@@ -1,29 +1,28 @@
/**
* File managing configuration for the application
* */
require('dotenv').config();
import 'dotenv/config';

const defaults = {
STEAM_API_KEY: '', // for API reqs, in worker
STEAM_USER: '', // for getting replay salt/profile data, in retriever
STEAM_PASS: '',
STEAM_PASS: '', // for getting replay salt/profile data, in retriever
ROLE: '', // for specifying the file that should be run when entry point is invoked
GROUP: '', // for specifying the group of apps that should be run when entry point is invoked
START_SEQ_NUM: '', // truthy: use sequence number stored in redis, else: use approximate value from live API
PROVIDER: '', // The cloud provider used by the application (determines how environment data is downloaded)
STEAM_ACCOUNT_DATA: '', // The URL to read Steam account data from
NODE_ENV: 'development',
PORT: '', // Default port to use by services often set by the system
FRONTEND_PORT: '5000',
RETRIEVER_PORT: '5100',
PARSER_PORT: '5200',
PROXY_PORT: '5300',
FRONTEND_PORT: '5000', // Port to run the webserver/API on
RETRIEVER_PORT: '5100', // Port to run the Steam GC retriever on
PARSER_PORT: '5200', // Port to run the parser service on. Note: This is the JS service that processes jobs, not the Java server that actually parses replays (PARSER_HOST)
PROXY_PORT: '5300', // Port to run the Steam API proxy on
ROOT_URL: 'http://localhost:5000', // base url to redirect to after steam oauth login
RETRIEVER_HOST: 'localhost:5100', // Comma separated list of retriever hosts (access to Dota 2 GC data)
GCDATA_RETRIEVER_HOST: '', // Comma separated list of retriever hosts dedicated for gcdata job
PARSER_HOST: 'http://localhost:5600', // host of the parse server
PARSER_HOST: 'http://localhost:5600', // host of the Java parse server
UI_HOST: '', // The host of the UI, target of /logout and /return
PROXY_URLS: '', // comma separated list of proxy urls to use
STEAM_API_HOST: 'api.steampowered.com', // comma separated list of hosts to fetch Steam API data from
POSTGRES_URL: 'postgresql://postgres:postgres@localhost/yasp', // connection string for PostgreSQL
READONLY_POSTGRES_URL: 'postgresql://readonly:readonly@localhost/yasp', // readonly connection string for PostgreSQL
@@ -34,46 +33,47 @@ const defaults = {
RETRIEVER_SECRET: '', // string to use as shared secret with retriever/parser
SESSION_SECRET: 'secret to encrypt cookies with', // string to encrypt cookies
COOKIE_DOMAIN: '', // domain to use for the cookie. Use e.g. '.opendota.com' to share cookie across subdomains
UNTRACK_DAYS: 30, // The number of days a user is tracked for after every visit
GOAL: 5, // The cheese goal
DEFAULT_DELAY: 1000, // delay between API requests
SCANNER_DELAY: 2000, // delay for scanner API requests (stricter rate limit)
MMR_PARALLELISM: 10, // Number of simultaneous MMR requests to make (per retriever)
PARSER_PARALLELISM: 1, // Number of simultaneous parse jobs to run (per parser)
BENCHMARK_RETENTION_MINUTES: 60, // minutes in block to retain benchmark data for percentile
GCDATA_PERCENT: 0, // percent of inserted matches to randomly queue for GC data
SCANNER_PERCENT: 100, // percent of matches to insert from scanner
PUBLIC_SAMPLE_PERCENT: 10, // percent of public matches to sample in DB
SCENARIOS_SAMPLE_PERCENT: 100, // percent of parsed matches to sample for scenarios
BENCHMARKS_SAMPLE_PERCENT: 100, // percent of parsed matches to sample for benchmarks
UNTRACK_DAYS: '30', // The number of days a user is tracked for after every visit
MMR_PARALLELISM: '1', // Number of simultaneous MMR requests to make (per retriever)
PARSER_PARALLELISM: '1', // Number of simultaneous parse jobs to run (per parser)
FULLHISTORY_PARALLELISM: '1', // Number of simultaneous fullhistory (player refresh) jobs to process
GCDATA_PARALLELISM: '1', // Number of simultaneous GC match details requests to make (per retriever)
BENCHMARK_RETENTION_MINUTES: '60', // minutes in block to retain benchmark data for percentile
GCDATA_PERCENT: '0', // percent of inserted matches to randomly queue for GC data
SCANNER_PERCENT: '100', // percent of matches to insert from scanner
PUBLIC_SAMPLE_PERCENT: '10', // percent of public matches to sample in DB
SCENARIOS_SAMPLE_PERCENT: '100', // percent of parsed matches to sample for scenarios
BENCHMARKS_SAMPLE_PERCENT: '100', // percent of parsed matches to sample for benchmarks
ENABLE_MATCH_CACHE: '', // set to enable caching matches in Redis
ENABLE_PLAYER_CACHE: '', // enable/disable player aggregation caching
ENABLE_RANDOM_MMR_UPDATE: '', // set to request MMR updates after ranked matches
MAXIMUM_AGE_SCENARIOS_ROWS: 4, // maximum allowed age of scenarios rows in weeks
MATCH_CACHE_SECONDS: 60, // number of seconds to cache matches
PLAYER_CACHE_SECONDS: 1800, // number of seconds to cache player aggregations
SCANNER_PLAYER_PERCENT: 100, // percent of matches from scanner to insert player account IDs for (discover new player account IDs)
ENABLE_RETRIEVER_ADVANCED_AUTH: '', // set to enable retriever two-factor and SteamGuard authentication,
MAXIMUM_AGE_SCENARIOS_ROWS: '4', // maximum allowed age of scenarios rows in weeks
MATCH_CACHE_SECONDS: '60', // number of seconds to cache matches
PLAYER_CACHE_SECONDS: '1800', // number of seconds to cache player aggregations
SCANNER_PLAYER_PERCENT: '100', // percent of matches from scanner to insert player account IDs for (discover new player account IDs)
ENABLE_API_LIMIT: '', // if truthy, API calls after exceeding API_FREE_LIMIT are blocked
API_FREE_LIMIT: 50000, // number of api requests per month before 429 is returned. If using an API key, calls over this are charged.
API_BILLING_UNIT: 100, // how many calls is equivalent to a unit of calls e.g. 100 calls per $0.01.
API_KEY_PER_MIN_LIMIT: 300, // Rate limit per minute if using an API key
NO_API_KEY_PER_MIN_LIMIT: 60, // Rate limit per minute if not using an API key
API_FREE_LIMIT: '50000', // number of api requests per month before 429 is returned. If using an API key, calls over this are charged.
API_BILLING_UNIT: '100', // how many calls is equivalent to a unit of calls e.g. 100 calls per $0.01.
API_KEY_PER_MIN_LIMIT: '300', // Rate limit per minute if using an API key
NO_API_KEY_PER_MIN_LIMIT: '60', // Rate limit per minute if not using an API key
ADMIN_ACCOUNT_IDS: '', // Whitelisted, comma separated account IDs to access /admin* routes
BACKUP_RETRIEVER_PERCENT: 0, // percent of replay salts to fetch from backup data source
GCDATA_PARALLELISM: 1, // Number of simultaneous GC match details requests to make (per retriever)
STRIPE_SECRET: 'rk_test_gRqwhv4xqv0a1olp8kk8fZ94', // for stripe payment processing (kept on server)
STRIPE_API_PLAN: 'plan_CgLthOgwrDgz2K', // plan id for stripe metering
ES_SEARCH_PERCENT: 0, // % of users to roll out elasticsearch to
ENABLE_MATCH_ARCHIVE: '', // Allow reading/writing parsed match blobs to S3 storage
MATCH_ARCHIVE_S3_KEY_ID: '', // S3-compatible key ID to archive parsed match blobs
MATCH_ARCHIVE_S3_KEY_SECRET: '', // S3-compatible key secret to archive parsed match blobs
MATCH_ARCHIVE_S3_ENDPOINT: '', // S3-compatible endpoint to archive parsed match blobs
MATCH_ARCHIVE_S3_BUCKET: 'opendota', // name of the S3 bucket to archive parsed match blobs
ENABLE_PLAYER_ARCHIVE: '', // Allow reading/writing player match blobs to S3 storage
PLAYER_ARCHIVE_S3_KEY_ID: '', // S3-compatible key ID to archive player match blobs
PLAYER_ARCHIVE_S3_KEY_SECRET: '', // S3-compatible key secret to archive player match blobs
PLAYER_ARCHIVE_S3_ENDPOINT: '', // S3-compatible endpoint to archive player match blobs
PLAYER_ARCHIVE_S3_BUCKET: 'opendota-players', // name of the S3 bucket to archive player match blobs
DISABLE_REPARSE: '', // Disable reparsing matches that are already parsed
DISABLE_REGCDATA: '', // Disable refetching new GC data on every request (cache it)
DISABLE_REAPI: '', // Disable refetching new API data on every request
API_KEY_GEN_THRESHOLD: '0', // Account ID requirement (delta from max) for generating API keys
};
// ensure that process.env has all values in defaults, but prefer the process.env value
Object.keys(defaults).forEach((key) => {
process.env[key] = key in process.env ? process.env[key] : defaults[key];
});
if (process.env.NODE_ENV === 'development') {
// force PORT to null in development so we can run multiple web services without conflict
process.env.PORT = '';
@@ -83,15 +83,15 @@ if (process.env.NODE_ENV === 'test') {
process.env.POSTGRES_URL = process.env.POSTGRES_URL + '_test';
process.env.CASSANDRA_URL = process.env.CASSANDRA_URL + '_test';
process.env.SCYLLA_URL = process.env.SCYLLA_URL + '_test';
process.env.REDIS_URL = process.env.REDIS_URL.slice(0, -1) + '1';
process.env.REDIS_URL = process.env.REDIS_URL?.slice(0, -1) + '1';
process.env.SESSION_SECRET = 'testsecretvalue';
process.env.ENABLE_MATCH_CACHE = 1;
process.env.FRONTEND_PORT = 5001;
process.env.PARSER_PORT = 5201;
process.env.FRONTEND_PORT = '5001';
process.env.PARSER_PORT = '5201';
}

// Export the combined values
module.exports = {
export const config = {
...defaults,
...process.env,
};
export default config;
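The config.js → config.ts conversion also switches the module from CommonJS (module.exports) to named and default ES exports, and normalizes every default to a string. A minimal sketch of a consumer after this change — the file name and the specific keys read here are illustrative, not taken from the diff, and numeric settings are parsed by the caller:

// Illustrative consumer of the converted config module (not a file in this PR)
import config from './config';

// Defaults are now strings, so numeric settings get parsed explicitly
const frontendPort = Number(config.FRONTEND_PORT ?? '5000');
const retrieverHosts = (config.RETRIEVER_HOST ?? '').split(','); // comma separated list
console.log(`frontend on ${frontendPort}, ${retrieverHosts.length} retriever host(s)`);

The dev scripts touched in this PR use the dynamic form instead, e.g. const { config } = await import('../config.js'), as shown in dev/keyTest.mts below.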
22 changes: 11 additions & 11 deletions dev/archiveTest.mts
@@ -1,28 +1,28 @@
import { archivePut } from '../store/archive.js';
import {
getArchivedMatch,
getMatchData,
getPlayerMatchData,
} from '../store/queries.js';
const { Archive } = await import('../store/archive.js');
const { tryReadArchivedMatch } = await import('../store/getArchivedData.js');
const { getMatchDataFromCassandra, getPlayerMatchData } = await import(
'../store/queries.js'
);

// Read some match data
const match = {
...(await getMatchData('7465883253', 'cassandra')),
players: await getPlayerMatchData('7465883253'),
...(await getMatchDataFromCassandra(7465883253)),
players: await getPlayerMatchData(7465883253),
};
const blob = Buffer.from(JSON.stringify(match));

const archive = new Archive('match');
// Archive it
const putRes = await archivePut(match.match_id?.toString() ?? '', blob);
const putRes = await archive.archivePut(match.match_id?.toString() ?? '', blob);
console.log(putRes);

// Read it back
const readBack = await getArchivedMatch(match.match_id?.toString() ?? '');
const readBack = await tryReadArchivedMatch(match.match_id?.toString() ?? '');

console.log(JSON.stringify(match).length, JSON.stringify(readBack).length);

// Verify we get back null for invalid match id
const nullMatch = await getArchivedMatch('123');
const nullMatch = await tryReadArchivedMatch('123');
console.log(nullMatch);

// Confirm API returns the same data whether we used the archive or not
3 changes: 2 additions & 1 deletion dev/checkAccounts.ts
@@ -1,7 +1,8 @@
import fs from 'fs';
import Steam from 'steam';
import { EOL } from 'os';
const accountData = fs.readFileSync('./STEAM_ACCOUNT_DATA_BAD.txt', 'utf8');
const accountArray = accountData.split(require('os').EOL);
const accountArray = accountData.split(EOL);

let index = Number(process.argv[2]) || -1;
index += 1;
2 changes: 1 addition & 1 deletion dev/convertSpec.ts
@@ -1,4 +1,4 @@
import fs from 'fs';
import spec from '../routes/spec.js';
import spec from '../routes/spec';

fs.writeFileSync('./spec.json', JSON.stringify(spec, null, 2), 'utf-8');
21 changes: 8 additions & 13 deletions dev/findProMatches.mts
@@ -1,17 +1,13 @@
import { insertMatchPromise } from '../store/queries.js';
import dbImport from '../store/db.js';
import { generateJob, getDataPromise } from '../util/utility.js';

const db = dbImport.default;

// const leagueUrl = generateJob('api_leagues', {}).url;
const { insertMatch } = await import('../store/insert.js');
const { db } = await import('../store/db.js');
const { generateJob, getSteamAPIData } = await import('../util/utility.js');

async function getPage(url: string, leagueid: number) {
const data: any = await getDataPromise(url);
const data: any = await getSteamAPIData(url);
console.log(
leagueid,
data.result.total_results,
data.result.results_remaining
data.result.results_remaining,
);
for (let i = 0; i < data.result.matches.length; i++) {
const match = data.results.matches[i];
@@ -20,13 +16,12 @@ async function getPage(url: string, leagueid: number) {
match_id: match.match_id,
});
const { url } = job;
const body: any = await getDataPromise({
const body: any = await getSteamAPIData({
url,
delay: 200,
});
if (body.result) {
const match = body.result;
await insertMatchPromise(match, { type: 'api', skipParse: true });
await insertMatch(match, { type: 'api' });
}
}
if (data.result.results_remaining) {
@@ -51,7 +46,7 @@ leagueIds.forEach(async (leagueid: number) => {
const { url } = generateJob('api_history', {
leagueid,
});
return await getPage(url, leagueid);
return getPage(url, leagueid);
});
process.exit(0);
// From API
5 changes: 2 additions & 3 deletions dev/generateFakeRatings.mts
@@ -1,5 +1,4 @@
import dbImport from '../store/db.js';
const db = dbImport.default;
const { db } = await import('../store/db.js');

function randByCentralLimitTheorem() {
let v = 0;
@@ -12,7 +11,7 @@ function randByCentralLimitTheorem() {
function gaussianRandom(mean: number, std: number) {
if (mean === undefined || std === undefined) {
throw new Error(
'Gaussian random needs 2 arguments (mean, standard deviation)'
'Gaussian random needs 2 arguments (mean, standard deviation)',
);
}
return randByCentralLimitTheorem() * std + mean;
4 changes: 2 additions & 2 deletions dev/keyTest.mts
@@ -1,13 +1,13 @@
import axios from 'axios';
import config from '../config.js';
const { config } = await import('../config.js');

const output: string[] = [];

const arr = config.STEAM_API_KEY.split(',');
for (let i = 0; i < arr.length; i++) {
const key = arr[i];
const resp = await axios.get(
`http://api.steampowered.com/IDOTA2Match_570/GetMatchHistory/V001/?key=${key}`
`http://api.steampowered.com/IDOTA2Match_570/GetMatchHistory/V001/?key=${key}`,
);
console.log(key, resp.status);
if (resp.status !== 200) {
7 changes: 0 additions & 7 deletions dev/lobby.ts
@@ -1,11 +1,4 @@
/*
const config = require('./config');
const Steam = require('steam');
const Dota2 = require('dota2');
const utility = require('./utility');
const async = require('async');
const convert64To32 = utility.convert64to32;
const express = require('express');
const app = express();
const users = config.STEAM_USER.split(',');
const passes = config.STEAM_PASS.split(',');
5 changes: 2 additions & 3 deletions dev/metaParse.mts
@@ -1,9 +1,8 @@
import fs from 'fs';
import meta from '../store/getMeta.js';
const { getMetaFromUrl } = meta;
const { getMetaFromUrl } = await import('../store/getMeta.js');

const message = await getMetaFromUrl(
'http://replay117.valve.net/570/7468445438_1951738768.meta.bz2'
'http://replay117.valve.net/570/7468445438_1951738768.meta.bz2',
);
// Stats: Original bzip2, 77kb, unzipped, 113kb, parsed JSON 816kb
// fs.writeFileSync(
22 changes: 22 additions & 0 deletions dev/playerCachesArchive.mts
@@ -0,0 +1,22 @@
import fs from 'fs';
const { getPlayerMatchesWithMetadata } = await import(
'../store/queries.js'
);
const { doArchivePlayerMatches } = await import('../store/getArchivedData.js');

// Write player blob to archive
await doArchivePlayerMatches('88367253');

// Read it back
// await getArchivedPlayerMatches('88367253');

// Check the combined getPlayerMatches results
const readBack = await getPlayerMatchesWithMetadata('88367253', {
project: [],
projectAll: true,
});
console.log(readBack[1]);

// There shouldn't be any duplicate match IDs
// The data should be the same
fs.writeFileSync('./build/88367253,json', JSON.stringify(readBack[0], null, 2));
18 changes: 5 additions & 13 deletions dev/reParse.ts
@@ -1,27 +1,19 @@
// Issues reparse requests for all matches in postgres that aren't parsed
import db from '../store/db';
import { insertMatchPromise } from '../store/queries';
import { getDataPromise, generateJob } from '../util/utility';
import queue from '../store/queue';

async function start() {
const matches = await db.raw(
'select match_id from matches where version IS NULL'
'select match_id from matches where version IS NULL',
);
console.log(matches.rows.length);
for (let i = 0; i < matches.rows.length; i++) {
const input = matches.rows[i];
// match id request, get data from API
const body: any = await getDataPromise(
generateJob('api_details', input).url
await queue.addReliableJob(
{ name: 'parse', data: { match_id: input.match_id } },
{},
);
// match details response
const match = body.result;
const job = await insertMatchPromise(match, {
type: 'api',
attempts: 1,
priority: 1,
forceParse: true,
});
}
}
start();