Skip to content

runners: Remove SSM deletion from terminateRunner #6934

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ import {
RunnerInputParameters,
createRunner,
findAmiID,
getParameterNameForRunner,
listRunners,
listSSMParameters,
resetRunnersCaches,
Expand Down Expand Up @@ -345,21 +344,11 @@ describe('terminateRunner', () => {
instanceId: 'i-1234',
environment: 'gi-ci',
};
mockSSMdescribeParametersRet.mockResolvedValueOnce({
Parameters: [getParameterNameForRunner(runner.environment as string, runner.instanceId)].map((s) => {
return { Name: s };
}),
});
await terminateRunner(runner, metrics);

expect(mockEC2.terminateInstances).toBeCalledWith({
InstanceIds: [runner.instanceId],
});
expect(mockSSM.describeParameters).toBeCalledTimes(1);
expect(mockSSM.deleteParameter).toBeCalledTimes(1);
expect(mockSSM.deleteParameter).toBeCalledWith({
Name: getParameterNameForRunner(runner.environment as string, runner.instanceId),
});
});

it('fails to terminate', async () => {
Expand All @@ -372,11 +361,9 @@ describe('terminateRunner', () => {
promise: jest.fn().mockRejectedValueOnce(Error(errMsg)),
});
expect(terminateRunner(runner, metrics)).rejects.toThrowError(errMsg);
expect(mockSSM.describeParameters).not.toBeCalled();
expect(mockSSM.deleteParameter).not.toBeCalled();
});

it('fails to list parameters on terminate, then force delete all next parameters', async () => {
it('terminates multiple runners', async () => {
const runner1: RunnerInfo = {
awsRegion: Config.Instance.awsRegion,
instanceId: '1234',
Expand All @@ -387,19 +374,10 @@ describe('terminateRunner', () => {
instanceId: '1235',
environment: 'environ',
};
mockSSMdescribeParametersRet.mockRejectedValueOnce('Some Error');
await terminateRunner(runner1, metrics);
await terminateRunner(runner2, metrics);

expect(mockEC2.terminateInstances).toBeCalledTimes(2);
expect(mockSSM.describeParameters).toBeCalledTimes(1);
expect(mockSSM.deleteParameter).toBeCalledTimes(2);
expect(mockSSM.deleteParameter).toBeCalledWith({
Name: getParameterNameForRunner(runner1.environment as string, runner1.instanceId),
});
expect(mockSSM.deleteParameter).toBeCalledWith({
Name: getParameterNameForRunner(runner2.environment as string, runner2.instanceId),
});
});
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,6 @@ export interface DescribeInstancesResultRegion {
describeInstanceResult: PromiseResult<EC2.Types.DescribeInstancesResult, AWS.AWSError>;
}

const SHOULD_NOT_TRY_LIST_SSM = 'SHOULD_NOT_TRY_LIST_SSM';

// Keep the cache as long as half of minimum time, this should reduce calls to AWS API
const ssmParametersCache = new LRU({ maxAge: (Config.Instance.minimumRunningTimeInMinutes * 60 * 1000) / 2 });

Expand Down Expand Up @@ -174,7 +172,7 @@ export async function listRunners(
.describeInstances({ Filters: ec2Filters })
.promise()
.then((describeInstanceResult): DescribeInstancesResultRegion => {
const listOfRunnersIdType: string[] = (
(
describeInstanceResult?.Reservations?.flatMap((reservation) => {
return (
reservation.Instances?.map((instance) => {
Expand Down Expand Up @@ -327,31 +325,6 @@ export async function terminateRunner(runner: RunnerInfo, metrics: Metrics): Pro
);
});
console.info(`Runner terminated: ${runner.instanceId} ${runner.runnerType}`);

const paramName = getParameterNameForRunner(runner.environment || Config.Instance.environment, runner.instanceId);
const cacheName = `${SHOULD_NOT_TRY_LIST_SSM}_${runner.awsRegion}`;

if (ssmParametersCache.has(cacheName)) {
doDeleteSSMParameter(paramName, metrics, runner.awsRegion);
} else {
try {
const params = await listSSMParameters(metrics, runner.awsRegion);

if (params.has(paramName)) {
doDeleteSSMParameter(paramName, metrics, runner.awsRegion);
} else {
/* istanbul ignore next */
console.info(`[${runner.awsRegion}] Parameter "${paramName}" not found in SSM, no need to delete it`);
}
} catch (e) {
ssmParametersCache.set(cacheName, 1, 60 * 1000);
console.error(
`[terminateRunner - listSSMParameters] [${runner.awsRegion}] ` +
`Failed to list parameters or check if available: ${e}`,
);
doDeleteSSMParameter(paramName, metrics, runner.awsRegion);
}
}
} catch (e) {
console.error(`[${runner.awsRegion}] [terminateRunner]: ${e}`);
throw e;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,8 @@ import {
resetGHRunnersCaches,
} from './gh-runners';
import * as MetricsModule from './metrics';
import { listRunners, resetRunnersCaches, terminateRunner, RunnerType } from './runners';
import {
doDeleteSSMParameter,
listRunners,
resetRunnersCaches,
terminateRunner,
RunnerType,
listSSMParameters,
} from './runners';
import {
cleanupOldSSMParameters,
getGHRunnerOrg,
getGHRunnerRepo,
isEphemeralRunner,
Expand All @@ -38,7 +30,6 @@ import {
sortSSMParametersByUpdateTime,
} from './scale-down';
import { RequestError } from '@octokit/request-error';
import { SSM } from 'aws-sdk';

jest.mock('./gh-runners', () => ({
/* eslint-disable-next-line @typescript-eslint/no-explicit-any */
Expand All @@ -56,9 +47,7 @@ jest.mock('./gh-runners', () => ({
jest.mock('./runners', () => ({
/* eslint-disable-next-line @typescript-eslint/no-explicit-any */
...(jest.requireActual('./runners') as any),
doDeleteSSMParameter: jest.fn(),
listRunners: jest.fn(),
listSSMParameters: jest.fn(),
resetRunnersCaches: jest.fn(),
terminateRunner: jest.fn(),
}));
Expand All @@ -79,8 +68,6 @@ const mockRunner = (runnerDef: any) => {
return runnerDef as GhRunner;
};

const MAX_SSM_PARAMETERS = 25;

const metrics = new MetricsModule.ScaleDownMetrics();

const minimumRunningTimeInMinutes = 10;
Expand All @@ -95,8 +82,6 @@ const baseConfig = {
shuffledSubnetIdsForAwsRegion: jest.fn().mockImplementation((awsRegion: string) => {
return Array.from(subnetIds.get(awsRegion) ?? []).sort();
}),
sSMParamCleanupAgeDays: 7,
sSMParamMaxCleanupAllowance: MAX_SSM_PARAMETERS, // Easier to test
};

describe('scale-down', () => {
Expand Down Expand Up @@ -1506,122 +1491,6 @@ describe('scale-down', () => {
});
});

describe('cleanupOldSSMParameters', () => {
it('Stops when LastModifiedDate === undefined', async () => {
const oldDt = moment()
.subtract(Config.Instance.sSMParamCleanupAgeDays + 1, 'days')
.toDate();
const ssmParameters = new Map<string, SSM.ParameterMetadata>();
ssmParameters.set('WG113', { Name: 'WG113', LastModifiedDate: undefined });
ssmParameters.set('WG115', { Name: 'WG115', LastModifiedDate: oldDt });
ssmParameters.set('WG116', { Name: 'WG116', LastModifiedDate: oldDt });

const mockedDoDeleteSSMParameter = mocked(doDeleteSSMParameter);
const mockedListSSMParameters = mocked(listSSMParameters);

mockedListSSMParameters.mockResolvedValueOnce(ssmParameters);
mockedDoDeleteSSMParameter.mockResolvedValue(true);

await cleanupOldSSMParameters(new Set(['us-east-1']), metrics);

expect(mockedDoDeleteSSMParameter).toBeCalledTimes(2);
expect(mockedDoDeleteSSMParameter).toBeCalledWith('WG115', metrics, 'us-east-1');
expect(mockedDoDeleteSSMParameter).toBeCalledWith('WG116', metrics, 'us-east-1');
expect(mockedListSSMParameters).toBeCalledTimes(1);
});

it('Stops when LastModifiedDate is < Config.Instance.sSMParamCleanupAgeDays', async () => {
const oldDt = moment()
.subtract(Config.Instance.sSMParamCleanupAgeDays + 1, 'days')
.toDate();
const olderDt = moment()
.subtract(Config.Instance.sSMParamCleanupAgeDays - 1, 'days')
.toDate();
const ssmParameters = new Map<string, SSM.ParameterMetadata>();
ssmParameters.set('WG113', { Name: 'WG113', LastModifiedDate: undefined });
ssmParameters.set('WG115', { Name: 'WG115', LastModifiedDate: oldDt });
ssmParameters.set('WG116', { Name: 'WG116', LastModifiedDate: olderDt });

const mockedDoDeleteSSMParameter = mocked(doDeleteSSMParameter);
const mockedListSSMParameters = mocked(listSSMParameters);

mockedListSSMParameters.mockResolvedValueOnce(ssmParameters);
mockedDoDeleteSSMParameter.mockResolvedValue(true);

await cleanupOldSSMParameters(new Set(['us-east-1']), metrics);

expect(mockedDoDeleteSSMParameter).toBeCalledTimes(1);
expect(mockedDoDeleteSSMParameter).toBeCalledWith('WG115', metrics, 'us-east-1');
expect(mockedListSSMParameters).toBeCalledTimes(1);
});

it('Stops when deleted >= Config.Instance.sSMParamMaxCleanupAllowance', async () => {
const oldDt = moment()
.subtract(Config.Instance.sSMParamCleanupAgeDays + 1, 'days')
.toDate();
const ssmParameters = new Map<string, SSM.ParameterMetadata>();
[...Array(MAX_SSM_PARAMETERS + 5).keys()].forEach((i) => {
const name = `AGDGADUWG113-${i}`;
ssmParameters.set(name, { Name: name, LastModifiedDate: oldDt });
});

const mockedDoDeleteSSMParameter = mocked(doDeleteSSMParameter);
const mockedListSSMParameters = mocked(listSSMParameters);

mockedListSSMParameters.mockResolvedValueOnce(ssmParameters);
mockedDoDeleteSSMParameter.mockResolvedValue(true);

await cleanupOldSSMParameters(new Set(['us-east-1']), metrics);

expect(mockedDoDeleteSSMParameter).toBeCalledTimes(MAX_SSM_PARAMETERS);
expect(mockedListSSMParameters).toBeCalledTimes(1);
});

it('Breaks when deleted >= Config.Instance.sSMParamMaxCleanupAllowance', async () => {
const oldDt = moment()
.subtract(Config.Instance.sSMParamCleanupAgeDays + 1, 'days')
.toDate();
const ssmParameters = new Map<string, SSM.ParameterMetadata>();
[...Array(MAX_SSM_PARAMETERS + 5).keys()].forEach((i) => {
const name = `AGDGADUWG113-${i}`;
ssmParameters.set(name, { Name: name, LastModifiedDate: oldDt });
});

const mockedDoDeleteSSMParameter = mocked(doDeleteSSMParameter);
const mockedListSSMParameters = mocked(listSSMParameters);

mockedListSSMParameters.mockResolvedValueOnce(ssmParameters);
mockedDoDeleteSSMParameter.mockResolvedValue(true);

await cleanupOldSSMParameters(new Set(['us-east-1']), metrics);

expect(mockedDoDeleteSSMParameter).toBeCalledTimes(MAX_SSM_PARAMETERS);
expect(mockedListSSMParameters).toBeCalledTimes(1);
});

it('Dont count failed to delete', async () => {
const oldDt = moment()
.subtract(Config.Instance.sSMParamCleanupAgeDays + 1, 'days')
.toDate();
const ssmParameters = new Map<string, SSM.ParameterMetadata>();
[...Array(MAX_SSM_PARAMETERS + 5).keys()].forEach((i) => {
const name = `AGDGADUWG113-${i}`;
ssmParameters.set(name, { Name: name, LastModifiedDate: oldDt });
});

const mockedDoDeleteSSMParameter = mocked(doDeleteSSMParameter);
const mockedListSSMParameters = mocked(listSSMParameters);

mockedListSSMParameters.mockResolvedValueOnce(ssmParameters);
mockedDoDeleteSSMParameter.mockResolvedValue(false);

await cleanupOldSSMParameters(new Set(['us-east-1']), metrics);

expect(mockedDoDeleteSSMParameter).toBeCalledTimes(MAX_SSM_PARAMETERS + 5);
expect(mockedListSSMParameters).toBeCalledTimes(1);
});
});

describe('getGHRunnerOrg', () => {
const ghRunners = [
{ name: 'instance-id-01', busy: true },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import {
resetGHRunnersCaches,
} from './gh-runners';
import { ScaleDownMetrics, sendMetricsAtTimeout, sendMetricsTimeoutVars } from './metrics';
import { doDeleteSSMParameter, listRunners, listSSMParameters, resetRunnersCaches, terminateRunner } from './runners';
import { listRunners, resetRunnersCaches, terminateRunner } from './runners';
import { getRepo, groupBy, Repo, RunnerInfo, isGHRateLimitError, shuffleArrayInPlace } from './utils';
import { SSM } from 'aws-sdk';

Expand Down Expand Up @@ -44,10 +44,6 @@ export async function scaleDown(): Promise<void> {
},
);

const runnersRegions = new Set<string>(
Array.from(runnersDict.values()).flatMap((runners) => runners.map((runner) => runner.awsRegion)),
);

if (runnersDict.size === 0) {
console.info(`No active runners found for environment: '${Config.Instance.environment}'`);
return;
Expand Down Expand Up @@ -144,9 +140,6 @@ export async function scaleDown(): Promise<void> {
}
}
}

await cleanupOldSSMParameters(runnersRegions, metrics);

console.info('Scale down completed');
} catch (e) {
/* istanbul ignore next */
Expand All @@ -161,46 +154,6 @@ export async function scaleDown(): Promise<void> {
}
}

export async function cleanupOldSSMParameters(runnersRegions: Set<string>, metrics: ScaleDownMetrics): Promise<void> {
try {
for (const awsRegion of runnersRegions) {
const ssmParams = sortSSMParametersByUpdateTime(
Array.from((await listSSMParameters(metrics, awsRegion)).values()),
);

let deleted = 0;
for (const ssmParam of ssmParams) {
/* istanbul ignore next */
if (ssmParam.Name === undefined) {
continue;
}
if (ssmParam.LastModifiedDate === undefined) {
break;
}
if (
ssmParam.LastModifiedDate.getTime() >
moment().subtract(Config.Instance.sSMParamCleanupAgeDays, 'days').toDate().getTime()
) {
break;
}
if (await doDeleteSSMParameter(ssmParam.Name, metrics, awsRegion)) {
deleted += 1;
}
if (deleted >= Config.Instance.sSMParamMaxCleanupAllowance) {
break;
}
}

if (deleted > 0) {
console.info(`Deleted ${deleted} old SSM parameters in ${awsRegion}`);
}
}
} catch (e) {
/* istanbul ignore next */
console.error('Failed to cleanup old SSM parameters', e);
}
}

export async function getGHRunnerOrg(ec2runner: RunnerInfo, metrics: ScaleDownMetrics): Promise<GhRunner | undefined> {
const org = ec2runner.org as string;
let ghRunner: GhRunner | undefined = undefined;
Expand Down
Loading