pytorch · seemethere · Jul 7, 2025 · Jul 7, 2025 · ZainRizvi · Jul 14, 2025
diff --git a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/lambda.ts b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/lambda.ts
@@ -7,11 +7,13 @@ import {
   ScaleUpChronMetrics,
   sendMetricsAtTimeout,
   sendMetricsTimeoutVars,
+  ScaleCycleMetrics,
 } from './scale-runners/metrics';
 import { getDelayWithJitterRetryCount, stochaticRunOvershoot } from './scale-runners/utils';
 import { scaleDown as scaleDownR } from './scale-runners/scale-down';
 import { scaleUpChron as scaleUpChronR } from './scale-runners/scale-up-chron';
 import { sqsSendMessages, sqsDeleteMessageBatch } from './scale-runners/sqs';
+import { scaleCycle as scaleCycleR } from './scale-runners/scale-cycle';
 
 async function sendRetryEvents(evtFailed: Array<[SQSRecord, boolean, number]>, metrics: ScaleUpMetrics) {
   console.error(`Detected ${evtFailed.length} errors when processing messages, will retry relevant messages.`);
@@ -202,3 +204,38 @@ export async function scaleUpChron(event: ScheduledEvent, context: Context, call
   }
   callback(callbackOutput);
 }
+
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+export async function scaleCycle(event: ScheduledEvent, context: Context, callback: any) {
+  // we mantain open connections to redis, so the event pool is only cleaned when the SIGTERM is sent
+  context.callbackWaitsForEmptyEventLoop = false;
+
+  const metrics = new ScaleCycleMetrics();
+  const sndMetricsTimout: sendMetricsTimeoutVars = {
+    metrics: metrics,
+  };
+  sndMetricsTimout.setTimeout = setTimeout(
+    sendMetricsAtTimeout(sndMetricsTimout),
+    (Config.Instance.lambdaTimeout - 10) * 1000,
+  );
+
+  let callbackOutput: string | null = null;
+
+  try {
+    await scaleCycleR(metrics);
+  } catch (e) {
+    console.error(e);
+    callbackOutput = `Failed to scale cycle: ${e}`;
+  } finally {
+    try {
+      clearTimeout(sndMetricsTimout.setTimeout);
+      sndMetricsTimout.metrics = undefined;
+      sndMetricsTimout.setTimeout = undefined;
+      await metrics.sendMetrics();
+    } catch (e) {
+      callbackOutput = `Error sending metrics: ${e}`;
+    }
+  }
+
+  callback(callbackOutput);
+}
diff --git a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/metrics.ts b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/metrics.ts
@@ -1813,3 +1813,30 @@ export function sendMetricsAtTimeout(metricsTimeouts: sendMetricsTimeoutVars) {
     }
   };
 }
+
+export class ScaleCycleMetrics extends ScaleUpMetrics {
+  constructor() {
+    super('scaleCycle');
+  }
+
+  scaleCycleRunnerReuseFound(runnerType: string) {
+    const dimensions = new Map([['RunnerType', runnerType]]);
+    this.countEntry('run.scaleCycle.runnerReuse.found', 1, dimensions);
+  }
+
+  scaleCycleRunnerReuseFoundOrg(org: string, runnerType: string) {
+    const dimensions = new Map([
+      ['Org', org],
+      ['RunnerType', runnerType],
+    ]);
+    this.countEntry('run.scaleCycle.runnerReuse.found.org', 1, dimensions);
+  }
+
+  scaleCycleRunnerReuseFoundRepo(repo: string, runnerType: string) {
+    const dimensions = new Map([
+      ['Repo', repo],
+      ['RunnerType', runnerType],
+    ]);
+    this.countEntry('run.scaleCycle.runnerReuse.found.repo', 1, dimensions);
+  }
+}
diff --git a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-cycle.ts b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-cycle.ts
@@ -0,0 +1,77 @@
+import { Config } from './config';
+import { listRunners, RunnerInputParameters, tryReuseRunner } from './runners';
+import { getRepo, getRepoKey } from './utils';
+import { ScaleCycleMetrics } from './metrics';
+import { getRunnerTypes } from './gh-runners';
+import { createRunnerConfigArgument } from './scale-up';
+
+export async function scaleCycle(metrics: ScaleCycleMetrics) {
+  // Get runner types configuration first
+  const scaleConfigRepo = getRepo(Config.Instance.scaleConfigOrg, Config.Instance.scaleConfigRepo);
+  const runnerTypes = await getRunnerTypes(scaleConfigRepo, metrics);
+
+  // Get all valid runner type names for filtering
+  const validRunnerTypeNames = Array.from(runnerTypes.keys());
+
+  // Make separate calls for each runner type to filter at EC2 level
+  const allRunners = await Promise.all(
+    validRunnerTypeNames.map((runnerTypeName) =>
+      listRunners(metrics, {
+        containsTags: ['GithubRunnerID', 'EphemeralRunnerFinished', 'RunnerType'],
+        runnerType: runnerTypeName,
+      }),
+    ),
+  );
+
+  // Flatten the results
+  const runners = allRunners.flat();
+
+  for (const runner of runners) {
+    // Skip if required fields are missing (org/repo still need to be checked)
+    if (!runner.runnerType || !runner.org || !runner.repo) {
+      console.warn(`Skipping runner ${runner.instanceId} due to missing required tags`);
+      continue;
+    }
+
+    // Get the RunnerType object from the string (we know it exists since we filtered by it)
+    const runnerType = runnerTypes.get(runner.runnerType);
+    if (!runnerType) {
+      console.warn(`Unknown runner type: ${runner.runnerType}, skipping`);
+      continue;
+    }
+
+    // Create repo object
+    const repo = getRepo(runner.org, runner.repo);
+
+    // For each runner send an EBS volume replacement task
+    const runnerInputParameters: RunnerInputParameters = {
+      runnerConfig: (awsRegion: string, experimentalRunner: boolean) => {
+        return createRunnerConfigArgument(
+          runnerType,
+          repo,
+          // NOTE: installationId can actually be undefined here but this may incur lower rate limits
+          // TODO: figure out if we need to pass an actual installationId here
+          undefined,
+          metrics,
+          awsRegion,
+          experimentalRunner,
+        );
+      },
+      environment: Config.Instance.environment,
+      runnerType: runnerType,
+    };
+
+    // Set orgName or repoName based on configuration
+    if (Config.Instance.enableOrganizationRunners) {
+      runnerInputParameters.orgName = runner.org;
+      metrics.scaleCycleRunnerReuseFoundOrg(runner.org, runner.runnerType);
+      console.info(`Reusing runner ${runner.instanceId} for ${runner.org}`);
+    } else {
+      runnerInputParameters.repoName = getRepoKey(repo);
+      metrics.scaleCycleRunnerReuseFoundRepo(getRepoKey(repo), runner.runnerType);
+      console.info(`Reusing runner ${runner.instanceId} for ${getRepoKey(repo)}`);
+    }
+
+    await tryReuseRunner(runnerInputParameters, metrics);
+  }
+}
diff --git a/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-up.ts b/terraform-aws-github-runner/modules/runners/lambdas/runners/src/scale-runners/scale-up.ts
@@ -163,7 +163,7 @@ export async function scaleUp(
   }
 }
 
-async function createRunnerConfigArgument(
+export async function createRunnerConfigArgument(
   runnerType: RunnerType,
   repo: Repo,
   installationId: number | undefined,