anomalyco · ZehuaWang · May 24, 2026 · May 25, 2026 · May 25, 2026
diff --git a/packages/opencode/src/session/overflow.ts b/packages/opencode/src/session/overflow.ts
@@ -26,7 +26,30 @@ export function isOverflow(input: {
   if (input.cfg.compaction?.auto === false) return false
   if (input.model.limit.context === 0) return false
 
-  const count =
-    input.tokens.total || input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write
-  return count >= usable(input)
+  return tokenCount(input.tokens) >= usable(input)
+}
+
+export function tokenCount({ total, input, output, cache }: MessageV2.Assistant["tokens"]) {
+  return total || input + output + cache.read + cache.write // reported total if truthy, else the sum
+}
+
+// Reports whether the auto-compaction that fired at `previousTokens` failed to
+// shrink reported usage: the result is true when `currentTokens` is still above
+// `previousTokens * threshold` (threshold defaults to 0.95, i.e. a drop of at
+// least 5% is required). A smaller drop means compaction is not making progress
+// — typically the configured context window is smaller than what the provider
+// actually serves — and retrying would loop forever. Landing exactly on the
+// limit (e.g. 200K → 190K at the default) counts as progress and does NOT trip
+// the guard; with no previous compaction (undefined) the answer is always false.
+export function autoCompactStalled(input: {
+  previousTokens: number | undefined
+  currentTokens: number
+  threshold?: number
+}) {
+  const { previousTokens: before, currentTokens: after } = input
+  if (before === undefined) return false
+  // Provider-error compactions record no usage (no step-finish runs), giving
+  // 0 → 0; the ratio test below can never flag that (0 > 0), so stall here.
+  if (before === 0 && after === 0) return true
+  return after > before * (input.threshold ?? 0.95)
 }
diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts
@@ -11,6 +11,7 @@ import { ModelID, ProviderID } from "../provider/schema"
 import { type Tool as AITool, tool, jsonSchema } from "ai"
 import type { JSONSchema7 } from "@ai-sdk/provider"
 import { SessionCompaction } from "./compaction"
+import { autoCompactStalled, tokenCount } from "./overflow"
 import { Bus } from "../bus"
 import { SystemPrompt } from "./system"
 import { Instruction } from "./instruction"
@@ -1242,6 +1243,12 @@ export const layer = Layer.effect(
         const slog = elog.with({ sessionID })
         let structured: unknown
         let step = 0
+        // Token count reported when the previous auto-compaction was triggered
+        // in this run. Used to detect a stalled compaction loop — e.g. when the
+        // model's configured context window is smaller than what the provider
+        // actually serves, so isOverflow stays true after every compaction and
+        // we'd otherwise spin forever. See autoCompactStalled in overflow.ts.
+        let prevAutoCompactTokens: number | undefined
         const session = yield* sessions.get(sessionID).pipe(Effect.orDie)
 
         while (true) {
@@ -1303,13 +1310,33 @@ export const layer = Layer.effect(
             continue
           }
 
-          if (
-            lastFinished &&
-            lastFinished.summary !== true &&
-            (yield* compaction.isOverflow({ tokens: lastFinished.tokens, model }))
-          ) {
-            yield* compaction.create({ sessionID, agent: lastUser.agent, model: lastUser.model, auto: true })
-            continue
+          if (lastFinished && lastFinished.summary !== true) {
+            const overflowing = yield* compaction.isOverflow({ tokens: lastFinished.tokens, model })
+            if (overflowing) {
+              const current = tokenCount(lastFinished.tokens)
+              if (autoCompactStalled({ previousTokens: prevAutoCompactTokens, currentTokens: current })) {
+                const error = new MessageV2.ContextOverflowError({
+                  message:
+                    `Auto-compaction made no meaningful progress ` +
+                    `(${prevAutoCompactTokens} → ${current} tokens reported). ` +
+                    `Aborting to prevent an infinite loop. The model's configured context window ` +
+                    `may be smaller than what the provider actually serves — consider disabling ` +
+                    `auto-compaction in your config, or update the model's context limit.`,
+                })
+                yield* slog.warn("auto-compact loop guard tripped", {
+                  previousTokens: prevAutoCompactTokens,
+                  currentTokens: current,
+                  modelID: model.id,
+                  providerID: model.providerID,
+                })
+                yield* bus.publish(Session.Event.Error, { sessionID, error: error.toObject() })
+                throw error
+              }
+              prevAutoCompactTokens = current
+              yield* compaction.create({ sessionID, agent: lastUser.agent, model: lastUser.model, auto: true })
+              continue
+            }
+            prevAutoCompactTokens = undefined
           }
 
           const agent = yield* agents.get(lastUser.agent)
@@ -1459,6 +1486,26 @@ export const layer = Layer.effect(
 
             if (result === "stop") return "break" as const
             if (result === "compact") {
+              const current = tokenCount(handle.message.tokens)
+              if (autoCompactStalled({ previousTokens: prevAutoCompactTokens, currentTokens: current })) {
+                const error = new MessageV2.ContextOverflowError({
+                  message:
+                    `Auto-compaction made no meaningful progress ` +
+                    `(${prevAutoCompactTokens} → ${current} tokens reported). ` +
+                    `Aborting to prevent an infinite loop. The model's configured context window ` +
+                    `may be smaller than what the provider actually serves — consider disabling ` +
+                    `auto-compaction in your config, or update the model's context limit.`,
+                })
+                yield* slog.warn("auto-compact loop guard tripped (post-call)", {
+                  previousTokens: prevAutoCompactTokens,
+                  currentTokens: current,
+                  modelID: model.id,
+                  providerID: model.providerID,
+                })
+                yield* bus.publish(Session.Event.Error, { sessionID, error: error.toObject() })
+                throw error
+              }
+              prevAutoCompactTokens = current
               yield* compaction.create({
                 sessionID,
                 agent: lastUser.agent,

diff --git a/packages/opencode/test/session/compaction.test.ts b/packages/opencode/test/session/compaction.test.ts
@@ -8,6 +8,7 @@ import { Image } from "@/image/image"
 import { Agent } from "../../src/agent/agent"
 import { LLM } from "../../src/session/llm"
 import { SessionCompaction } from "../../src/session/compaction"
+import { autoCompactStalled, tokenCount } from "../../src/session/overflow"
 import { Token } from "@/util/token"
 import * as Log from "@opencode-ai/core/util/log"
 import { Permission } from "../../src/permission"
@@ -558,6 +559,72 @@ describe("session.compaction.isOverflow", () => {
   )
 })
 
+describe("session.overflow.tokenCount", () => {
+  test("prefers tokens.total when provided", () => {
+    const usage = { total: 12_345, input: 1, output: 2, reasoning: 3, cache: { read: 4, write: 5 } }
+    expect(tokenCount(usage)).toBe(12_345)
+  })
+
+  test("sums input + output + cache.read + cache.write when total is missing", () => {
+    // reasoning stays out of the sum, matching the formula isOverflow used before tokenCount was extracted.
+    const usage = { input: 100, output: 50, reasoning: 25, cache: { read: 30, write: 20 } }
+    expect(tokenCount(usage)).toBe(200)
+  })
+
+  test("sums when total is 0 (falsy)", () => {
+    const usage = { total: 0, input: 10, output: 20, reasoning: 5, cache: { read: 3, write: 2 } }
+    expect(tokenCount(usage)).toBe(35)
+  })
+})
+
+describe("session.overflow.autoCompactStalled", () => {
+  test("returns false on the first auto-compaction (no prior token count)", () => {
+    expect(autoCompactStalled({ previousTokens: undefined, currentTokens: 200_000 })).toBe(false)
+  })
+
+  test("returns true when token count did not drop (bug #28543 repro)", () => {
+    expect(autoCompactStalled({ previousTokens: 236_900, currentTokens: 236_900 })).toBe(true)
+  })
+
+  test("returns true when reduction is below the 5% default threshold", () => {
+    // previous=200K, after compaction current=195K, reduction=2.5% → still stalled
+    expect(autoCompactStalled({ previousTokens: 200_000, currentTokens: 195_000 })).toBe(true)
+  })
+
+  test("returns false when reduction is at or above the 5% default threshold", () => {
+    // previous=200K, after compaction current=180K, reduction=10% → healthy
+    expect(autoCompactStalled({ previousTokens: 200_000, currentTokens: 180_000 })).toBe(false)
+  })
+
+  test("counts an exactly-threshold reduction as progress (not stalled)", () => {
+    // previous=200K, current=190K = exactly a 5% reduction. The guard demands
+    // "at least 5% reduction" before it stops treating a compaction as stalled,
+    // and 5% satisfies "at least 5%" — so the guard must NOT trip here.
+    expect(autoCompactStalled({ previousTokens: 200_000, currentTokens: 190_000 })).toBe(false)
+    // current=190_001 — reduction is 4.9995% (strictly less than 5%) → stalled.
+    expect(autoCompactStalled({ previousTokens: 200_000, currentTokens: 190_001 })).toBe(true)
+  })
+
+  test("honors a custom threshold override", () => {
+    // With threshold=0.5 we require a 50% reduction
+    expect(autoCompactStalled({ previousTokens: 100_000, currentTokens: 60_000, threshold: 0.5 })).toBe(true)
+    expect(autoCompactStalled({ previousTokens: 100_000, currentTokens: 40_000, threshold: 0.5 })).toBe(false)
+  })
+
+  test("returns true when current somehow exceeds previous", () => {
+    expect(autoCompactStalled({ previousTokens: 200_000, currentTokens: 240_000 })).toBe(true)
+  })
+
+  test("counts repeated zero-token compactions as stalled", () => {
+    // The provider-error compaction path (ContextOverflowError caught in
+    // SessionProcessor.halt) skips step-finish, so handle.message.tokens
+    // stays at the zero-initialized value. Two such triggers in a row give
+    // us no progress signal at all — without this guard the percentage
+    // check would keep returning false (0 > 0 * 0.95 = false) forever.
+    expect(autoCompactStalled({ previousTokens: 0, currentTokens: 0 })).toBe(true)
+  })
+})
+
 describe("session.compaction.create", () => {
   it.live(
     "creates a compaction user message and part",

diff --git a/packages/opencode/test/session/prompt.test.ts b/packages/opencode/test/session/prompt.test.ts
@@ -2314,3 +2314,82 @@ noLLMServer.instance(
     }),
   30_000,
 )
+
+// Regression test for #28543: when the model's configured context window is
+// smaller than what the provider actually serves, every successful model call
+// keeps reporting "overflowing" token counts. Without a loop guard the runner
+// keeps re-triggering auto-compaction even though it cannot reduce usage. With
+// the guard, the first compaction is allowed to run; when the turn that follows
+// it still reports the same overflowing count, the run fails with a typed
+// ContextOverflowError instead of compacting again.
+it.instance(
+  "auto-compact loop guard breaks when compaction makes no progress",
+  () =>
+    Effect.gen(function* () {
+      const { llm } = yield* useServerConfig(providerCfg)
+      const prompt = yield* SessionPrompt.Service
+      const sessions = yield* Session.Service
+      const session = yield* sessions.create({
+        title: "Loop guard repro",
+        permission: [{ permission: "*", pattern: "*", action: "allow" }],
+      })
+
+      // Test model: context=100K, output=10K → usable = 90K. A reported
+      // count of 90_001 tokens is just over the overflow bar.
+      const overflow = { input: 80_000, output: 10_001 }
+
+      // Seed a prior finished assistant whose reported tokens already
+      // overflow. This mimics the real-world pattern where the previous turn
+      // already consumed more "tokens" than the model's misconfigured limit.
+      const seedUser = yield* user(session.id, "earlier prompt")
+      yield* sessions.updateMessage({
+        id: MessageID.ascending(),
+        role: "assistant",
+        parentID: seedUser.id,
+        sessionID: session.id,
+        mode: "build",
+        agent: "build",
+        cost: 0,
+        path: { cwd: "/tmp", root: "/tmp" },
+        tokens: {
+          input: overflow.input,
+          output: overflow.output,
+          reasoning: 0,
+          cache: { read: 0, write: 0 },
+          total: overflow.input + overflow.output,
+        },
+        modelID: ref.modelID,
+        providerID: ref.providerID,
+        time: { created: Date.now() },
+        finish: "stop",
+      })
+
+      yield* prompt.prompt({
+        sessionID: session.id,
+        agent: "build",
+        noReply: true,
+        parts: [{ type: "text", text: "next prompt" }],
+      })
+
+      // 1) compaction summary call — small response so the summary itself
+      //    finishes cleanly. summary=true marks this message so it never
+      //    triggers the pre-call overflow check.
+      yield* llm.text("compaction summary", { usage: { input: 1, output: 1 } })
+      // 2) post-compact continue turn — STILL reports overflow → the
+      //    processor sets needsCompaction=true → the post-call guard trips.
+      yield* llm.text("second response (still overflowing)", { usage: overflow })
+
+      const exit = yield* prompt.loop({ sessionID: session.id }).pipe(Effect.exit)
+
+      expect(Exit.isFailure(exit)).toBe(true)
+      if (Exit.isFailure(exit)) {
+        const err = Cause.squash(exit.cause)
+        expect(MessageV2.ContextOverflowError.isInstance(err)).toBe(true)
+        if (MessageV2.ContextOverflowError.isInstance(err)) {
+          expect(err.data.message).toContain("Auto-compaction made no meaningful progress")
+        }
+      }
+    }),
+  30_000,
+)
+