Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 26 additions & 3 deletions packages/opencode/src/session/overflow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,30 @@ export function isOverflow(input: {
if (input.cfg.compaction?.auto === false) return false
if (input.model.limit.context === 0) return false

const count =
input.tokens.total || input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write
return count >= usable(input)
return tokenCount(input.tokens) >= usable(input)
}

/**
 * Collapses an assistant message's token usage into a single number.
 *
 * When `tokens.total` is truthy it is returned as-is. When it is missing or 0,
 * the count falls back to `input + output + cache.read + cache.write`
 * (`reasoning` is not part of the fallback sum).
 */
export function tokenCount(tokens: MessageV2.Assistant["tokens"]) {
  if (tokens.total) return tokens.total
  const { input, output, cache } = tokens
  return input + output + cache.read + cache.write
}

/**
 * Decides whether auto-compaction has stopped making progress.
 *
 * Compares the token count seen at the previous auto-compaction trigger with
 * the count seen now. Compaction is considered stalled when the current count
 * is strictly greater than `previousTokens * threshold`, i.e. usage dropped by
 * less than `1 - threshold`. With the default threshold of 0.95 that means a
 * reduction of less than 5%. A reduction of exactly `1 - threshold`
 * (e.g. 200K → 190K at the default) counts as progress and does not trip the
 * guard.
 *
 * The typical cause of a stall is a configured context window that is smaller
 * than what the provider actually serves, which would otherwise make
 * auto-compaction loop forever.
 *
 * @param input.previousTokens Count at the previous trigger, or `undefined`
 *   when there has been no previous auto-compaction.
 * @param input.currentTokens Count at the current trigger.
 * @param input.threshold Fraction of `previousTokens` the current count must
 *   not exceed; defaults to 0.95.
 * @returns `true` when compaction is stalled, `false` otherwise.
 */
export function autoCompactStalled(input: {
  previousTokens: number | undefined
  currentTokens: number
  threshold?: number
}) {
  const { previousTokens: before, currentTokens: after } = input

  // First auto-compaction: there is nothing to compare against yet.
  if (before === undefined) return false

  // Provider-error compactions don't populate tokens (no step-finish runs),
  // so both readings are 0. The ratio test below can never fire for 0 → 0,
  // so the absence of any progress signal is itself treated as a stall.
  const noSignal = before === 0 && after === 0
  if (noSignal) return true

  const ceiling = before * (input.threshold ?? 0.95)
  return after > ceiling
}
61 changes: 54 additions & 7 deletions packages/opencode/src/session/prompt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import { ModelID, ProviderID } from "../provider/schema"
import { type Tool as AITool, tool, jsonSchema } from "ai"
import type { JSONSchema7 } from "@ai-sdk/provider"
import { SessionCompaction } from "./compaction"
import { autoCompactStalled, tokenCount } from "./overflow"
import { Bus } from "../bus"
import { SystemPrompt } from "./system"
import { Instruction } from "./instruction"
Expand Down Expand Up @@ -1242,6 +1243,12 @@ export const layer = Layer.effect(
const slog = elog.with({ sessionID })
let structured: unknown
let step = 0
// Token count reported when the previous auto-compaction was triggered
// in this run. Used to detect a stalled compaction loop — e.g. when the
// model's configured context window is smaller than what the provider
// actually serves, so isOverflow stays true after every compaction and
// we'd otherwise spin forever. See autoCompactStalled in overflow.ts.
let prevAutoCompactTokens: number | undefined
const session = yield* sessions.get(sessionID).pipe(Effect.orDie)

while (true) {
Expand Down Expand Up @@ -1303,13 +1310,33 @@ export const layer = Layer.effect(
continue
}

if (
lastFinished &&
lastFinished.summary !== true &&
(yield* compaction.isOverflow({ tokens: lastFinished.tokens, model }))
) {
yield* compaction.create({ sessionID, agent: lastUser.agent, model: lastUser.model, auto: true })
continue
if (lastFinished && lastFinished.summary !== true) {
const overflowing = yield* compaction.isOverflow({ tokens: lastFinished.tokens, model })
if (overflowing) {
const current = tokenCount(lastFinished.tokens)
if (autoCompactStalled({ previousTokens: prevAutoCompactTokens, currentTokens: current })) {
const error = new MessageV2.ContextOverflowError({
message:
`Auto-compaction made no meaningful progress ` +
`(${prevAutoCompactTokens} → ${current} tokens reported). ` +
`Aborting to prevent an infinite loop. The model's configured context window ` +
`may be smaller than what the provider actually serves — consider disabling ` +
`auto-compaction in your config, or update the model's context limit.`,
})
yield* slog.warn("auto-compact loop guard tripped", {
previousTokens: prevAutoCompactTokens,
currentTokens: current,
modelID: model.id,
providerID: model.providerID,
})
yield* bus.publish(Session.Event.Error, { sessionID, error: error.toObject() })
throw error
}
prevAutoCompactTokens = current
yield* compaction.create({ sessionID, agent: lastUser.agent, model: lastUser.model, auto: true })
continue
}
prevAutoCompactTokens = undefined
}

const agent = yield* agents.get(lastUser.agent)
Expand Down Expand Up @@ -1459,6 +1486,26 @@ export const layer = Layer.effect(

if (result === "stop") return "break" as const
if (result === "compact") {
const current = tokenCount(handle.message.tokens)
if (autoCompactStalled({ previousTokens: prevAutoCompactTokens, currentTokens: current })) {
const error = new MessageV2.ContextOverflowError({
message:
`Auto-compaction made no meaningful progress ` +
`(${prevAutoCompactTokens} → ${current} tokens reported). ` +
`Aborting to prevent an infinite loop. The model's configured context window ` +
`may be smaller than what the provider actually serves — consider disabling ` +
`auto-compaction in your config, or update the model's context limit.`,
})
yield* slog.warn("auto-compact loop guard tripped (post-call)", {
previousTokens: prevAutoCompactTokens,
currentTokens: current,
modelID: model.id,
providerID: model.providerID,
})
yield* bus.publish(Session.Event.Error, { sessionID, error: error.toObject() })
throw error
}
prevAutoCompactTokens = current
yield* compaction.create({
sessionID,
agent: lastUser.agent,
Expand Down
67 changes: 67 additions & 0 deletions packages/opencode/test/session/compaction.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { Image } from "@/image/image"
import { Agent } from "../../src/agent/agent"
import { LLM } from "../../src/session/llm"
import { SessionCompaction } from "../../src/session/compaction"
import { autoCompactStalled, tokenCount } from "../../src/session/overflow"
import { Token } from "@/util/token"
import * as Log from "@opencode-ai/core/util/log"
import { Permission } from "../../src/permission"
Expand Down Expand Up @@ -558,6 +559,72 @@ describe("session.compaction.isOverflow", () => {
)
})

// Unit tests for tokenCount: a truthy provider-reported total wins, otherwise
// the count is rebuilt from the per-field breakdown.
describe("session.overflow.tokenCount", () => {
  test("prefers tokens.total when provided", () => {
    const usage = { total: 12_345, input: 1, output: 2, reasoning: 3, cache: { read: 4, write: 5 } }
    const counted = tokenCount(usage)
    expect(counted).toBe(12_345)
  })

  test("sums input + output + cache.read + cache.write when total is missing", () => {
    // reasoning (25) is deliberately left out of the sum, matching the
    // accounting isOverflow has always used: 100 + 50 + 30 + 20 = 200.
    const usage = { input: 100, output: 50, reasoning: 25, cache: { read: 30, write: 20 } }
    const counted = tokenCount(usage)
    expect(counted).toBe(200)
  })

  test("sums when total is 0 (falsy)", () => {
    // A zero total is treated the same as a missing one: 10 + 20 + 3 + 2 = 35.
    const usage = { total: 0, input: 10, output: 20, reasoning: 5, cache: { read: 3, write: 2 } }
    const counted = tokenCount(usage)
    expect(counted).toBe(35)
  })
})

// Unit tests for autoCompactStalled: the guard that detects an auto-compaction
// loop which is not reducing reported token usage.
describe("session.overflow.autoCompactStalled", () => {
  test("returns false on the first auto-compaction (no prior token count)", () => {
    const stalled = autoCompactStalled({ previousTokens: undefined, currentTokens: 200_000 })
    expect(stalled).toBe(false)
  })

  test("returns true when token count did not drop (bug #28543 repro)", () => {
    const unchanged = 236_900
    const stalled = autoCompactStalled({ previousTokens: unchanged, currentTokens: unchanged })
    expect(stalled).toBe(true)
  })

  test("returns true when reduction is below the 5% default threshold", () => {
    // 200K → 195K is only a 2.5% reduction, which is not enough progress.
    const stalled = autoCompactStalled({ previousTokens: 200_000, currentTokens: 195_000 })
    expect(stalled).toBe(true)
  })

  test("returns false when reduction is at or above the 5% default threshold", () => {
    // 200K → 180K is a 10% reduction, so compaction is working.
    const stalled = autoCompactStalled({ previousTokens: 200_000, currentTokens: 180_000 })
    expect(stalled).toBe(false)
  })

  test("counts an exactly-threshold reduction as progress (not stalled)", () => {
    // "At least 5% reduction" is the bar for progress, and 200K → 190K is
    // exactly 5%, so the guard must stay quiet at the boundary.
    const atBoundary = autoCompactStalled({ previousTokens: 200_000, currentTokens: 190_000 })
    expect(atBoundary).toBe(false)
    // One token more (190_001) is a 4.9995% reduction, strictly under 5%, so it is stalled.
    const justShort = autoCompactStalled({ previousTokens: 200_000, currentTokens: 190_001 })
    expect(justShort).toBe(true)
  })

  test("honors a custom threshold override", () => {
    // threshold=0.5 demands a 50% reduction: 40% is not enough, 60% is.
    const insufficient = autoCompactStalled({ previousTokens: 100_000, currentTokens: 60_000, threshold: 0.5 })
    expect(insufficient).toBe(true)
    const sufficient = autoCompactStalled({ previousTokens: 100_000, currentTokens: 40_000, threshold: 0.5 })
    expect(sufficient).toBe(false)
  })

  test("returns true when current somehow exceeds previous", () => {
    const stalled = autoCompactStalled({ previousTokens: 200_000, currentTokens: 240_000 })
    expect(stalled).toBe(true)
  })

  test("counts repeated zero-token compactions as stalled", () => {
    // On the provider-error compaction path (ContextOverflowError caught in
    // SessionProcessor.halt) step-finish is skipped, leaving
    // handle.message.tokens at its zero-initialized value. Two such triggers
    // back to back carry no progress signal, and the ratio test alone would
    // never fire (0 > 0 * 0.95 is false), so 0 → 0 is handled explicitly.
    const stalled = autoCompactStalled({ previousTokens: 0, currentTokens: 0 })
    expect(stalled).toBe(true)
  })
})

describe("session.compaction.create", () => {
it.live(
"creates a compaction user message and part",
Expand Down
79 changes: 79 additions & 0 deletions packages/opencode/test/session/prompt.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2314,3 +2314,82 @@ noLLMServer.instance(
}),
30_000,
)

// Regression test for #28543: when the model's configured context window is
// smaller than what the provider actually serves, every successful model call
// keeps reporting "overflowing" token counts. Without a loop guard the runner
// keeps re-triggering auto-compaction even though it cannot reduce usage; with
// the guard it bails with a typed ContextOverflowError after the second stalled
// attempt.
it.instance(
  "auto-compact loop guard breaks when compaction makes no progress",
  () =>
    Effect.gen(function* () {
      const { llm } = yield* useServerConfig(providerCfg)
      const prompt = yield* SessionPrompt.Service
      // A single handle to the session service is used both to create the
      // session and to seed messages into it.
      const sessions = yield* Session.Service
      const session = yield* sessions.create({
        title: "Loop guard repro",
        permission: [{ permission: "*", pattern: "*", action: "allow" }],
      })

      // Test model: context=100K, output=10K → usable = 90K. A reported
      // count of 90_001 tokens is just over the overflow bar.
      const overflow = { input: 80_000, output: 10_001 }

      // Seed a prior finished assistant whose reported tokens already
      // overflow. This mimics the real-world pattern where the previous turn
      // already consumed more "tokens" than the model's misconfigured limit.
      const seedUser = yield* user(session.id, "earlier prompt")
      yield* sessions.updateMessage({
        id: MessageID.ascending(),
        role: "assistant",
        parentID: seedUser.id,
        sessionID: session.id,
        mode: "build",
        agent: "build",
        cost: 0,
        path: { cwd: "/tmp", root: "/tmp" },
        tokens: {
          input: overflow.input,
          output: overflow.output,
          reasoning: 0,
          cache: { read: 0, write: 0 },
          total: overflow.input + overflow.output,
        },
        modelID: ref.modelID,
        providerID: ref.providerID,
        time: { created: Date.now() },
        finish: "stop",
      })

      yield* prompt.prompt({
        sessionID: session.id,
        agent: "build",
        noReply: true,
        parts: [{ type: "text", text: "next prompt" }],
      })

      // 1) compaction summary call — small response so the summary itself
      //    finishes cleanly. summary=true marks this message so it never
      //    triggers the pre-call overflow check.
      yield* llm.text("compaction summary", { usage: { input: 1, output: 1 } })
      // 2) post-compact continue turn — STILL reports overflow → the
      //    processor sets needsCompaction=true → the post-call guard trips.
      yield* llm.text("second response (still overflowing)", { usage: overflow })

      // Capture the outcome as an Exit so the failure can be inspected
      // instead of aborting the test.
      const exit = yield* prompt.loop({ sessionID: session.id }).pipe(Effect.exit)

      expect(Exit.isFailure(exit)).toBe(true)
      if (Exit.isFailure(exit)) {
        const err = Cause.squash(exit.cause)
        expect(MessageV2.ContextOverflowError.isInstance(err)).toBe(true)
        if (MessageV2.ContextOverflowError.isInstance(err)) {
          expect(err.data.message).toContain("Auto-compaction made no meaningful progress")
        }
      }
    }),
  30_000,
)

Loading