diff --git a/packages/opencode/src/session/retry.ts b/packages/opencode/src/session/retry.ts index 078319519fdd..a8cc0bc33ca1 100644 --- a/packages/opencode/src/session/retry.ts +++ b/packages/opencode/src/session/retry.ts @@ -62,7 +62,12 @@ export function delay(attempt: number, error?: MessageV2.APIError) { return cap(Math.min(RETRY_INITIAL_DELAY * Math.pow(RETRY_BACKOFF_FACTOR, attempt - 1), RETRY_MAX_DELAY_NO_HEADERS)) } -export function retryable(error: Err) { +// Branch order matches legacy retryable(): ContextOverflow -> APIError -> plain-text +// rate-limit -> transport -> JSON. An error message like "rate limit exceeded +// (ETIMEDOUT during retry)" must stay classified as rate-limit (not transport) +// for message semantics, but we still honor TRANSPORT_RETRY_CAP via isTransport +// when the message also matches a transport pattern. +export function classify(error: Err) { // context overflow errors should not be retried if (MessageV2.ContextOverflowError.isInstance(error)) return undefined if (MessageV2.APIError.isInstance(error)) { @@ -70,10 +75,16 @@ export function retryable(error: Err) { // 5xx errors are transient server failures and should always be retried, // even when the provider SDK doesn't explicitly mark them as retryable. if (!error.data.isRetryable && !(status !== undefined && status >= 500)) return undefined - if (error.data.responseBody?.includes("FreeUsageLimitError")) return GO_UPSELL_MESSAGE - return error.data.message.includes("Overloaded") ? "Provider is overloaded" : error.data.message + if (error.data.responseBody?.includes("FreeUsageLimitError")) { + return { message: GO_UPSELL_MESSAGE } + } + return { + message: error.data.message.includes("Overloaded") ? "Provider is overloaded" : error.data.message, + } } + const transport = transportMessage(error) + // Check for rate limit patterns in plain text error messages const msg = error.data?.message if (typeof msg === "string") { @@ -83,12 +94,12 @@ export function retryable(error: Err) { lower.includes("rate limit") || lower.includes("too many requests") ) { - return msg + if (transport) return { message: msg, isTransport: true as const } + return { message: msg } } } - const transport = transportMessage(error) - if (transport) return transport + if (transport) return { message: transport, isTransport: true as const } const json = iife(() => { try { @@ -106,17 +117,22 @@ export function retryable(error: Err) { const code = typeof json.code === "string" ? json.code : "" if (json.type === "error" && json.error?.type === "too_many_requests") { - return "Too Many Requests" + return { message: "Too Many Requests" } } if (code.includes("exhausted") || code.includes("unavailable")) { - return "Provider is overloaded" + return { message: "Provider is overloaded" } } if (json.type === "error" && typeof json.error?.code === "string" && json.error.code.includes("rate_limit")) { - return "Rate Limited" + return { message: "Rate Limited" } } return undefined } +// Kept to avoid churning the existing retry.test.ts suite. Prefer classify() in new code. +export function retryable(error: Err) { + return classify(error)?.message +} + export function policy(opts: { parse: (error: unknown) => Err set: (input: { attempt: number; message: string; next: number }) => Effect.Effect @@ -124,16 +140,15 @@ export function policy(opts: { return Schedule.fromStepWithMetadata( Effect.succeed((meta: Schedule.InputMetadata) => { const error = opts.parse(meta.input) - const message = retryable(error) - const transport = transportMessage(error) - if (!message) return Cause.done(meta.attempt) - if (transport && !MessageV2.APIError.isInstance(error) && meta.attempt > TRANSPORT_RETRY_CAP) { + const c = classify(error) + if (!c) return Cause.done(meta.attempt) + if (c.isTransport && !MessageV2.APIError.isInstance(error) && meta.attempt > TRANSPORT_RETRY_CAP) { return Cause.done(meta.attempt) } return Effect.gen(function* () { const wait = delay(meta.attempt, MessageV2.APIError.isInstance(error) ? error : undefined) const now = yield* Clock.currentTimeMillis - yield* opts.set({ attempt: meta.attempt, message, next: now + wait }) + yield* opts.set({ attempt: meta.attempt, message: c.message, next: now + wait }) return [meta.attempt, Duration.millis(wait)] as [number, Duration.Duration] }) }), diff --git a/packages/opencode/test/session/retry-classification.test.ts b/packages/opencode/test/session/retry-classification.test.ts new file mode 100644 index 000000000000..8113e264fa61 --- /dev/null +++ b/packages/opencode/test/session/retry-classification.test.ts @@ -0,0 +1,66 @@ +import { describe, expect, test } from "bun:test" +import { MessageV2 } from "../../src/session/message-v2" +import { SessionRetry } from "../../src/session/retry" + +describe("SessionRetry.classify", () => { + test("returns undefined for non-retryable context overflow", () => { + const err = new MessageV2.ContextOverflowError({ message: "too long" }).toObject() + expect(SessionRetry.classify(err)).toBeUndefined() + }) + + test("returns transport classification for SSEStallError", () => { + const err = new MessageV2.SSEStallError({ message: "SSE read timed out after 120000ms" }).toObject() + const out = SessionRetry.classify(err) + expect(out).toEqual({ message: "SSE read timed out after 120000ms", isTransport: true }) + }) + + test("returns non-transport classification for APIError 5xx", () => { + const err = new MessageV2.APIError({ + message: "upstream exploded", + statusCode: 503, + isRetryable: false, + }).toObject() + const out = SessionRetry.classify(err) + expect(out?.message).toBe("upstream exploded") + expect(out?.isTransport).toBeFalsy() + }) + + test("retryable() still returns a string for transport errors (legacy API)", () => { + const err = new MessageV2.SSEStallError({ message: "SSE read timed out after 120000ms" }).toObject() + expect(SessionRetry.retryable(err)).toBe("SSE read timed out after 120000ms") + }) + + test("classifies plain-text ETIMEDOUT transport error as isTransport", () => { + const err = { + _tag: "Error", + name: "UnknownError", + data: { message: "connect ETIMEDOUT 1.2.3.4:443" }, + } as unknown as Parameters[0] + const out = SessionRetry.classify(err) + expect(out).toEqual({ message: "connect ETIMEDOUT 1.2.3.4:443", isTransport: true }) + }) + + test("mixed rate-limit + transport message keeps rate-limit message AND transport cap", () => { + // Regression: legacy policy() treated any message matching a TRANSPORT_PATTERN + // as transport for cap purposes, even when the rate-limit branch picked the + // message. Preserve that cap behavior via isTransport. + const err = { + _tag: "Error", + name: "UnknownError", + data: { message: "rate limit exceeded (ETIMEDOUT during retry)" }, + } as unknown as Parameters[0] + const out = SessionRetry.classify(err) + expect(out?.message).toBe("rate limit exceeded (ETIMEDOUT during retry)") + expect(out?.isTransport).toBe(true) + }) + + test("APIError with overloaded message returns non-transport", () => { + const err = new MessageV2.APIError({ + message: "Overloaded", + statusCode: 529, + isRetryable: true, + }).toObject() + const out = SessionRetry.classify(err) + expect(out).toEqual({ message: "Provider is overloaded" }) + }) +})