Skip to content

Commit 811d290

Browse files
authored
Surface local backend detection and ultrathink config (#487)
- Probe Ollama and LM Studio when loading Codex config
- Show backend detection badges in settings and model picker
- Map ultrathink to SDK effort/max-thinking settings for Claude
1 parent 394b218 commit 811d290

17 files changed

+707
-34
lines changed

apps/server/src/provider/Layers/ClaudeAdapter.test.ts

Lines changed: 63 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -548,7 +548,7 @@ describe("ClaudeAdapterLive", () => {
548548
);
549549
});
550550

551-
it.effect("treats ultrathink as a prompt keyword instead of a session effort", () => {
551+
it.effect("maps ultrathink on Sonnet 4.6 to effort=high with max thinking budget", () => {
552552
const harness = makeHarness();
553553
return Effect.gen(function* () {
554554
const adapter = yield* ClaudeAdapter;
@@ -577,7 +577,11 @@ describe("ClaudeAdapterLive", () => {
577577
});
578578

579579
const createInput = harness.getLastCreateQueryInput();
580-
assert.equal(createInput?.options.effort, undefined);
580+
// Sonnet 4.6 has no "max" effort, so ultrathink collapses to "high".
581+
assert.equal(createInput?.options.effort, "high");
582+
// Thinking budget is bumped to the ultrathink default.
583+
assert.equal(createInput?.options.maxThinkingTokens, 63999);
584+
// Prompt prefix is still applied on top of the SDK boost.
581585
const promptText = yield* Effect.promise(() => readFirstPromptText(createInput));
582586
assert.equal(promptText, "Ultrathink:\nInvestigate the edge cases");
583587
}).pipe(
@@ -586,6 +590,63 @@ describe("ClaudeAdapterLive", () => {
586590
);
587591
});
588592

593+
it.effect("maps ultrathink on Opus 4.7 to effort=max with max thinking budget", () => {
594+
const harness = makeHarness();
595+
return Effect.gen(function* () {
596+
const adapter = yield* ClaudeAdapter;
597+
yield* adapter.startSession({
598+
threadId: THREAD_ID,
599+
provider: "claudeAgent",
600+
model: "claude-opus-4-7",
601+
runtimeMode: "full-access",
602+
modelOptions: {
603+
claudeAgent: {
604+
effort: "ultrathink",
605+
},
606+
},
607+
});
608+
609+
const createInput = harness.getLastCreateQueryInput();
610+
// Opus 4.7 supports "max", so ultrathink gets the top effort level.
611+
assert.equal(createInput?.options.effort, "max");
612+
assert.equal(createInput?.options.maxThinkingTokens, 63999);
613+
}).pipe(
614+
Effect.provideService(Random.Random, makeDeterministicRandomService()),
615+
Effect.provide(harness.layer),
616+
);
617+
});
618+
619+
it.effect("preserves user-provided maxThinkingTokens when higher than ultrathink default", () => {
620+
const harness = makeHarness();
621+
return Effect.gen(function* () {
622+
const adapter = yield* ClaudeAdapter;
623+
yield* adapter.startSession({
624+
threadId: THREAD_ID,
625+
provider: "claudeAgent",
626+
model: "claude-opus-4-7",
627+
runtimeMode: "full-access",
628+
providerOptions: {
629+
claudeAgent: {
630+
maxThinkingTokens: 90000,
631+
},
632+
},
633+
modelOptions: {
634+
claudeAgent: {
635+
effort: "ultrathink",
636+
},
637+
},
638+
});
639+
640+
const createInput = harness.getLastCreateQueryInput();
641+
assert.equal(createInput?.options.effort, "max");
642+
// User override is higher than the ultrathink default, so it passes through.
643+
assert.equal(createInput?.options.maxThinkingTokens, 90000);
644+
}).pipe(
645+
Effect.provideService(Random.Random, makeDeterministicRandomService()),
646+
Effect.provide(harness.layer),
647+
);
648+
});
649+
589650
it.effect("embeds image attachments in Claude user messages", () => {
590651
const baseDir = mkdtempSync(path.join(os.tmpdir(), "claude-attachments-"));
591652
const harness = makeHarness({

apps/server/src/provider/Layers/ClaudeAdapter.ts

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ import {
4242
} from "@okcode/contracts";
4343
import {
4444
applyClaudePromptEffortPrefix,
45-
getEffectiveClaudeCodeEffort,
4645
getReasoningEffortOptions,
46+
resolveClaudeUltrathinkSdkConfig,
4747
resolveReasoningEffortForProvider,
4848
supportsClaudeFastMode,
4949
supportsClaudeThinkingToggle,
@@ -2811,7 +2811,12 @@ function makeClaudeAdapter(options?: ClaudeAdapterLiveOptions) {
28112811
supportsClaudeThinkingToggle(input.model)
28122812
? input.modelOptions.claudeAgent.thinking
28132813
: undefined;
2814-
const effectiveEffort = getEffectiveClaudeCodeEffort(effort);
2814+
const { effort: sdkEffort, maxThinkingTokens: sdkMaxThinkingTokens } =
2815+
resolveClaudeUltrathinkSdkConfig(
2816+
input.model,
2817+
effort,
2818+
providerOptions?.maxThinkingTokens ?? null,
2819+
);
28152820
const permissionMode =
28162821
toPermissionMode(providerOptions?.permissionMode) ??
28172822
(input.runtimeMode === "full-access" ? "bypassPermissions" : undefined);
@@ -2833,13 +2838,13 @@ function makeClaudeAdapter(options?: ClaudeAdapterLiveOptions) {
28332838
...(input.model ? { model: input.model } : {}),
28342839
pathToClaudeCodeExecutable: providerOptions?.binaryPath ?? "claude",
28352840
settingSources: [...CLAUDE_SETTING_SOURCES],
2836-
...(effectiveEffort ? { effort: effectiveEffort } : {}),
2841+
...(sdkEffort ? { effort: sdkEffort } : {}),
28372842
...(permissionMode ? { permissionMode } : {}),
28382843
...(permissionMode === "bypassPermissions"
28392844
? { allowDangerouslySkipPermissions: true }
28402845
: {}),
2841-
...(providerOptions?.maxThinkingTokens !== undefined
2842-
? { maxThinkingTokens: providerOptions.maxThinkingTokens }
2846+
...(sdkMaxThinkingTokens !== undefined
2847+
? { maxThinkingTokens: sdkMaxThinkingTokens }
28432848
: {}),
28442849
...(Object.keys(settings).length > 0 ? { settings } : {}),
28452850
...(existingResumeSessionId ? { resume: existingResumeSessionId } : {}),
@@ -2930,10 +2935,10 @@ function makeClaudeAdapter(options?: ClaudeAdapterLiveOptions) {
29302935
config: {
29312936
...(input.model ? { model: input.model } : {}),
29322937
...(input.cwd ? { cwd: input.cwd } : {}),
2933-
...(effectiveEffort ? { effort: effectiveEffort } : {}),
2938+
...(sdkEffort ? { effort: sdkEffort } : {}),
29342939
...(permissionMode ? { permissionMode } : {}),
2935-
...(providerOptions?.maxThinkingTokens !== undefined
2936-
? { maxThinkingTokens: providerOptions.maxThinkingTokens }
2940+
...(sdkMaxThinkingTokens !== undefined
2941+
? { maxThinkingTokens: sdkMaxThinkingTokens }
29372942
: {}),
29382943
...(fastMode ? { fastMode: true } : {}),
29392944
},

apps/server/src/provider/codexConfig.ts

Lines changed: 36 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,12 @@ import {
1111
import { Effect, FileSystem, Result } from "effect";
1212
import { parse as parseToml } from "toml";
1313

14+
import { probeCodexLocalBackends } from "./codexLocalBackendProbe.ts";
15+
1416
/** Options accepted by `readCodexConfigSummary`. */
export interface CodexConfigReadOptions {
1517
// Forwarded to `resolveCodexConfigPath` together with the rest of the options.
readonly homePath?: string | null | undefined;
1618
// Forwarded to `resolveCodexConfigPath` together with the rest of the options.
readonly env?: NodeJS.ProcessEnv | undefined;
19+
// Only when this is exactly `true` does `readCodexConfigSummary` probe the
// local backends and attach `detectedLocalBackends` to the summary.
readonly probeLocalBackends?: boolean | undefined;
1720
}
1821

1922
function emptyCodexConfigSummary(): ServerCodexConfigSummary {
@@ -182,19 +185,42 @@ export const readCodexConfigSummary = (options: CodexConfigReadOptions = {}) =>
182185
const fileSystem = yield* FileSystem.FileSystem;
183186
const configPath = resolveCodexConfigPath(options);
184187
const exists = yield* fileSystem.exists(configPath).pipe(Effect.orElseSucceed(() => false));
185-
if (!exists) {
186-
return emptyCodexConfigSummary();
187-
}
188188

189-
const content = yield* fileSystem.readFileString(configPath).pipe(Effect.result);
190-
if (Result.isFailure(content)) {
191-
return {
192-
...emptyCodexConfigSummary(),
193-
parseError: getParseErrorMessage(content.failure),
194-
};
189+
const baseSummary: ServerCodexConfigSummary = yield* Effect.gen(function* () {
190+
if (!exists) {
191+
return emptyCodexConfigSummary();
192+
}
193+
194+
const content = yield* fileSystem.readFileString(configPath).pipe(Effect.result);
195+
if (Result.isFailure(content)) {
196+
return {
197+
...emptyCodexConfigSummary(),
198+
parseError: getParseErrorMessage(content.failure),
199+
} satisfies ServerCodexConfigSummary;
200+
}
201+
202+
return summarizeCodexConfigToml(content.success);
203+
});
204+
205+
if (options.probeLocalBackends !== true) {
206+
return baseSummary;
195207
}
196208

197-
return summarizeCodexConfigToml(content.success);
209+
const probes = yield* probeCodexLocalBackends();
210+
211+
return {
212+
...baseSummary,
213+
detectedLocalBackends: {
214+
ollama:
215+
probes.ollama.modelCount !== undefined
216+
? { reachable: probes.ollama.reachable, modelCount: probes.ollama.modelCount }
217+
: { reachable: probes.ollama.reachable },
218+
lmstudio:
219+
probes.lmstudio.modelCount !== undefined
220+
? { reachable: probes.lmstudio.reachable, modelCount: probes.lmstudio.modelCount }
221+
: { reachable: probes.lmstudio.reachable },
222+
},
223+
} satisfies ServerCodexConfigSummary;
198224
});
199225

200226
export function usesOpenAiLoginForSelectedCodexBackend(summary: ServerCodexConfigSummary): boolean {
apps/server/src/provider/codexLocalBackendProbe.ts

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
import { Effect } from "effect";
2+
3+
/** Outcome of probing a single local model backend over HTTP. */
export interface LocalBackendProbeResult {
4+
// True when the backend answered with a 2xx status; false for non-2xx
// responses, for failed requests, and for the suppressed-probe stub.
readonly reachable: boolean;
5+
// Length of the model array found in the JSON body, when one was present.
readonly modelCount?: number;
6+
// Short description of what went wrong. May accompany `reachable: true`
// when the body could not be parsed as JSON.
readonly error?: string;
7+
}
8+
9+
/** Probe results for every local backend checked by `probeCodexLocalBackends`. */
export interface LocalBackendProbes {
10+
// Result of the Ollama probe.
readonly ollama: LocalBackendProbeResult;
11+
// Result of the LM Studio probe.
readonly lmstudio: LocalBackendProbeResult;
12+
}
13+
14+
// Timeout applied to a probe request when the caller does not pass `timeoutMs`.
const DEFAULT_PROBE_TIMEOUT_MS = 1_500;
15+
16+
// Endpoint queried by `probeOllama`; its model list is read from the `models` key.
const OLLAMA_TAGS_URL = "http://localhost:11434/api/tags";
17+
// Endpoint queried by `probeLmStudio`; its model list is read from the `data` key.
const LM_STUDIO_MODELS_URL = "http://localhost:1234/v1/models";
18+
19+
/**
 * Reports whether local backend probing is switched off through the
 * process environment.
 *
 * @returns `true` when `OKCODE_DISABLE_LOCAL_BACKEND_PROBES` is exactly `"1"`
 *   or `VITEST` is exactly `"true"`; `false` otherwise.
 */
function isSuppressedByEnv(): boolean {
  const { OKCODE_DISABLE_LOCAL_BACKEND_PROBES: disableFlag, VITEST: vitestFlag } = process.env;
  if (disableFlag === "1") {
    return true;
  }
  return vitestFlag === "true";
}
23+
24+
/**
 * Converts an unknown thrown value into a short, human-readable message.
 *
 * @param cause - The value that was thrown or rejected with.
 * @param fallback - Message to use when `cause` carries no usable text.
 * @returns `"timeout"` for any `Error` named `AbortError`; otherwise the
 *   error's message or the string itself when non-blank; otherwise `fallback`.
 */
function toErrorMessage(cause: unknown, fallback: string): string {
  if (cause instanceof Error) {
    // Inspect the name before the message so that an abort carrying an empty
    // message is still reported as a timeout rather than the generic fallback.
    if (cause.name === "AbortError") {
      return "timeout";
    }
    if (cause.message.trim().length > 0) {
      return cause.message;
    }
    return fallback;
  }
  if (typeof cause === "string" && cause.trim().length > 0) {
    return cause;
  }
  return fallback;
}
36+
37+
/**
 * Extracts the number of models from a parsed response body.
 *
 * @param data - Parsed JSON of unknown shape.
 * @param key - Property expected to hold the model array.
 * @returns The array length when `data[key]` is an array; `undefined` when
 *   `data` is not a non-null object or the property is not an array.
 */
function readModelCount(data: unknown, key: "models" | "data"): number | undefined {
  if (typeof data !== "object" || data === null) {
    return undefined;
  }
  const candidate = (data as Record<string, unknown>)[key];
  return Array.isArray(candidate) ? candidate.length : undefined;
}
47+
48+
/**
 * Sends one GET request to a local backend and summarises the outcome.
 *
 * Failures raised by the request or by body parsing are converted into a
 * result object instead of being rethrown.
 *
 * @param input.url - Endpoint to query.
 * @param input.modelsKey - Property of the JSON body that holds the model array.
 * @param input.timeoutMs - Milliseconds after which the request is aborted.
 * @returns `reachable: true` for any 2xx response (with `modelCount` when the
 *   body contains the expected array, or `error` when the body is not JSON);
 *   `reachable: false` with an `error` for non-2xx responses and request failures.
 */
async function probeHttp(input: {
  readonly url: string;
  readonly modelsKey: "models" | "data";
  readonly timeoutMs: number;
}): Promise<LocalBackendProbeResult> {
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), input.timeoutMs);
  try {
    const response = await fetch(input.url, {
      method: "GET",
      signal: controller.signal,
      headers: { accept: "application/json" },
    });
    if (!response.ok) {
      // The error body is never read, so cancel it to let the connection be
      // released promptly. A failed cancel must not mask the HTTP status.
      await response.body?.cancel().catch(() => undefined);
      return {
        reachable: false,
        error: `HTTP ${response.status}`,
      };
    }
    try {
      const body: unknown = await response.json();
      const modelCount = readModelCount(body, input.modelsKey);
      return modelCount !== undefined ? { reachable: true, modelCount } : { reachable: true };
    } catch (cause) {
      // Server responded 2xx but body wasn't JSON — still counts as reachable.
      return {
        reachable: true,
        error: toErrorMessage(cause, "Non-JSON response"),
      };
    }
  } catch (cause) {
    return {
      reachable: false,
      error: toErrorMessage(cause, "Network error"),
    };
  } finally {
    // Always disarm the abort timer, whichever path returned.
    clearTimeout(timeout);
  }
}
87+
88+
/** Options shared by `probeOllama`, `probeLmStudio` and `probeCodexLocalBackends`. */
export interface ProbeLocalBackendOptions {
89+
// Per-request timeout in milliseconds; `DEFAULT_PROBE_TIMEOUT_MS` is used when omitted.
readonly timeoutMs?: number | undefined;
90+
}
91+
92+
// Result returned without any network call when probing is suppressed by the environment.
const UNREACHABLE_STUB: LocalBackendProbeResult = { reachable: false };
93+
94+
/**
 * Builds an effect that checks whether Ollama answers on its tags endpoint.
 *
 * The environment suppression check runs when this function is called, not
 * when the returned effect is executed. When suppressed, the effect succeeds
 * with the unreachable stub and no request is made.
 *
 * @param options - Optional per-request timeout override.
 * @returns An effect producing the probe result.
 */
export const probeOllama = (
  options: ProbeLocalBackendOptions = {},
): Effect.Effect<LocalBackendProbeResult> =>
  isSuppressedByEnv()
    ? Effect.succeed(UNREACHABLE_STUB)
    : Effect.promise(() => {
        const timeoutMs = options.timeoutMs ?? DEFAULT_PROBE_TIMEOUT_MS;
        return probeHttp({ url: OLLAMA_TAGS_URL, modelsKey: "models", timeoutMs });
      });
108+
109+
/**
 * Builds an effect that checks whether LM Studio answers on its models endpoint.
 *
 * The environment suppression check runs when this function is called, not
 * when the returned effect is executed. When suppressed, the effect succeeds
 * with the unreachable stub and no request is made.
 *
 * @param options - Optional per-request timeout override.
 * @returns An effect producing the probe result.
 */
export const probeLmStudio = (
  options: ProbeLocalBackendOptions = {},
): Effect.Effect<LocalBackendProbeResult> =>
  isSuppressedByEnv()
    ? Effect.succeed(UNREACHABLE_STUB)
    : Effect.promise(() => {
        const timeoutMs = options.timeoutMs ?? DEFAULT_PROBE_TIMEOUT_MS;
        return probeHttp({ url: LM_STUDIO_MODELS_URL, modelsKey: "data", timeoutMs });
      });
123+
124+
/**
 * Combines the Ollama and LM Studio probes into a single effect.
 *
 * Both probes receive the same options and are handed to `Effect.all` with
 * `concurrency: "unbounded"`.
 *
 * @param options - Optional per-request timeout override, applied to both probes.
 * @returns An effect producing one result per backend, keyed by backend name.
 */
export const probeCodexLocalBackends = (
  options: ProbeLocalBackendOptions = {},
): Effect.Effect<LocalBackendProbes> => {
  const ollama = probeOllama(options);
  const lmstudio = probeLmStudio(options);
  return Effect.all({ ollama, lmstudio }, { concurrency: "unbounded" });
};

apps/server/src/wsServer.test.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,10 @@ const defaultCodexConfigSummary = {
9696
selectedModelProviderId: null,
9797
entries: [],
9898
parseError: null,
99+
detectedLocalBackends: {
100+
ollama: { reachable: false },
101+
lmstudio: { reachable: false },
102+
},
99103
} as const;
100104

101105
const expectedServerBuildInfo = expect.objectContaining({

apps/server/src/wsServer.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1640,7 +1640,7 @@ export const createServer = Effect.fn(function* (): Effect.fn.Return<
16401640
case WS_METHODS.serverGetConfig:
16411641
const keybindingsConfig = yield* keybindingsManager.loadConfigState;
16421642
const providers = yield* getProviderStatuses();
1643-
const codexConfig = yield* readCodexConfigSummary();
1643+
const codexConfig = yield* readCodexConfigSummary({ probeLocalBackends: true });
16441644
return {
16451645
cwd,
16461646
keybindingsConfigPath,

apps/web/src/components/ChatView.tsx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5415,6 +5415,10 @@ export default function ChatView({
54155415
: selectableProviders
54165416
).includes(provider.provider),
54175417
)}
5418+
codexSelectedModelProviderId={
5419+
serverConfigQuery.data?.codexConfig?.selectedModelProviderId ?? null
5420+
}
5421+
openclawGatewayUrl={settings.openclawGatewayUrl}
54185422
{...(composerProviderState.modelPickerIconClassName
54195423
? {
54205424
activeProviderIconClassName:

0 commit comments

Comments
 (0)