Skip to content

Commit b51d246

Browse files
Sunrisepeak and claude committed
feat: add prompt caching support (Anthropic cache_control + OpenAI cached_tokens)
- Add CacheControl struct and optional cacheControl field to Message - Add cacheCreationTokens and cacheReadTokens fields to Usage - Anthropic: serialize system prompt as content blocks array with cache_control ephemeral - Anthropic: add cache_control on last tool definition for prompt caching - Anthropic: parse cache_creation_input_tokens and cache_read_input_tokens from usage (both streaming and non-streaming) - OpenAI: add stream_options include_usage for accurate token counting in streams - OpenAI: parse cached_tokens from prompt_tokens_details in usage - Version bump to 0.2.3 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 21fcd6e commit b51d246

File tree

4 files changed

+39
-4
lines changed

4 files changed

+39
-4
lines changed

src/providers/anthropic.cppm

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,8 @@ public:
101101
}
102102
if (msg.contains("usage")) {
103103
result.usage.inputTokens = msg["usage"].value("input_tokens", 0);
104+
result.usage.cacheCreationTokens = msg["usage"].value("cache_creation_input_tokens", 0);
105+
result.usage.cacheReadTokens = msg["usage"].value("cache_read_input_tokens", 0);
104106
}
105107
}
106108
} else if (event.event == "content_block_start") {
@@ -324,7 +326,13 @@ private:
324326
auto [systemText, msgArray] = extract_system_and_messages_(messages);
325327

326328
if (!systemText.empty()) {
327-
payload["system"] = systemText;
329+
Json sysBlocks = Json::array();
330+
Json block;
331+
block["type"] = "text";
332+
block["text"] = systemText;
333+
block["cache_control"] = Json{{"type", "ephemeral"}};
334+
sysBlocks.push_back(block);
335+
payload["system"] = sysBlocks;
328336
}
329337
payload["messages"] = msgArray;
330338

@@ -348,7 +356,8 @@ private:
348356
// Tools — Anthropic format (no function wrapper)
349357
if (params.tools.has_value() && !params.tools->empty()) {
350358
Json tools = Json::array();
351-
for (const auto& tool : *params.tools) {
359+
for (std::size_t i = 0; i < params.tools->size(); ++i) {
360+
const auto& tool = (*params.tools)[i];
352361
Json t;
353362
t["name"] = tool.name;
354363
t["description"] = tool.description;
@@ -357,6 +366,10 @@ private:
357366
} else {
358367
t["input_schema"] = Json{{"type", "object"}};
359368
}
369+
// Add cache_control on last tool
370+
if (i == params.tools->size() - 1) {
371+
t["cache_control"] = Json{{"type", "ephemeral"}};
372+
}
360373
tools.push_back(t);
361374
}
362375
payload["tools"] = tools;
@@ -428,6 +441,8 @@ private:
428441
result.usage.inputTokens = usage.value("input_tokens", 0);
429442
result.usage.outputTokens = usage.value("output_tokens", 0);
430443
result.usage.totalTokens = result.usage.inputTokens + result.usage.outputTokens;
444+
result.usage.cacheCreationTokens = usage.value("cache_creation_input_tokens", 0);
445+
result.usage.cacheReadTokens = usage.value("cache_read_input_tokens", 0);
431446
}
432447

433448
return result;

src/providers/openai.cppm

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@ public:
133133
result.usage.inputTokens = usage.value("prompt_tokens", 0);
134134
result.usage.outputTokens = usage.value("completion_tokens", 0);
135135
result.usage.totalTokens = result.usage.inputTokens + result.usage.outputTokens;
136+
if (usage.contains("prompt_tokens_details") && usage["prompt_tokens_details"].is_object()) {
137+
result.usage.cacheReadTokens = usage["prompt_tokens_details"].value("cached_tokens", 0);
138+
}
136139
}
137140
} catch (const Json::exception&) {
138141
// Skip malformed chunks
@@ -303,6 +306,7 @@ private:
303306

304307
if (stream) {
305308
payload["stream"] = true;
309+
payload["stream_options"] = Json{{"include_usage", true}};
306310
}
307311

308312
if (params.temperature.has_value()) {
@@ -425,6 +429,9 @@ private:
425429
result.usage.inputTokens = usage.value("prompt_tokens", 0);
426430
result.usage.outputTokens = usage.value("completion_tokens", 0);
427431
result.usage.totalTokens = result.usage.inputTokens + result.usage.outputTokens;
432+
if (usage.contains("prompt_tokens_details") && usage["prompt_tokens_details"].is_object()) {
433+
result.usage.cacheReadTokens = usage["prompt_tokens_details"].value("cached_tokens", 0);
434+
}
428435
}
429436

430437
return result;

src/types.cppm

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,16 @@ export using ContentPart =
4040
std::variant<TextContent, ImageContent, AudioContent, ToolUseContent, ToolResultContent>;
4141
export using Content = std::variant<std::string, std::vector<ContentPart>>;
4242

43+
export struct CacheControl {
44+
std::string type {"ephemeral"};
45+
};
46+
4347
// Message
4448
export struct Message {
4549
Role role;
4650
Content content;
4751
std::string name;
52+
std::optional<CacheControl> cacheControl;
4853

4954
static Message system(std::string_view text) {
5055
return Message{.role = Role::System, .content = std::string{text}};
@@ -111,6 +116,8 @@ export struct Usage {
111116
int inputTokens{0};
112117
int outputTokens{0};
113118
int totalTokens{0};
119+
int cacheCreationTokens{0};
120+
int cacheReadTokens{0};
114121
};
115122

116123
// Chat response
@@ -251,6 +258,9 @@ inline Json messageToJson(const Message& msg) {
251258
j["content"] = arr;
252259
}
253260
}, msg.content);
261+
if (msg.cacheControl) {
262+
j["cache_control"] = Json{{"type", msg.cacheControl->type}};
263+
}
254264
return j;
255265
}
256266

@@ -267,6 +277,9 @@ inline Message messageFromJson(const Json& j) {
267277
}
268278
msg.content = std::move(parts);
269279
}
280+
if (j.contains("cache_control") && j["cache_control"].is_object()) {
281+
msg.cacheControl = CacheControl{.type = j["cache_control"].value("type", "ephemeral")};
282+
}
270283
return msg;
271284
}
272285

@@ -277,7 +290,7 @@ void Conversation::save(std::string_view filePath) const {
277290
j["messages"].push_back(messageToJson(msg));
278291
}
279292
std::ofstream out{std::string{filePath}};
280-
out << j.dump(2);
293+
out << j.dump(2, ' ', false, Json::error_handler_t::replace);
281294
}
282295

283296
Conversation Conversation::load(std::string_view filePath) {

xmake.lua

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
set_languages("c++23")
2-
set_version("0.2.0")
2+
set_version("0.2.3")
33
set_policy("build.c++.modules", true)
44

55
add_repositories("mcpplibs-index https://github.com/mcpplibs/mcpplibs-index.git")

0 commit comments

Comments (0)