@@ -3732,6 +3732,7 @@ export interface components {
37323732 tool_calls?: components["schemas"]["ChatCompletionMessageFunctionToolCallParam"][];
37333733 /** Latency Ms */
37343734 latency_ms?: number | null;
3735+ usage?: components["schemas"]["MessageUsage"] | null;
37353736 };
37363737 /**
37373738 * ChatCompletionAssistantMessageParamWrapper
@@ -3762,6 +3763,7 @@ export interface components {
37623763 tool_calls?: components["schemas"]["ChatCompletionMessageFunctionToolCallParam"][];
37633764 /** Latency Ms */
37643765 latency_ms?: number | null;
3766+ usage?: components["schemas"]["MessageUsage"] | null;
37653767 };
37663768 /** ChatCompletionContentPartImageParam */
37673769 ChatCompletionContentPartImageParam: {
@@ -7226,6 +7228,47 @@ export interface components {
72267228 */
72277229 mean_total_llm_latency_ms?: number | null;
72287230 };
7231+ /**
7232+ * MessageUsage
7233+ * @description Token usage and cost for a single LLM call or a multi-message sum.
7234+ *
7235+ * Carries only the fields that are meaningfully aggregatable across
7236+ * messages: token counts and cost. Per-call latency lives on the
7237+ * individual message's `latency_ms` field; aggregating it across the
7238+ * full trace would mix latencies from different points in time, so
7239+ * `MessageUsage` does NOT carry `total_llm_latency_ms`.
7240+ *
7241+ * The `Usage` schema extends this one with `total_llm_latency_ms` for the
7242+ * in-flight per-run accumulator that tracks how long this run spent
7243+ * waiting on LLM calls.
7244+ */
7245+ MessageUsage: {
7246+ /**
7247+ * Input Tokens
7248+ * @description The number of input tokens used.
7249+ */
7250+ input_tokens?: number | null;
7251+ /**
7252+ * Output Tokens
7253+ * @description The number of output tokens used.
7254+ */
7255+ output_tokens?: number | null;
7256+ /**
7257+ * Total Tokens
7258+ * @description The total number of tokens used.
7259+ */
7260+ total_tokens?: number | null;
7261+ /**
7262+ * Cost
7263+ * @description The cost in US dollars, saved at runtime (prices can change over time).
7264+ */
7265+ cost?: number | null;
7266+ /**
7267+ * Cached Tokens
7268+ * @description Number of tokens served from prompt cache. `null` if not reported.
7269+ */
7270+ cached_tokens?: number | null;
7271+ };
72297272 /** ModelDetails */
72307273 ModelDetails: {
72317274 /** Id */
@@ -9599,6 +9642,8 @@ export interface components {
95999642 tags: string[];
96009643 /** @description Usage information for the task run. This includes the number of input tokens, output tokens, and total tokens used. */
96019644 usage?: components["schemas"]["Usage"] | null;
9645+ /** @description Sum of per-message token usage and cost across the entire trace, including any seeded prior trace. `null` on records created before this field existed. For a fresh (non-seeded) run, the token / cost fields equal those of `usage`. */
9646+ cumulative_usage?: components["schemas"]["MessageUsage"] | null;
96029647 /**
96039648 * Trace
96049649 * @description The trace of the task run in OpenAI format. This is the list of messages that were sent to/from the model.
@@ -9676,6 +9721,8 @@ export interface components {
96769721 tags: string[];
96779722 /** @description Usage information for the task run. This includes the number of input tokens, output tokens, and total tokens used. */
96789723 usage?: components["schemas"]["Usage"] | null;
9724+ /** @description Sum of per-message token usage and cost across the entire trace, including any seeded prior trace. `null` on records created before this field existed. For a fresh (non-seeded) run, the token / cost fields equal those of `usage`. */
9725+ cumulative_usage?: components["schemas"]["MessageUsage"] | null;
96799726 /**
96809727 * Trace
96819728 * @description The trace of the task run in OpenAI format. This is the list of messages that were sent to/from the model.
@@ -10184,27 +10231,33 @@ export interface components {
1018410231 };
1018510232 /**
1018610233 * Usage
10187- * @description Token usage and cost information for a task run.
10234+ * @description Token usage, cost, and aggregate LLM latency for a per-run accumulator.
10235+ *
10236+ * Extends `MessageUsage` with `total_llm_latency_ms`, which is
10237+ * only meaningful while a single run is in flight (its model calls run
10238+ * sequentially in real time). For per-message records and full-trace
10239+ * sums use `MessageUsage` — those values would mix latencies
10240+ * from different points in time, so the field doesn't apply.
1018810241 */
1018910242 Usage: {
1019010243 /**
1019110244 * Input Tokens
10192- * @description The number of input tokens used in the task run .
10245+ * @description The number of input tokens used.
1019310246 */
1019410247 input_tokens?: number | null;
1019510248 /**
1019610249 * Output Tokens
10197- * @description The number of output tokens used in the task run .
10250+ * @description The number of output tokens used.
1019810251 */
1019910252 output_tokens?: number | null;
1020010253 /**
1020110254 * Total Tokens
10202- * @description The total number of tokens used in the task run .
10255+ * @description The total number of tokens used.
1020310256 */
1020410257 total_tokens?: number | null;
1020510258 /**
1020610259 * Cost
10207- * @description The cost of the task run in US dollars, saved at runtime (prices can change over time).
10260+ * @description The cost in US dollars, saved at runtime (prices can change over time).
1020810261 */
1020910262 cost?: number | null;
1021010263 /**
0 commit comments