Skip to content

Commit 3938f64

Browse files
committed
fix(smoke): make live provider assertions deterministic
1 parent 90b124a commit 3938f64

1 file changed

Lines changed: 58 additions & 10 deletions

File tree

Tests/AgentRunKitTests/Smoke/SmokeTestHelpers.swift

Lines changed: 58 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -532,14 +532,25 @@ struct SmokeBookReview: Codable, SchemaProviding {
532532
func assertSmokeNestedStructuredOutput(client: any LLMClient) async throws {
533533
let chat = Chat<EmptyContext>(client: client)
534534
let (review, history) = try await chat.send(
535-
"Write a short review of '1984' by George Orwell. Rate it 1-5. Include at least 2 tags.",
535+
"""
536+
Return a book review object with these exact fields:
537+
title: 1984
538+
author.name: George Orwell
539+
author.birthYear: 1903
540+
rating: 5
541+
tags: classic, dystopian
542+
sequel: null
543+
""",
536544
returning: SmokeBookReview.self
537545
)
538546

539-
#expect(!review.title.isEmpty)
540-
#expect(!review.author.name.isEmpty)
541-
#expect(review.rating >= 1 && review.rating <= 5)
542-
#expect(review.tags.count >= 2)
547+
#expect(review.title == "1984")
548+
#expect(review.author.name == "George Orwell")
549+
#expect(review.author.birthYear == 1903)
550+
#expect(review.rating == 5)
551+
#expect(review.tags.contains("classic"))
552+
#expect(review.tags.contains("dystopian"))
553+
#expect(review.sequel == nil)
543554
#expect(history.count >= 2)
544555
}
545556

@@ -681,29 +692,59 @@ func assertSmokeToolResultTruncation(client: any LLMClient) async throws {
681692

682693
func assertSmokeMaxMessages(client: any LLMClient) async throws {
683694
let addTool = try makeSmokeAddTool()
695+
let oldestPrompt = "Historical note alpha."
696+
let droppedPrompt = "Historical note beta."
697+
let retainedPrompt = "Historical note gamma."
698+
let seededHistory: [ChatMessage] = [
699+
.user(oldestPrompt),
700+
.assistant(AssistantMessage(content: "Noted alpha.")),
701+
.user(droppedPrompt),
702+
.assistant(AssistantMessage(content: "Noted beta.")),
703+
.user(retainedPrompt),
704+
.assistant(AssistantMessage(content: "Noted gamma.")),
705+
]
684706
let config = AgentConfiguration(
685707
maxIterations: 10,
686708
systemPrompt: """
687709
You are a calculator assistant. When asked to add numbers, use the add tool. \
688-
Perform each addition one at a time in sequence. \
689710
After all additions, report the last result using the finish tool.
690711
""",
691-
maxMessages: 6
712+
maxMessages: 3
692713
)
693714

694715
let agent = Agent<EmptyContext>(client: client, tools: [addTool], configuration: config)
695716
let result = try await agent.run(
696-
userMessage: "Add 1+1, then add 2+2, then add 3+3. Report the last result.",
717+
userMessage: "What is 2 + 2? Report the last result.",
718+
history: seededHistory,
697719
context: EmptyContext()
698720
)
699721

700722
let hasSystem = result.history.contains { $0.isSystem }
723+
let retainedOldestPrompt = result.history.contains { message in
724+
guard case let .user(content) = message else { return false }
725+
return content == oldestPrompt
726+
}
727+
let retainedDroppedPrompt = result.history.contains { message in
728+
guard case let .user(content) = message else { return false }
729+
return content == droppedPrompt
730+
}
731+
let addResultRetained = result.history.contains { message in
732+
guard case let .tool(_, name, content) = message else { return false }
733+
return name == "add" && content.contains("4")
734+
}
701735
#expect(hasSystem)
702-
#expect(result.history.count <= 8)
736+
#expect(!retainedOldestPrompt)
737+
#expect(!retainedDroppedPrompt)
738+
#expect(addResultRetained)
739+
try result.history.validateForAgentHistory()
703740
}
704741

705742
func assertSmokeBudgetEvents(client: any LLMClient) async throws {
706743
let addTool = try makeSmokeAddTool()
744+
let padding = Array(
745+
repeating: "This sentence exists to exercise context budget handling.",
746+
count: 24
747+
).joined(separator: " ")
707748
let config = AgentConfiguration(
708749
maxIterations: 5,
709750
systemPrompt: """
@@ -718,7 +759,14 @@ func assertSmokeBudgetEvents(client: any LLMClient) async throws {
718759
var budgetUpdatedCount = 0
719760
var budgetAdvisoryCount = 0
720761

721-
for try await event in agent.stream(userMessage: "What is 10 + 20?", context: EmptyContext()) {
762+
for try await event in agent.stream(
763+
userMessage: """
764+
\(padding)
765+
766+
What is 10 + 20? Use the add tool and then report the result using the finish tool.
767+
""",
768+
context: EmptyContext()
769+
) {
722770
switch event.kind {
723771
case .budgetUpdated:
724772
budgetUpdatedCount += 1

0 commit comments

Comments (0)