Skip to content

Commit 2c7c1db

Browse files
committed
Add tests for tool registry, policy evaluator, and inbox triage assistant
42 tests covering:
- ToolRegistryTests: registration, duplicate rejection, lookup, case-insensitive keys, scope filtering
- AgentPolicyEvaluatorTests: allowlist enforcement, risk-level review gating (high/medium always require review; low requires review by default), auto-apply opt-in, disabled-profile denial, policy JSON parsing
- InboxTriageAssistantTests: proposal creation, no direct board mutation, policy routing, validation, edge cases
1 parent a45f14e commit 2c7c1db

3 files changed

Lines changed: 704 additions & 0 deletions

File tree

Lines changed: 282 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,282 @@
1+
using FluentAssertions;
2+
using Moq;
3+
using Taskdeck.Application.Interfaces;
4+
using Taskdeck.Application.Services;
5+
using Taskdeck.Domain.Agents;
6+
using Taskdeck.Domain.Entities;
7+
using Taskdeck.Domain.Enums;
8+
using Taskdeck.Tests.Support;
9+
using Xunit;
10+
11+
namespace Taskdeck.Application.Tests.Services;
12+
13+
/// <summary>
/// Unit tests for <see cref="AgentPolicyEvaluator"/>: input validation, registry and
/// profile lookups, allowlist enforcement, risk-level review gating, decision logging,
/// and policy JSON parsing.
/// </summary>
/// <remarks>
/// The evaluator is built against a mocked <see cref="IUnitOfWork"/> whose
/// <c>AgentProfiles</c> repository is also a mock, a real (initially empty)
/// <see cref="TaskdeckToolRegistry"/>, and an in-memory logger whose entries the tests inspect.
/// </remarks>
public class AgentPolicyEvaluatorTests
{
    private readonly Mock<IUnitOfWork> _unitOfWorkMock;
    private readonly Mock<IAgentProfileRepository> _profileRepoMock;
    private readonly TaskdeckToolRegistry _toolRegistry;
    private readonly InMemoryLogger<AgentPolicyEvaluator> _logger;
    private readonly AgentPolicyEvaluator _evaluator;

    /// <summary>
    /// Wires the evaluator under test to its mocked and in-memory collaborators.
    /// </summary>
    public AgentPolicyEvaluatorTests()
    {
        _unitOfWorkMock = new Mock<IUnitOfWork>();
        _profileRepoMock = new Mock<IAgentProfileRepository>();
        _unitOfWorkMock.Setup(u => u.AgentProfiles).Returns(_profileRepoMock.Object);

        _toolRegistry = new TaskdeckToolRegistry();
        _logger = new InMemoryLogger<AgentPolicyEvaluator>();
        _evaluator = new AgentPolicyEvaluator(_unitOfWorkMock.Object, _toolRegistry, _logger);
    }

    /// <summary>
    /// Builds a tool definition with a display name and description derived from <paramref name="key"/>.
    /// </summary>
    /// <param name="key">Tool key used for registration and lookup.</param>
    /// <param name="riskLevel">Risk level assigned to the tool.</param>
    /// <param name="scope">Tool scope; defaults to <see cref="ToolScope.Inbox"/>.</param>
    /// <returns>A new <see cref="TaskdeckToolDefinition"/>.</returns>
    private static ITaskdeckTool CreateTool(
        string key, ToolRiskLevel riskLevel, ToolScope scope = ToolScope.Inbox)
    {
        return new TaskdeckToolDefinition(key, $"Tool {key}", $"Desc for {key}", scope, riskLevel);
    }

    /// <summary>
    /// Builds a workspace-scoped agent profile, optionally with a policy document and/or disabled.
    /// </summary>
    /// <param name="policyJson">Policy JSON handed to the profile constructor; <c>null</c> to omit it.</param>
    /// <param name="isEnabled">When <c>false</c>, the profile is disabled before being returned.</param>
    /// <returns>The configured <see cref="AgentProfile"/>.</returns>
    private static AgentProfile CreateProfile(
        string? policyJson = null,
        bool isEnabled = true)
    {
        var profile = new AgentProfile(
            Guid.NewGuid(),
            "Test Agent",
            "triage-v1",
            AgentScopeType.Workspace,
            policyJson: policyJson);

        if (!isEnabled)
        {
            profile.SetEnabled(false);
        }

        return profile;
    }

    /// <summary>
    /// Arranges the profile repository mock to return <paramref name="profile"/> when it is
    /// looked up by its own id, for any cancellation token.
    /// </summary>
    /// <param name="profile">Profile the repository should resolve.</param>
    private void GivenProfileExists(AgentProfile profile)
    {
        _profileRepoMock
            .Setup(r => r.GetByIdAsync(profile.Id, It.IsAny<CancellationToken>()))
            .ReturnsAsync(profile);
    }

    [Fact]
    public async Task EvaluateToolUse_ShouldDeny_WhenAgentProfileIdIsEmpty()
    {
        var decision = await _evaluator.EvaluateToolUseAsync(Guid.Empty, "inbox.triage");

        decision.Allowed.Should().BeFalse();
        decision.Reason.Should().Contain("required");
    }

    [Fact]
    public async Task EvaluateToolUse_ShouldDeny_WhenToolKeyIsEmpty()
    {
        var decision = await _evaluator.EvaluateToolUseAsync(Guid.NewGuid(), "");

        decision.Allowed.Should().BeFalse();
        decision.Reason.Should().Contain("required");
    }

    [Fact]
    public async Task EvaluateToolUse_ShouldDeny_WhenToolNotInRegistry()
    {
        // Nothing is registered in this test, so any key is unknown to the registry.
        var profileId = Guid.NewGuid();

        var decision = await _evaluator.EvaluateToolUseAsync(profileId, "nonexistent.tool");

        decision.Allowed.Should().BeFalse();
        decision.Reason.Should().Contain("not registered");
    }

    [Fact]
    public async Task EvaluateToolUse_ShouldDeny_WhenProfileNotFound()
    {
        _toolRegistry.RegisterTool(CreateTool("inbox.triage", ToolRiskLevel.Medium));
        _profileRepoMock
            .Setup(r => r.GetByIdAsync(It.IsAny<Guid>(), It.IsAny<CancellationToken>()))
            .ReturnsAsync((AgentProfile?)null);

        var decision = await _evaluator.EvaluateToolUseAsync(Guid.NewGuid(), "inbox.triage");

        decision.Allowed.Should().BeFalse();
        decision.Reason.Should().Contain("not found");
    }

    [Fact]
    public async Task EvaluateToolUse_ShouldDeny_WhenProfileIsDisabled()
    {
        _toolRegistry.RegisterTool(CreateTool("inbox.triage", ToolRiskLevel.Medium));
        var profile = CreateProfile(isEnabled: false);
        GivenProfileExists(profile);

        var decision = await _evaluator.EvaluateToolUseAsync(profile.Id, "inbox.triage");

        decision.Allowed.Should().BeFalse();
        decision.Reason.Should().Contain("disabled");
    }

    [Fact]
    public async Task EvaluateToolUse_ShouldDeny_WhenToolNotInAllowlist()
    {
        // Both tools are registered, but the policy only allows "board.read".
        _toolRegistry.RegisterTool(CreateTool("inbox.triage", ToolRiskLevel.Medium));
        _toolRegistry.RegisterTool(CreateTool("board.read", ToolRiskLevel.Low));
        var profile = CreateProfile(policyJson: "{\"allowedTools\":[\"board.read\"]}");
        GivenProfileExists(profile);

        var decision = await _evaluator.EvaluateToolUseAsync(profile.Id, "inbox.triage");

        decision.Allowed.Should().BeFalse();
        decision.Reason.Should().Contain("not in this agent's allowed tool list");
    }

    [Fact]
    public async Task EvaluateToolUse_ShouldAllow_WhenToolInAllowlist()
    {
        _toolRegistry.RegisterTool(CreateTool("inbox.triage", ToolRiskLevel.Medium));
        var profile = CreateProfile(policyJson: "{\"allowedTools\":[\"inbox.triage\"]}");
        GivenProfileExists(profile);

        var decision = await _evaluator.EvaluateToolUseAsync(profile.Id, "inbox.triage");

        decision.Allowed.Should().BeTrue();
    }

    [Fact]
    public async Task EvaluateToolUse_ShouldAllow_WhenAllowlistIsEmpty()
    {
        _toolRegistry.RegisterTool(CreateTool("inbox.triage", ToolRiskLevel.Medium));
        // No policy JSON is supplied, so there is no allowlist; this test pins that as "all tools allowed".
        var profile = CreateProfile();
        GivenProfileExists(profile);

        var decision = await _evaluator.EvaluateToolUseAsync(profile.Id, "inbox.triage");

        decision.Allowed.Should().BeTrue();
    }

    [Fact]
    public async Task EvaluateToolUse_ShouldRequireReview_ForHighRiskTool()
    {
        _toolRegistry.RegisterTool(CreateTool("board.delete", ToolRiskLevel.High));
        var profile = CreateProfile();
        GivenProfileExists(profile);

        var decision = await _evaluator.EvaluateToolUseAsync(profile.Id, "board.delete");

        decision.Allowed.Should().BeTrue();
        decision.RequiresReview.Should().BeTrue();
        decision.Reason.Should().Contain("High-risk");
    }

    [Fact]
    public async Task EvaluateToolUse_ShouldRequireReview_ForMediumRiskTool()
    {
        _toolRegistry.RegisterTool(CreateTool("inbox.triage", ToolRiskLevel.Medium));
        var profile = CreateProfile();
        GivenProfileExists(profile);

        var decision = await _evaluator.EvaluateToolUseAsync(profile.Id, "inbox.triage");

        decision.Allowed.Should().BeTrue();
        decision.RequiresReview.Should().BeTrue();
        decision.Reason.Should().Contain("Medium-risk");
    }

    [Fact]
    public async Task EvaluateToolUse_ShouldRequireReview_ForLowRiskTool_ByDefault()
    {
        _toolRegistry.RegisterTool(CreateTool("board.read-cards", ToolRiskLevel.Low));
        // The policy does not set autoApplyLowRisk.
        var profile = CreateProfile();
        GivenProfileExists(profile);

        var decision = await _evaluator.EvaluateToolUseAsync(profile.Id, "board.read-cards");

        decision.Allowed.Should().BeTrue();
        decision.RequiresReview.Should().BeTrue();
        decision.Reason.Should().Contain("auto-apply is off");
    }

    [Fact]
    public async Task EvaluateToolUse_ShouldAllowDirect_ForLowRiskTool_WhenAutoApplyEnabled()
    {
        _toolRegistry.RegisterTool(CreateTool("board.read-cards", ToolRiskLevel.Low));
        var profile = CreateProfile(policyJson: "{\"autoApplyLowRisk\":true}");
        GivenProfileExists(profile);

        var decision = await _evaluator.EvaluateToolUseAsync(profile.Id, "board.read-cards");

        decision.Allowed.Should().BeTrue();
        decision.RequiresReview.Should().BeFalse();
        decision.Reason.Should().Contain("auto-applied");
    }

    [Fact]
    public async Task EvaluateToolUse_ShouldStillRequireReview_ForHighRisk_EvenWithAutoApply()
    {
        // The auto-apply opt-in must not bypass review for a high-risk tool.
        _toolRegistry.RegisterTool(CreateTool("board.delete", ToolRiskLevel.High));
        var profile = CreateProfile(policyJson: "{\"autoApplyLowRisk\":true}");
        GivenProfileExists(profile);

        var decision = await _evaluator.EvaluateToolUseAsync(profile.Id, "board.delete");

        decision.Allowed.Should().BeTrue();
        decision.RequiresReview.Should().BeTrue();
    }

    [Fact]
    public async Task EvaluateToolUse_ShouldLogDecisions()
    {
        _toolRegistry.RegisterTool(CreateTool("inbox.triage", ToolRiskLevel.Medium));
        var profile = CreateProfile();
        GivenProfileExists(profile);

        await _evaluator.EvaluateToolUseAsync(profile.Id, "inbox.triage");

        _logger.Entries.Should().NotBeEmpty();
        _logger.Entries.Should().Contain(e => e.Message.Contains("inbox.triage"));
    }

    #region ParsePolicy edge cases

    [Fact]
    public void ParsePolicy_ShouldReturnDefaults_ForEmptyJson()
    {
        var config = AgentPolicyEvaluator.ParsePolicy("{}");

        config.AllowedTools.Should().BeEmpty();
        config.AutoApplyLowRisk.Should().BeFalse();
    }

    [Fact]
    public void ParsePolicy_ShouldReturnDefaults_ForMalformedJson()
    {
        // Unparseable input must yield defaults rather than throw.
        var config = AgentPolicyEvaluator.ParsePolicy("not json");

        config.AllowedTools.Should().BeEmpty();
        config.AutoApplyLowRisk.Should().BeFalse();
    }

    [Fact]
    public void ParsePolicy_ShouldReturnDefaults_ForNull()
    {
        var config = AgentPolicyEvaluator.ParsePolicy(null);

        config.AllowedTools.Should().BeEmpty();
        config.AutoApplyLowRisk.Should().BeFalse();
    }

    [Fact]
    public void ParsePolicy_ShouldParseAllowedTools()
    {
        var config = AgentPolicyEvaluator.ParsePolicy(
            "{\"allowedTools\":[\"inbox.triage\",\"board.read\"]}");

        config.AllowedTools.Should().HaveCount(2);
        config.AllowedTools.Should().Contain("inbox.triage");
        config.AllowedTools.Should().Contain("board.read");
    }

    #endregion
}

0 commit comments

Comments
 (0)