Skip to content

Commit b4bc1cb

Browse files
authored
Add note qc feature (#155)
# Summary This is a heavy but cool one! This change adds a note QC workflow. It lets users define checks for notes in natural language; these checks then run on publish. For example, you can verify a note against the transcript. <img width="290" height="276" alt="image" src="https://github.com/user-attachments/assets/d9af6406-4532-44b4-94bf-5e6269c8653f" /> Checks can be added via the settings UI. <img width="2106" height="1242" alt="2026-05-14 14 34 31" src="https://github.com/user-attachments/assets/75cae0f1-0f25-4c3f-b572-8cb2f89fd634" /> These checks run in the publish UI. They can update the note itself or the other fields on the note. The checks have access to tools for browsing the production tracking (prodtracking) software. For example, a check can find attachments to plug into the note's links, To recipients, or CCs. <img width="2106" height="1242" alt="2026-05-14 14 35 23" src="https://github.com/user-attachments/assets/842f918b-d411-4cc3-a271-e3aa8d7dd13b" /> ## Testing - [x] I have tested these changes locally - [x] I have run all relevant automated tests - [x] I have verified this does not break existing workflows - [x] For changes that can be tested in UI, I have included screenshots or gif animations of the changes. ## How I Tested - Created a transcript with missing info and confirmed that the QC catches it - Created a note mentioning a person who was not tagged - Checked the content of a note to ensure it contained the requested information. --------- Signed-off-by: James Spadafora <spadjv@gmail.com>
1 parent 6edc895 commit b4bc1cb

36 files changed

Lines changed: 3664 additions & 58 deletions

backend/requirements.txt

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
11
fastapi==0.104.1
22
uvicorn[standard]==0.24.0
3-
pydantic==2.5.0
4-
pydantic-settings==2.1.0
3+
pydantic==2.13.4
4+
pydantic-settings==2.8.1
5+
instructor==1.15.1
56
pytest==7.4.3
67
pytest-cov==4.1.0
78
pytest-asyncio==0.21.1
89
httpx==0.25.2
910
shotgun_api3==3.9.2
1011
pymongo==4.10.1
1112
websockets==12.0
12-
openai==1.58.1
13+
openai==2.36.0
1314
google-auth==2.0.0
14-
requests==2.28.0
15+
requests==2.32.3
1516
python-multipart==0.0.9
1617
PyYAML==6.0.1

backend/src/dna/llm_providers/llm_provider_base.py

Lines changed: 211 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,94 @@
33
Abstract base class for LLM providers and factory function.
44
"""
55

6+
import json
7+
import logging
68
import os
7-
from typing import Optional
9+
from typing import Any, Awaitable, Callable, Optional, TypeVar
810

11+
import instructor
912
from openai import AsyncOpenAI
13+
from pydantic import BaseModel
1014

1115
from dna.prompts.generate_note_prompt import GENERATE_NOTE_PROMPT
1216

17+
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Type variable for structured responses; bound to pydantic models.
T = TypeVar("T", bound=BaseModel)

# Default cap, in characters, applied to a single tool result before it is
# appended to the conversation.
DEFAULT_MAX_TOOL_RESULT_CHARS = 50_000
22+
23+
24+
def _truncate_tool_result(content: str, max_chars: int) -> str:
    """Cap ``content`` at ``max_chars`` characters.

    Content that already fits is returned unchanged. Longer content is cut
    and finished with a truncation marker so the reader can tell the tool
    output is incomplete; the marker counts toward the limit.

    Args:
        content: Raw tool output.
        max_chars: Maximum length of the returned string. When it is smaller
            than the marker itself, the content is hard-cut without a marker;
            zero or negative values yield an empty string.

    Returns:
        A string of at most ``max(max_chars, 0)`` characters.
    """
    if len(content) <= max_chars:
        return content
    suffix = "\n...[truncated]"
    keep = max_chars - len(suffix)
    if keep < 0:
        # The marker alone exceeds the budget. A negative slice bound would
        # cut from the END of the string and return far more than max_chars,
        # so fall back to a plain hard cut.
        return content[: max(max_chars, 0)]
    return content[:keep] + suffix
29+
30+
31+
def _safe_parse_tool_arguments(
32+
raw: str | None,
33+
) -> tuple[dict[str, Any] | None, str | None]:
34+
text = (raw or "").strip() or "{}"
35+
try:
36+
parsed = json.loads(text)
37+
except json.JSONDecodeError as exc:
38+
err = json.dumps(
39+
{
40+
"error": "Invalid JSON in tool arguments.",
41+
"detail": str(exc),
42+
}
43+
)
44+
return None, err
45+
if not isinstance(parsed, dict):
46+
return None, json.dumps({"error": "Tool arguments must be a JSON object."})
47+
return parsed, None
48+
49+
50+
def _assistant_message_with_tool_calls(
    msg: Any, tool_calls: list[Any]
) -> dict[str, Any]:
    """Build the assistant chat message that records a round of tool calls.

    Args:
        msg: Message object exposing ``content``.
        tool_calls: Objects exposing ``id``, optionally ``type``, and
            ``function.name`` / ``function.arguments``.

    Returns:
        A dict with ``role``, ``content`` and ``tool_calls`` keys. A missing
        or falsy call ``type`` becomes ``"function"`` and falsy arguments
        become ``"{}"``.
    """

    def _serialize(call: Any) -> dict[str, Any]:
        # Plain-dict form of a single tool call.
        return {
            "id": call.id,
            "type": getattr(call, "type", "function") or "function",
            "function": {
                "name": call.function.name,
                "arguments": call.function.arguments or "{}",
            },
        }

    return {
        "role": "assistant",
        "content": msg.content,
        "tool_calls": [_serialize(call) for call in tool_calls],
    }
68+
69+
70+
async def _append_tool_use_round(
    messages: list[dict[str, Any]],
    msg: Any,
    tool_calls: list[Any],
    tool_executor: Callable[[str, dict[str, Any]], Awaitable[str]],
    max_tool_result_chars: int,
) -> None:
    """Record one tool-use round on ``messages`` (mutated in place).

    First appends the assistant message carrying ``tool_calls``. Then, for
    each call in order, appends one ``"tool"`` message whose content is the
    executor's output, or a JSON error string when the call's arguments
    could not be parsed. Content is truncated to ``max_tool_result_chars``.

    Args:
        messages: Conversation history to extend.
        msg: Assistant message object that requested the tools.
        tool_calls: Tool calls taken from ``msg``.
        tool_executor: Awaitable callback invoked as
            ``tool_executor(name, arguments)``.
        max_tool_result_chars: Per-result character cap.
    """
    messages.append(_assistant_message_with_tool_calls(msg, tool_calls))
    for call in tool_calls:
        arguments, parse_error = _safe_parse_tool_arguments(call.function.arguments)
        if parse_error is None:
            assert arguments is not None
            output = await tool_executor(call.function.name, arguments)
        else:
            # Malformed arguments are reported back to the model instead of
            # invoking the tool.
            output = parse_error
        content = _truncate_tool_result(output, max_tool_result_chars)
        messages.append(
            {
                "role": "tool",
                "tool_call_id": call.id,
                "content": content,
            }
        )
93+
1394

1495
class LLMProviderBase:
1596
"""Abstract base class for LLM providers."""
@@ -119,6 +200,135 @@ async def generate_note(
119200

120201
return response.choices[0].message.content or ""
121202

203+
async def generate_with_tools(
    self,
    system_prompt: str,
    user_message: str,
    tools: list[dict[str, Any]],
    tool_executor: Callable[[str, dict[str, Any]], Awaitable[str]],
    max_iterations: int = 5,
    temperature: float = 0.2,
    max_tool_result_chars: int = DEFAULT_MAX_TOOL_RESULT_CHARS,
) -> str:
    """Run an agentic loop: LLM may call tools until it returns final text.

    Each round sends the conversation with ``tool_choice="auto"``. When the
    reply requests tool calls, they are executed through ``tool_executor``
    and their truncated results are appended to the conversation; otherwise
    the reply text is returned immediately.

    If every one of the ``max_iterations`` rounds requested tools, one more
    completion is requested with ``tool_choice="none"`` so the model can
    answer using the results of the last round. Without it, those results
    would never be shown to the model and the text returned would be the
    (usually empty) content that accompanied the last tool request. This
    mirrors ``generate_structured_with_tools``.

    Args:
        system_prompt: Content of the system message.
        user_message: Content of the initial user message.
        tools: Tool definitions passed to the chat completions API.
        tool_executor: Awaitable callback invoked as
            ``tool_executor(name, arguments)`` for each requested call.
        max_iterations: Maximum number of tool-enabled rounds.
        temperature: Sampling temperature for every request.
        max_tool_result_chars: Per-result character cap.

    Returns:
        The final assistant text, or ``""`` when no text was produced (this
        includes ``max_iterations <= 0``, where no request is made).
    """
    messages: list[dict[str, Any]] = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_message},
    ]
    last_text = ""
    pending_tool_results = False
    for _ in range(max_iterations):
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            tools=tools,
            tool_choice="auto",
            temperature=temperature,
            max_tokens=2048,
        )
        msg = response.choices[0].message
        last_text = msg.content or ""
        tool_calls = getattr(msg, "tool_calls", None) or []
        if not tool_calls:
            return last_text
        await _append_tool_use_round(
            messages,
            msg,
            tool_calls,
            tool_executor,
            max_tool_result_chars,
        )
        pending_tool_results = True

    if not pending_tool_results:
        # The loop never ran (max_iterations <= 0); nothing to summarize.
        return last_text

    logger.warning(
        "Tool-use loop reached max_iterations=%s with pending tool rounds; "
        "requesting a final assistant message with tool_choice=none.",
        max_iterations,
    )
    response = await self.client.chat.completions.create(
        model=self.model,
        messages=messages,
        tools=tools,
        tool_choice="none",
        temperature=temperature,
        max_tokens=2048,
    )
    # Fall back to the previous text if the final reply is empty.
    return response.choices[0].message.content or last_text
243+
244+
async def generate_structured_with_tools(
    self,
    system_prompt: str,
    user_message: str,
    tools: list[dict[str, Any]],
    tool_executor: Callable[[str, dict[str, Any]], Awaitable[str]],
    response_model: type[T],
    max_iterations: int = 5,
    temperature: float = 0.2,
    max_tool_result_chars: int = DEFAULT_MAX_TOOL_RESULT_CHARS,
) -> T:
    """Run a tool-use phase, then extract a validated ``response_model``.

    Phase 1 loops up to ``max_iterations`` times with ``tool_choice="auto"``,
    executing requested tools and appending their results. It stops as soon
    as the model answers without tool calls. If the last allowed round still
    requested tools, one extra completion with ``tool_choice="none"`` is
    made so the conversation ends on an assistant message.

    Phase 2 appends a user message asking for the final result and sends the
    conversation through an ``instructor`` client in JSON mode, which
    returns an instance of ``response_model``.

    Args:
        system_prompt: Content of the system message.
        user_message: Content of the initial user message.
        tools: Tool definitions passed to the chat completions API.
        tool_executor: Awaitable callback invoked as
            ``tool_executor(name, arguments)`` for each requested call.
        response_model: Pydantic model class to extract.
        max_iterations: Maximum number of tool-enabled rounds.
        temperature: Sampling temperature for every request.
        max_tool_result_chars: Per-result character cap.

    Returns:
        An instance of ``response_model``.
    """
    messages: list[dict[str, Any]] = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_message},
    ]

    # True only when the final permitted round still asked for tools.
    exhausted_with_tools = False
    for round_index in range(max_iterations):
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            tools=tools,
            tool_choice="auto",
            temperature=temperature,
            max_tokens=2048,
        )
        msg = response.choices[0].message
        tool_calls = getattr(msg, "tool_calls", None) or []
        if not tool_calls:
            # Plain answer: keep it in the history and end the tool phase.
            messages.append(
                {"role": "assistant", "content": msg.content or ""},
            )
            break
        await _append_tool_use_round(
            messages,
            msg,
            tool_calls,
            tool_executor,
            max_tool_result_chars,
        )
        exhausted_with_tools = round_index == max_iterations - 1

    if exhausted_with_tools:
        logger.warning(
            "Tool-use loop reached max_iterations=%s with pending tool rounds; "
            "requesting a final assistant message with tool_choice=none before "
            "structured extraction.",
            max_iterations,
        )
        closing_response = await self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            tools=tools,
            tool_choice="none",
            temperature=temperature,
            max_tokens=2048,
        )
        closing_msg = closing_response.choices[0].message
        messages.append(
            {"role": "assistant", "content": closing_msg.content or ""},
        )

    # Work on a copy so the tool-phase history is not mutated further.
    extraction_messages = [
        *messages,
        {
            "role": "user",
            "content": (
                "Provide your final quality-check result for this draft and check. "
                "Fill every required field in the structured response schema."
            ),
        },
    ]
    structured_client = instructor.from_openai(
        self.client,
        mode=instructor.Mode.JSON,
    )
    structured_result = await structured_client.chat.completions.create(
        model=self.model,
        messages=extraction_messages,
        response_model=response_model,
        temperature=temperature,
        max_tokens=2048,
    )
    return structured_result
331+
122332

123333
def get_llm_provider() -> LLMProviderBase:
124334
"""Factory function to get the configured LLM provider."""

backend/src/dna/models/__init__.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,18 @@
2727
PlaylistMetadata,
2828
PlaylistMetadataUpdate,
2929
)
30+
from dna.models.qc_check import (
31+
DEFAULT_ACTION_ITEM_CHECK,
32+
NoteQCAttributeSuggestion,
33+
NoteQCCheck,
34+
NoteQCCheckCreate,
35+
NoteQCCheckUpdate,
36+
NoteQCLLMOutput,
37+
NoteQCResult,
38+
NoteQCSeverity,
39+
RunQCChecksRequest,
40+
RunQCChecksResponse,
41+
)
3042
from dna.models.requests import (
3143
CreateNoteRequest,
3244
EntityLink,
@@ -103,4 +115,14 @@
103115
"UserSettings",
104116
"UserSettingsUpdate",
105117
"UserSettingsResponse",
118+
"NoteQCSeverity",
119+
"NoteQCCheckCreate",
120+
"NoteQCCheckUpdate",
121+
"NoteQCCheck",
122+
"NoteQCAttributeSuggestion",
123+
"NoteQCLLMOutput",
124+
"NoteQCResult",
125+
"RunQCChecksRequest",
126+
"RunQCChecksResponse",
127+
"DEFAULT_ACTION_ITEM_CHECK",
106128
]

backend/src/dna/models/qc_check.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
"""Models for user-defined note QC checks and run results."""
2+
3+
from datetime import datetime
4+
from typing import Literal, Optional
5+
6+
from pydantic import BaseModel, ConfigDict, Field
7+
8+
from dna.models.draft_note import DraftNoteLink
9+
10+
# Severity levels a QC check can carry.
NoteQCSeverity = Literal["warning", "error"]
11+
12+
13+
class NoteQCCheckCreate(BaseModel):
    """Payload for creating a QC check.

    ``name``, ``prompt`` and ``severity`` are required; ``enabled`` may be
    omitted and then defaults to ``True``.
    """

    name: str
    # Instruction text for the check (DEFAULT_ACTION_ITEM_CHECK shows one).
    prompt: str
    # Either "warning" or "error".
    severity: NoteQCSeverity
    enabled: bool = Field(default=True)
20+
21+
22+
class NoteQCCheckUpdate(BaseModel):
    """Partial update for a QC check.

    Every field is optional and defaults to ``None``.
    """

    # NOTE(review): presumably ``None`` means "leave unchanged" - confirm
    # against the code that applies the update.
    name: str | None = None
    prompt: str | None = None
    severity: NoteQCSeverity | None = None
    enabled: bool | None = None
29+
30+
31+
class NoteQCCheck(BaseModel):
    """Stored QC check definition.

    Carries the same fields as the create payload plus an identifier, the
    associated user's email, and creation/update timestamps.
    """

    # Accept either the field name (``id``) or its alias (``_id``) on input.
    model_config = ConfigDict(populate_by_name=True)

    id: str = Field(alias="_id")
    user_email: str
    name: str
    prompt: str
    # Either "warning" or "error".
    severity: NoteQCSeverity
    enabled: bool = Field(default=True)
    created_at: datetime
    updated_at: datetime
44+
45+
46+
class NoteQCAttributeSuggestion(BaseModel):
    """Suggested updates to draft note metadata.

    Every field is optional and defaults to ``None``.
    """

    to: str | None = None
    cc: str | None = None
    subject: str | None = None
    version_status: str | None = Field(
        default=None,
        description="Suggested version status (maps to draft version_status).",
    )
    links: list[DraftNoteLink] | None = None
57+
58+
59+
class NoteQCLLMOutput(BaseModel):
    """Structured QC verdict returned by the LLM (instructor-validated).

    Only ``passed`` is required; the remaining fields default to ``None``.
    """

    passed: bool
    issue: str | None = None
    evidence: str | None = None
    note_suggestion: str | None = Field(
        default=None,
        description="Full suggested note body when the check fails.",
    )
    # Optional suggested changes to the note's metadata fields.
    attribute_suggestion: NoteQCAttributeSuggestion | None = None
70+
71+
72+
class NoteQCResult(BaseModel):
    """Outcome of running one check against a draft.

    Combines the check's identity (``check_id``, ``check_name``,
    ``severity``) with the same verdict fields as ``NoteQCLLMOutput``.
    """

    check_id: str
    check_name: str
    # Either "warning" or "error".
    severity: NoteQCSeverity
    passed: bool
    issue: str | None = None
    evidence: str | None = None
    note_suggestion: str | None = None
    attribute_suggestion: NoteQCAttributeSuggestion | None = None
83+
84+
85+
class RunQCChecksRequest(BaseModel):
    """Request body for run-qc-checks.

    The playlist and version are supplied as path parameters, so the body
    carries only the user's email.
    """

    user_email: str
89+
90+
91+
class RunQCChecksResponse(BaseModel):
    """QC results for one draft note."""

    # One entry per check result.
    results: list[NoteQCResult]
95+
96+
97+
# Ready-made "warning" check that looks for action items, tasks, or decisions
# mentioned in the transcript but missing from the note.
# NOTE(review): presumably used as a starter check for users - confirm at
# the call sites.
DEFAULT_ACTION_ITEM_CHECK = NoteQCCheckCreate(
    name="Action Item Check",
    prompt=(
        "Review the transcript and note below. If the transcript mentions an "
        "action item, task, or decision that is NOT reflected in the note, "
        "report it. Otherwise respond with passed=true."
    ),
    severity="warning",
    enabled=True,
)

0 commit comments

Comments
 (0)