Skip to content

Commit 409ea9b

Browse files
committed
test(uipath-agents): four coded-framework e2e tests
Adds zero-to-running tests for the four coded-agent frameworks the skill teaches, each going scaffold -> uip codedagent init -> uip codedagent run end-to-end: - skill-agent-coded-simple-echo (smoke) — Pydantic Input/Output, no LLM, deterministic; runs the agent locally and asserts echoed output. - skill-agent-coded-langgraph-classifier (e2e) — StateGraph + lazy UiPathChat() inside a node, schema-sync from Pydantic GraphInput/GraphOutput. - skill-agent-coded-llamaindex-workflow (e2e) — Workflow subclass with StartEvent/StopEvent fields and lazy UiPathOpenAI inside a @step. - skill-agent-coded-openai-agents-handoff (e2e) — multi-agent triage with handoffs to billing/technical specialists, exercising the factory-function pattern (def main() -> Agent). Each task uses a goal-only prompt so the skill drives the "how". Each check script asserts: pyproject hygiene (no [build-system], authors present, framework dep declared), runtime config file shape (langgraph.json / llama_index.json / openai_agents.json / uipath.json functions), source-code shape (Pydantic models, framework-specific exports), the lazy-LLM-init invariant via _shared/ast_lazy_init_check, schema-sync between declared models and entry-points.json, and bindings.json envelope well-formedness.
1 parent 1f97478 commit 409ea9b

8 files changed

Lines changed: 961 additions & 0 deletions

File tree

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
#!/usr/bin/env python3
2+
"""LangGraph agent project-shape check.
3+
4+
Asserts the lazy-LLM-init invariant (Critical Rule C4) and the
5+
schema-sync invariant (`entry-points.json` reflects the Pydantic
6+
Input/Output classes the agent declared).
7+
8+
Checks performed:
9+
10+
1. `support-classifier/pyproject.toml` declares `uipath-langchain`
11+
as a dependency, has `[project]` with `authors`, and contains NO
12+
`[build-system]` section.
13+
2. The project has either `langgraph.json` (Pattern A — recommended)
14+
OR `uipath.json` with a `functions.graph` entry (Pattern B). Both
15+
are valid per the LangGraph integration guide.
16+
3. `main.py` (or `graph.py`) defines `GraphInput`/`GraphOutput`
17+
Pydantic models, exports a top-level `graph` variable, and has
18+
NO module-level UiPath* construction (`UiPathChat`,
19+
`UiPathAzureChatOpenAI`, etc.).
20+
4. `entry-points.json` has one entrypoint whose schemas mention
21+
`text` (input) and `category` (output) — proves `uip codedagent
22+
init` ran AFTER the Pydantic models were written.
23+
5. `bindings.json` is the v2.0 envelope (resource count is not
24+
asserted — the classifier itself uses no SDK resources).
25+
26+
Exits 0 on PASS; exits non-zero with a `FAIL: ...` message on the first violation.
27+
"""
28+
29+
from __future__ import annotations
30+
31+
import json
32+
import os
33+
import sys
34+
from pathlib import Path
35+
36+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
37+
from _shared.bindings_assertions import load_bindings # noqa: E402
38+
from _shared.ast_lazy_init_check import find_module_level_llm_clients # noqa: E402
39+
from _shared.project_root import find_project_root # noqa: E402
40+
41+
# Directory of the project under test; every check below resolves files relative to it.
# NOTE(review): how `find_project_root` locates "support-classifier" is not visible
# in this file — confirm against `_shared/project_root.py`.
ROOT = find_project_root("support-classifier")
42+
43+
44+
def _read_text(path: Path) -> str:
45+
if not path.is_file():
46+
sys.exit(f"FAIL: Missing {path}")
47+
return path.read_text(encoding="utf-8")
48+
49+
50+
def _load_json(path: Path) -> dict:
    """Load `path` as JSON and return its top-level object.

    Args:
        path: JSON file to load (read through `_read_text`).

    Returns:
        The parsed top-level JSON object.

    Raises:
        SystemExit: With a `FAIL:` message if the file is missing, is not
            valid JSON, or its top-level value is not an object.
    """
    raw = _read_text(path)
    try:
        doc = json.loads(raw)
    except json.JSONDecodeError as e:
        sys.exit(f"FAIL: {path} is not valid JSON: {e}")
    if not isinstance(doc, dict):
        # Callers immediately use `.get(...)`; a top-level list or scalar would
        # otherwise crash them with AttributeError instead of a FAIL message.
        sys.exit(
            f"FAIL: {path} must contain a JSON object at the top level, "
            f"got {type(doc).__name__}"
        )
    return doc
56+
57+
58+
def check_pyproject() -> None:
    """Verify pyproject.toml hygiene for the LangGraph project.

    Checks that there is no `[build-system]` table, that a `[project]` table
    and an `authors` key exist, and that `uipath-langchain` is mentioned.

    Table headers and the `authors` key are matched at the start of a line,
    so text inside a comment (e.g. `# no [build-system] here`) neither
    triggers nor satisfies a check.

    Raises:
        SystemExit: With a `FAIL:` message on the first violation.
    """
    import re  # function-scope: `re` is not among this file's top-level imports

    text = _read_text(ROOT / "pyproject.toml")

    def has_table(name: str) -> bool:
        # A TOML table header begins its own line: `[name]`, optionally padded.
        pattern = rf"^[ \t]*\[[ \t]*{re.escape(name)}[ \t]*\]"
        return re.search(pattern, text, re.MULTILINE) is not None

    if has_table("build-system"):
        sys.exit(
            "FAIL: pyproject.toml contains a [build-system] section — "
            "Critical Rule C1 forbids it."
        )
    has_authors = re.search(r"^[ \t]*authors[ \t]*=", text, re.MULTILINE) is not None
    if not has_table("project") or not has_authors:
        sys.exit("FAIL: pyproject.toml is missing [project] or `authors`")
    # The dependency is still a plain substring test: its exact spelling
    # (version pins, extras, quoting) is not known from this file.
    if "uipath-langchain" not in text:
        sys.exit(
            "FAIL: pyproject.toml does not declare `uipath-langchain` — "
            "the LangGraph integration guide makes this dependency mandatory."
        )
    print("OK: pyproject.toml is hygienic and declares uipath-langchain")
73+
74+
75+
def find_graph_module() -> Path:
    """Locate the module expected to hold the graph.

    `main.py` is preferred over `graph.py`; the first one that exists under
    `ROOT` wins.

    Returns:
        Path of the first existing candidate.

    Raises:
        SystemExit: With a `FAIL:` message when neither file exists.
    """
    candidates = (ROOT / filename for filename in ("main.py", "graph.py"))
    found = next((module for module in candidates if module.is_file()), None)
    if found is None:
        sys.exit(
            f"FAIL: neither main.py nor graph.py found under {ROOT} — "
            "LangGraph integration guide requires one."
        )
    return found
84+
85+
86+
def _module_level_bindings(source: str) -> set[str]:
    """Return the names bound at module scope in `source`.

    Covers assignments (including ones nested in module-level `if`/`try`/
    `with`/loops), imports, and `def`/`class` statements. Bodies of functions,
    classes, lambdas and comprehensions are not inspected. A `from x import *`
    contributes the literal name `"*"`.

    Raises:
        SyntaxError: If `source` is not valid Python.
    """
    import ast  # function-scope: `ast` is not among this file's top-level imports

    definitions = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)
    inner_scopes = (ast.Lambda, ast.ListComp, ast.SetComp, ast.DictComp, ast.GeneratorExp)
    bound: set[str] = set()
    # A leading BOM survives a UTF-8 read and would make `ast.parse` reject the file.
    pending = list(ast.parse(source.lstrip("\ufeff")).body)
    while pending:
        node = pending.pop()
        if isinstance(node, definitions):
            bound.add(node.name)  # the statement binds its name; the body is a new scope
            continue
        if isinstance(node, inner_scopes):
            continue
        if isinstance(node, (ast.Import, ast.ImportFrom)):
            bound.update(alias.asname or alias.name.split(".")[0] for alias in node.names)
            continue
        if isinstance(node, ast.AnnAssign) and node.value is None:
            continue  # a bare annotation (`graph: T`) does not bind anything
        if isinstance(node, ast.Name) and isinstance(node.ctx, ast.Store):
            bound.add(node.id)
        pending.extend(ast.iter_child_nodes(node))
    return bound


def check_graph_module(path: Path) -> None:
    """Verify the shape of the LangGraph module at `path`.

    Requires that the source mentions `GraphInput` and `GraphOutput`, binds a
    module-level `graph` name, references `StateGraph` or
    `CompiledStateGraph`, and that `find_module_level_llm_clients` reports no
    violations.

    The `graph` requirement is checked against real module-level bindings:
    a substring test is satisfied by any `langgraph` import and so proves
    nothing about an exported variable.

    Args:
        path: Module to inspect (as returned by `find_graph_module`).

    Raises:
        SystemExit: With a `FAIL:` message on the first violation.
    """
    text = _read_text(path)
    for needle in ("GraphInput", "GraphOutput"):
        if needle not in text:
            sys.exit(f"FAIL: {path.name} is missing `{needle}`")
    try:
        bound = _module_level_bindings(text)
    except SyntaxError as e:
        sys.exit(f"FAIL: {path.name} is not valid Python: {e}")
    # A star import may supply `graph`; that cannot be resolved statically, so accept it.
    if "graph" not in bound and "*" not in bound:
        sys.exit(f"FAIL: {path.name} is missing a module-level `graph` binding")
    if "StateGraph" not in text and "CompiledStateGraph" not in text:
        sys.exit(f"FAIL: {path.name} does not reference StateGraph / CompiledStateGraph")
    print(f"OK: {path.name} defines GraphInput, GraphOutput, and a graph variable")
    # NOTE(review): the shared helper is opaque here; it is assumed to return
    # an iterable of human-readable violation strings — confirm.
    violations = find_module_level_llm_clients(path)
    if violations:
        sys.exit("FAIL: " + " | ".join(violations))
    print(
        f"OK: {path.name} has no module-level UiPath* construction "
        "(lazy-LLM-init invariant holds)"
    )
101+
102+
103+
def check_runtime_config() -> None:
    """Verify that the runtime can locate the compiled graph.

    Accepts either `langgraph.json` with a non-empty `graphs` mapping whose
    first entry is a `<file>:graph` reference, or — when `langgraph.json` is
    absent — `uipath.json` with a `functions.graph` entry of the same form.

    Malformed shapes (a non-mapping `graphs`/`functions`, a non-string entry)
    are reported as `FAIL:` rather than crashing, and the reference must end
    in `:graph`, so `main.py:graph_builder` is rejected.

    Raises:
        SystemExit: With a `FAIL:` message on the first violation.
    """

    def is_graph_ref(value: object) -> bool:
        return isinstance(value, str) and value.strip().endswith(":graph")

    langgraph_json = ROOT / "langgraph.json"
    uipath_json = ROOT / "uipath.json"

    if langgraph_json.is_file():
        doc = _load_json(langgraph_json)
        graphs = doc.get("graphs") if isinstance(doc, dict) else None
        if not isinstance(graphs, dict) or not graphs:
            sys.exit("FAIL: langgraph.json has no `graphs` mapping")
        # Only the first registered graph is inspected.
        target = next(iter(graphs.values()))
        if not is_graph_ref(target):
            sys.exit(
                f'FAIL: langgraph.json graphs entry should map to a `<file>:graph` '
                f'reference, got {target!r}'
            )
        print(f"OK: langgraph.json registers a graph -> {target!r}")
        return

    if uipath_json.is_file():
        doc = _load_json(uipath_json)
        functions = doc.get("functions") if isinstance(doc, dict) else None
        graph_entry = functions.get("graph") if isinstance(functions, dict) else None
        if not graph_entry:
            sys.exit(
                "FAIL: neither langgraph.json nor uipath.json `functions.graph` "
                "is present — the runtime cannot find the compiled graph."
            )
        if not is_graph_ref(graph_entry):
            # Present but malformed: say so instead of claiming it is absent.
            sys.exit(
                "FAIL: uipath.json `functions.graph` should be a `<file>:graph` "
                f"reference, got {graph_entry!r}"
            )
        print(f'OK: uipath.json registers functions.graph -> {graph_entry!r}')
        return

    sys.exit(
        "FAIL: project has neither langgraph.json nor uipath.json — at "
        "least one is required for `uip codedagent init` to succeed."
    )
134+
135+
136+
def check_entry_points() -> None:
    """Verify that entry-points.json mentions the expected schema fields.

    Requires a non-empty `entryPoints` value whose JSON serialisation
    contains both `text` and `category`.

    Raises:
        SystemExit: With a `FAIL:` message on the first violation.
    """
    entrypoints = _load_json(ROOT / "entry-points.json").get("entryPoints") or []
    if not entrypoints:
        sys.exit("FAIL: entry-points.json has no entryPoints — `uip codedagent init` did not run successfully")

    # Field presence is a substring test over the serialised entry points.
    raw = json.dumps(entrypoints)
    absent = next((name for name in ("text", "category") if name not in raw), None)
    if absent is not None:
        sys.exit(
            f'FAIL: entry-points.json schemas do not mention `{absent}`. '
            f'Either `uip codedagent init` ran before the Pydantic models '
            f'were written, or the models did not declare the expected '
            f'fields. Got: {raw}'
        )
    print(
        "OK: entry-points.json schemas reflect the GraphInput/GraphOutput "
        "fields (text, category)"
    )
154+
155+
156+
def check_bindings() -> None:
    """Load `bindings.json` through the shared helper and report success.

    NOTE(review): `load_bindings` is opaque here; it is assumed to abort on a
    malformed envelope, since its return value is ignored — confirm.
    """
    bindings_path = ROOT / "bindings.json"
    load_bindings(bindings_path)
    print("OK: bindings.json envelope is well-formed")
159+
160+
161+
def main() -> None:
    """Run every LangGraph project check in order, stopping at the first failure.

    Raises:
        SystemExit: With a `FAIL:` message if the project directory is
            missing, any check fails, or `run_marker.txt` is absent.
    """
    if not ROOT.is_dir():
        sys.exit(f"FAIL: project directory {ROOT} does not exist")

    check_pyproject()
    check_graph_module(find_graph_module())
    check_runtime_config()
    check_entry_points()
    check_bindings()

    # The task prompt asks the agent to drop this marker after a successful run.
    marker = ROOT / "run_marker.txt"
    if marker.is_file():
        print("OK: run_marker.txt exists (run completed cleanly)")
        return
    sys.exit(f"FAIL: {ROOT}/run_marker.txt does not exist — `uip codedagent run` likely never finished")


if __name__ == "__main__":
    main()
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
task_id: skill-agent-coded-langgraph-classifier
2+
description: >
3+
LangGraph coded agent zero-to-running. Verifies the agent
4+
scaffolds a LangGraph project (`uip codedagent new` →
5+
`langgraph.json` + `main.py` exporting a compiled `graph`), keeps
6+
LLM construction inside a node body (Critical Rule C4 —
7+
module-level `UiPathChat()` breaks `uip codedagent init` with
8+
`ImportError`), lets `uip codedagent init` regenerate the schema
9+
from the actual code, and runs the classifier end-to-end via
10+
`uip codedagent run agent`.
11+
tags: [uipath-agents, e2e, coded, lifecycle:generate, lifecycle:execute, feature:framework-langgraph, feature:schema-sync]
12+
max_iterations: 1
13+
14+
agent:
15+
type: claude-code
16+
permission_mode: acceptEdits
17+
allowed_tools: ["Skill", "Bash", "Read", "Write", "Edit", "Glob", "Grep"]
18+
turn_timeout: 1200
19+
20+
sandbox:
21+
driver: tempdir
22+
python: {}
23+
24+
initial_prompt: |
25+
Build a LangGraph UiPath coded agent named `support-classifier` that
26+
classifies a customer support ticket into one of three categories:
27+
`billing`, `technical`, `general`.
28+
29+
Input: `text` (str). Output: `category` (str — one of the three
30+
labels above) and `text` (str — the original text echoed back).
31+
32+
For deterministic test runs, pin the LLM to a low-cost gateway
33+
model (e.g. `gpt-4o-mini-2024-07-18`) with `temperature=0`.
34+
35+
Take the agent end-to-end through scaffold → init → run. Run
36+
against `'{"text": "I was charged twice for my last invoice."}'`
37+
— the expected category is `billing`.
38+
39+
After the run succeeds, write `RAN_OK` to `run_marker.txt` in the
40+
project root so the test harness can verify the run completed
41+
cleanly.
42+
43+
Do NOT publish, upload, or deploy. Do NOT pause between planning
44+
and implementation. Complete end-to-end in a single pass.
45+
46+
success_criteria:
47+
- type: command_executed
48+
description: "Agent scaffolded with uip codedagent new"
49+
tool_name: "Bash"
50+
command_pattern: 'uip\s+codedagent\s+new'
51+
min_count: 1
52+
weight: 1.5
53+
pass_threshold: 1.0
54+
55+
- type: command_executed
56+
description: "Agent ran uip codedagent init to generate entry-points.json"
57+
tool_name: "Bash"
58+
command_pattern: 'uip\s+codedagent\s+init'
59+
min_count: 1
60+
weight: 2.0
61+
pass_threshold: 1.0
62+
63+
- type: command_executed
64+
description: "Agent executed the agent locally with uip codedagent run"
65+
tool_name: "Bash"
66+
command_pattern: 'uip\s+codedagent\s+run\s+agent'
67+
min_count: 1
68+
weight: 2.0
69+
pass_threshold: 1.0
70+
71+
- type: run_command
72+
description: "LangGraph project shape, lazy-LLM-init invariant, schema sync"
73+
command: "python3 $TASK_DIR/check_langgraph_classifier.py"
74+
timeout: 30
75+
expected_exit_code: 0
76+
weight: 5.0
77+
pass_threshold: 1.0
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
#!/usr/bin/env python3
2+
"""LlamaIndex Workflow agent project-shape check.
3+
4+
Checks performed:
5+
6+
1. `qna-workflow/pyproject.toml` declares `uipath-llamaindex`, has
7+
`[project]` with `authors`, and contains NO `[build-system]`
8+
section.
9+
2. `llama_index.json` exists with a `workflows` mapping pointing to
10+
a `<file>:workflow` (or any variable name) reference.
11+
3. `main.py` defines `Question(StartEvent)`, `Answer(StopEvent)`,
12+
a `Workflow` subclass, exports a top-level `workflow` variable,
13+
and has NO module-level UiPath* construction.
14+
4. `entry-points.json` reflects the StartEvent/StopEvent fields:
15+
`question` (input), `answer` and `word_count` (output).
16+
5. `bindings.json` is the v2.0 envelope.
17+
18+
Exits 0 on PASS; exits non-zero with a `FAIL: ...` message on the first violation.
19+
"""
20+
21+
from __future__ import annotations
22+
23+
import json
24+
import os
25+
import sys
26+
from pathlib import Path
27+
28+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
29+
from _shared.bindings_assertions import load_bindings # noqa: E402
30+
from _shared.ast_lazy_init_check import find_module_level_llm_clients # noqa: E402
31+
from _shared.project_root import find_project_root # noqa: E402
32+
33+
# Directory of the project under test; every check below resolves files relative to it.
# NOTE(review): how `find_project_root` locates "qna-workflow" is not visible
# in this file — confirm against `_shared/project_root.py`.
ROOT = find_project_root("qna-workflow")
34+
35+
36+
def _read_text(path: Path) -> str:
37+
if not path.is_file():
38+
sys.exit(f"FAIL: Missing {path}")
39+
return path.read_text(encoding="utf-8")
40+
41+
42+
def _load_json(path: Path) -> dict:
    """Load `path` as JSON and return its top-level object.

    Args:
        path: JSON file to load (read through `_read_text`).

    Returns:
        The parsed top-level JSON object.

    Raises:
        SystemExit: With a `FAIL:` message if the file is missing, is not
            valid JSON, or its top-level value is not an object.
    """
    raw = _read_text(path)
    try:
        doc = json.loads(raw)
    except json.JSONDecodeError as e:
        sys.exit(f"FAIL: {path} is not valid JSON: {e}")
    if not isinstance(doc, dict):
        # Callers immediately use `.get(...)`; a top-level list or scalar would
        # otherwise crash them with AttributeError instead of a FAIL message.
        sys.exit(
            f"FAIL: {path} must contain a JSON object at the top level, "
            f"got {type(doc).__name__}"
        )
    return doc
48+
49+
50+
def check_pyproject() -> None:
    """Verify pyproject.toml hygiene for the LlamaIndex project.

    Checks that there is no `[build-system]` table, that a `[project]` table
    and an `authors` key exist, and that `uipath-llamaindex` is mentioned.

    Table headers and the `authors` key are matched at the start of a line,
    so text inside a comment (e.g. `# no [build-system] here`) neither
    triggers nor satisfies a check.

    Raises:
        SystemExit: With a `FAIL:` message on the first violation.
    """
    import re  # function-scope: `re` is not among this file's top-level imports

    text = _read_text(ROOT / "pyproject.toml")

    def has_table(name: str) -> bool:
        # A TOML table header begins its own line: `[name]`, optionally padded.
        pattern = rf"^[ \t]*\[[ \t]*{re.escape(name)}[ \t]*\]"
        return re.search(pattern, text, re.MULTILINE) is not None

    if has_table("build-system"):
        sys.exit(
            "FAIL: pyproject.toml contains a [build-system] section — "
            "Critical Rule C1 forbids it."
        )
    has_authors = re.search(r"^[ \t]*authors[ \t]*=", text, re.MULTILINE) is not None
    if not has_table("project") or not has_authors:
        sys.exit("FAIL: pyproject.toml is missing [project] or `authors`")
    # The dependency is still a plain substring test: its exact spelling
    # (version pins, extras, quoting) is not known from this file.
    if "uipath-llamaindex" not in text:
        sys.exit(
            "FAIL: pyproject.toml does not declare `uipath-llamaindex` — "
            "the LlamaIndex integration guide makes this dependency "
            "mandatory."
        )
    print("OK: pyproject.toml is hygienic and declares uipath-llamaindex")
66+
67+
68+
def check_llama_index_json() -> None:
    """Verify that `llama_index.json` registers a workflow.

    Requires a non-empty `workflows` mapping whose first value is a string of
    the form `<file>:<variable>`. A `workflows` value that is not a mapping
    (for example a list) is reported as `FAIL:` instead of crashing on
    `.values()`.

    Raises:
        SystemExit: With a `FAIL:` message on the first violation.
    """
    doc = _load_json(ROOT / "llama_index.json")
    workflows = doc.get("workflows") if isinstance(doc, dict) else None
    if not isinstance(workflows, dict) or not workflows:
        sys.exit("FAIL: llama_index.json has no `workflows` mapping")
    # Only the first registered workflow is inspected.
    target = next(iter(workflows.values()))
    if not isinstance(target, str) or ":" not in target:
        sys.exit(
            f'FAIL: llama_index.json workflows entry should map to a '
            f'`<file>:<variable>` reference, got {target!r}'
        )
    print(f"OK: llama_index.json registers a workflow -> {target!r}")
80+
81+
82+
def check_main_py() -> None:
    """Verify the shape of the LlamaIndex `main.py`.

    Requires that the source contains each of `StartEvent`, `StopEvent`,
    `Workflow`, `@step` and `workflow`, and that
    `find_module_level_llm_clients` reports no violations.

    Raises:
        SystemExit: With a `FAIL:` message on the first violation.
    """
    source_path = ROOT / "main.py"
    source = _read_text(source_path)

    # NOTE(review): these are plain substring tests — `workflow` is also
    # satisfied by any text containing that word, such as an import path.
    required = ("StartEvent", "StopEvent", "Workflow", "@step", "workflow")
    absent = next((token for token in required if token not in source), None)
    if absent is not None:
        sys.exit(f"FAIL: main.py is missing `{absent}`")
    print(
        "OK: main.py defines StartEvent/StopEvent subclasses, a Workflow, "
        "a @step, and exports a workflow variable"
    )

    # NOTE(review): the shared helper is opaque here; it is assumed to return
    # an iterable of human-readable violation strings — confirm.
    problems = find_module_level_llm_clients(source_path)
    if problems:
        sys.exit("FAIL: " + " | ".join(problems))
    print(
        "OK: main.py has no module-level UiPath* construction "
        "(lazy-LLM-init invariant holds)"
    )
99+
100+
101+
def check_entry_points() -> None:
    """Verify that entry-points.json mentions the expected event fields.

    Requires a non-empty `entryPoints` value whose JSON serialisation
    contains `question`, `answer` and `word_count`.

    Raises:
        SystemExit: With a `FAIL:` message on the first violation.
    """
    entrypoints = _load_json(ROOT / "entry-points.json").get("entryPoints") or []
    if not entrypoints:
        sys.exit("FAIL: entry-points.json has no entryPoints")

    # Field presence is a substring test over the serialised entry points.
    raw = json.dumps(entrypoints)
    expected = ("question", "answer", "word_count")
    absent = next((name for name in expected if name not in raw), None)
    if absent is not None:
        sys.exit(
            f'FAIL: entry-points.json schemas do not mention `{absent}`. '
            f'StartEvent/StopEvent fields were not picked up by '
            f'`uip codedagent init`. Got: {raw}'
        )
    print(
        "OK: entry-points.json reflects StartEvent/StopEvent fields "
        "(question, answer, word_count)"
    )
118+
119+
120+
def check_bindings() -> None:
    """Load `bindings.json` through the shared helper and report success.

    NOTE(review): `load_bindings` is opaque here; it is assumed to abort on a
    malformed envelope, since its return value is ignored — confirm.
    """
    bindings_path = ROOT / "bindings.json"
    load_bindings(bindings_path)
    print("OK: bindings.json envelope is well-formed")
123+
124+
125+
def main() -> None:
    """Run every LlamaIndex project check in order, stopping at the first failure.

    Raises:
        SystemExit: With a `FAIL:` message if the project directory is
            missing, any check fails, or `run_marker.txt` is absent.
    """
    if not ROOT.is_dir():
        sys.exit(f"FAIL: project directory {ROOT} does not exist")

    for check in (
        check_pyproject,
        check_llama_index_json,
        check_main_py,
        check_entry_points,
        check_bindings,
    ):
        check()

    # Existence of the marker file is the only evidence of a completed run checked here.
    marker = ROOT / "run_marker.txt"
    if marker.is_file():
        print("OK: run_marker.txt exists (run completed cleanly)")
        return
    sys.exit(f"FAIL: {ROOT}/run_marker.txt does not exist — `uip codedagent run` likely never finished")


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)