Skip to content

Commit 44809c7

Browse files
committed
Fix CI: apply yapf formatting and resolve Vale vocabulary errors
- Wrap Python package names in backticks in sandbox_agent README so Vale treats them as inline code.
- Add FFmpeg (a product name, not a package) to Vale accept.txt.
- Apply yapf reformatting and ruff fix (E501 on a long docstring URL) to files that previously failed the pre-commit hooks.

Signed-off-by: Jerry Guan <jerryguan777@gmail.com>
1 parent eafbf33 commit 44809c7

8 files changed

Lines changed: 61 additions & 72 deletions

File tree

ci/vale/styles/config/vocabularies/nemo-agent-toolkit-examples/accept.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ DB(s?)
5353
[Ee]val
5454
[Ee]xplainability
5555
Faiss
56+
FFmpeg
5657
[Gg]eneratable
5758
glog
5859
GPU(s?)

examples/sandbox_agent/README.md

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -192,15 +192,15 @@ The sandbox provides an isolated workspace:
192192
```
193193

194194
**Pre-installed in nat-sandbox image:**
195-
- Data processing: pandas, NumPy, matplotlib, seaborn, SymPy
196-
- Web: requests, httpx, beautifulsoup4
197-
- Browser: playwright (Chromium)
198-
- PDF: pdfplumber, pypdf, pdf2image, poppler-utils
199-
- OCR: pytesseract, tesseract-ocr
200-
- Computer vision: opencv-python-headless
201-
- Audio: faster-whisper (with pre-downloaded tiny model), FFmpeg
202-
- Documents: python-pptx, python-docx, reportlab
203-
- Utilities: pillow, pyyaml, openpyxl
195+
- Data processing: `pandas`, `numpy`, `matplotlib`, `seaborn`, `sympy`
196+
- Web: `requests`, `httpx`, `beautifulsoup4`
197+
- Browser: `playwright` (Chromium)
198+
- PDF: `pdfplumber`, `pypdf`, `pdf2image`, `poppler-utils`
199+
- OCR: `pytesseract`, `tesseract-ocr`
200+
- Computer vision: `opencv-python-headless`
201+
- Audio: `faster-whisper` (with pre-downloaded tiny model), FFmpeg
202+
- Documents: `python-pptx`, `python-docx`, `reportlab`
203+
- Utilities: `pillow`, `pyyaml`, `openpyxl`
204204

205205
## GAIA Benchmark Evaluation
206206

examples/sandbox_agent/scripts/enrich_gaia_dataset.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,19 +92,21 @@ def enrich_dataset(input_path: str, output_path: str) -> None:
9292

9393
# Show a sample
9494
sample = df[has_file].iloc[0]
95-
print(f"\nSample enriched question (first 200 chars):")
95+
print("\nSample enriched question (first 200 chars):")
9696
print(f" {sample['Question'][:200]}")
9797

9898

9999
def main():
100100
parser = argparse.ArgumentParser(description="Enrich GAIA dataset with attachment file paths")
101101
parser.add_argument(
102-
"--input", "-i",
102+
"--input",
103+
"-i",
103104
default=str(_DEFAULT_INPUT),
104105
help=f"Input parquet path (default: {_DEFAULT_INPUT})",
105106
)
106107
parser.add_argument(
107-
"--output", "-o",
108+
"--output",
109+
"-o",
108110
default=str(_DEFAULT_OUTPUT),
109111
help=f"Output parquet path (default: {_DEFAULT_OUTPUT})",
110112
)

examples/sandbox_agent/src/nat_sandbox_agent/tools/host/image_describe.py

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -52,15 +52,12 @@ class ImageDescribeInput(BaseModel):
5252
"""Input schema for image_describe tool."""
5353

5454
image_path: str = Field(
55-
description="Path to the image file inside the sandbox (e.g. /workspace/input/photo.png).",
56-
)
55+
description="Path to the image file inside the sandbox (e.g. /workspace/input/photo.png).", )
5756
question: str = Field(
5857
default="Describe this image in detail.",
59-
description=(
60-
"A specific question or instruction about the image. "
61-
"Examples: 'What text is visible?', 'Describe the geometric shapes.', "
62-
"'What colors are used in this chart?'"
63-
),
58+
description=("A specific question or instruction about the image. "
59+
"Examples: 'What text is visible?', 'Describe the geometric shapes.', "
60+
"'What colors are used in this chart?'"),
6461
)
6562

6663

@@ -120,12 +117,16 @@ async def describe(self, image_path: str, question: str = "Describe this image i
120117
data_uri = f"data:{mime_type};base64,{b64_data}"
121118

122119
# 4. Build multimodal message (LangChain standard format)
123-
message = HumanMessage(
124-
content=[
125-
{"type": "text", "text": question},
126-
{"type": "image_url", "image_url": {"url": data_uri}},
127-
]
128-
)
120+
message = HumanMessage(content=[
121+
{
122+
"type": "text", "text": question
123+
},
124+
{
125+
"type": "image_url", "image_url": {
126+
"url": data_uri
127+
}
128+
},
129+
])
129130

130131
# 5. Call vision LLM
131132
try:
@@ -164,13 +165,11 @@ def create_image_describe_tool(sandbox: BaseSandbox, vision_llm: Any) -> Structu
164165
return StructuredTool.from_function(
165166
coroutine=tool.describe,
166167
name="image_describe",
167-
description=(
168-
"Analyze an image file using a vision model. "
169-
"Reads the image from the sandbox and returns a text description. "
170-
"Use this for understanding visual content: charts, diagrams, geometric shapes, "
171-
"screenshots, handwritten text, musical notation, photos, etc. "
172-
"For pixel-level processing (cropping, color extraction, OCR coordinates), "
173-
"use the python tool with PIL/OpenCV instead."
174-
),
168+
description=("Analyze an image file using a vision model. "
169+
"Reads the image from the sandbox and returns a text description. "
170+
"Use this for understanding visual content: charts, diagrams, geometric shapes, "
171+
"screenshots, handwritten text, musical notation, photos, etc. "
172+
"For pixel-level processing (cropping, color extraction, OCR coordinates), "
173+
"use the python tool with PIL/OpenCV instead."),
175174
args_schema=ImageDescribeInput,
176175
)

examples/sandbox_agent/src/nat_sandbox_agent/tools/host/web_fetch.py

Lines changed: 20 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,7 @@
3434
logger = logging.getLogger(__name__)
3535

3636
# Default user agent for fetch requests
37-
DEFAULT_USER_AGENT = (
38-
"Mozilla/5.0 (compatible; NATSandboxAgent/1.0; +https://github.com/NVIDIA/NeMo-Agent-Toolkit)"
39-
)
37+
DEFAULT_USER_AGENT = ("Mozilla/5.0 (compatible; NATSandboxAgent/1.0; +https://github.com/NVIDIA/NeMo-Agent-Toolkit)")
4038

4139
# Default max content length (characters) returned per call
4240
DEFAULT_MAX_LENGTH = 5000
@@ -54,10 +52,8 @@ class WebFetchInput(BaseModel):
5452
)
5553
start_index: int = Field(
5654
default=0,
57-
description=(
58-
"Character position to start reading from. "
59-
"Use this to paginate through long pages. Default is 0."
60-
),
55+
description=("Character position to start reading from. "
56+
"Use this to paginate through long pages. Default is 0."),
6157
ge=0,
6258
)
6359
raw: bool = Field(
@@ -93,9 +89,9 @@ async def web_fetch(
9389

9490
try:
9591
async with httpx.AsyncClient(
96-
follow_redirects=True,
97-
timeout=30.0,
98-
headers={"User-Agent": DEFAULT_USER_AGENT},
92+
follow_redirects=True,
93+
timeout=30.0,
94+
headers={"User-Agent": DEFAULT_USER_AGENT},
9995
) as client:
10096
response = await client.get(url)
10197
response.raise_for_status()
@@ -154,10 +150,8 @@ async def web_fetch(
154150
result["next_start_index"] = start_index + max_length
155151
result["remaining"] = total_length - (start_index + max_length)
156152

157-
logger.info(
158-
f"Web fetch returned {len(content)} chars "
159-
f"(total={total_length}, start={start_index})"
160-
)
153+
logger.info(f"Web fetch returned {len(content)} chars "
154+
f"(total={total_length}, start={start_index})")
161155
return result
162156

163157
except httpx.HTTPStatusError as e:
@@ -183,9 +177,7 @@ async def web_fetch(
183177
}
184178

185179

186-
def create_web_fetch_tool(
187-
max_output_chars: int = DEFAULT_MAX_OUTPUT_CHARS,
188-
) -> StructuredTool:
180+
def create_web_fetch_tool(max_output_chars: int = DEFAULT_MAX_OUTPUT_CHARS, ) -> StructuredTool:
189181
"""Create the web fetch tool.
190182
191183
Args:
@@ -196,19 +188,17 @@ def create_web_fetch_tool(
196188
"""
197189
return StructuredTool.from_function(
198190
coroutine=lambda url, max_length=DEFAULT_MAX_LENGTH, start_index=0, raw=False: web_fetch(
199-
url, max_length, start_index, raw, max_output_chars
200-
),
191+
url, max_length, start_index, raw, max_output_chars),
201192
name="web_fetch",
202-
description=(
203-
"Fetch a webpage and convert it to clean Markdown text. "
204-
"Much faster than web_browse but does NOT render JavaScript. "
205-
"Use this for static pages, articles, documentation, and API responses. "
206-
"Use 'start_index' to paginate through long content. "
207-
"Tip: also works with JSON APIs — useful URLs include: "
208-
"Wikipedia edit history: https://en.wikipedia.org/w/api.php?action=query&titles=TITLE&prop=revisions&rvlimit=50&rvprop=timestamp|comment|user&format=json ; "
209-
"GitHub issue events: https://api.github.com/repos/OWNER/REPO/issues/NUM/events ; "
210-
"GitHub issue timeline: https://api.github.com/repos/OWNER/REPO/issues/NUM/timeline ; "
211-
"arXiv monthly listings: https://arxiv.org/list/CATEGORY/YYMM"
212-
),
193+
description=("Fetch a webpage and convert it to clean Markdown text. "
194+
"Much faster than web_browse but does NOT render JavaScript. "
195+
"Use this for static pages, articles, documentation, and API responses. "
196+
"Use 'start_index' to paginate through long content. "
197+
"Tip: also works with JSON APIs — useful URLs include: "
198+
"Wikipedia edit history: https://en.wikipedia.org/w/api.php?action=query"
199+
"&titles=TITLE&prop=revisions&rvlimit=50&rvprop=timestamp|comment|user&format=json ; "
200+
"GitHub issue events: https://api.github.com/repos/OWNER/REPO/issues/NUM/events ; "
201+
"GitHub issue timeline: https://api.github.com/repos/OWNER/REPO/issues/NUM/timeline ; "
202+
"arXiv monthly listings: https://arxiv.org/list/CATEGORY/YYMM"),
213203
args_schema=WebFetchInput,
214204
)

examples/sandbox_agent/tests/test_daytona_sandbox.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,7 @@ def test_get_client_creates_client(self):
9797
):
9898
sandbox._get_client()
9999

100-
mock_config.assert_called_once_with(
101-
api_key="test-key",
102-
)
100+
mock_config.assert_called_once_with(api_key="test-key", )
103101
assert sandbox._client is not None
104102

105103

examples/sandbox_agent/tests/test_tools_host.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -288,9 +288,9 @@ async def test_http_error(self):
288288
mock_resp = MagicMock(spec=httpx.Response)
289289
mock_resp.status_code = 404
290290
mock_resp.reason_phrase = "Not Found"
291-
mock_resp.raise_for_status.side_effect = httpx.HTTPStatusError(
292-
"Not Found", request=MagicMock(), response=mock_resp
293-
)
291+
mock_resp.raise_for_status.side_effect = httpx.HTTPStatusError("Not Found",
292+
request=MagicMock(),
293+
response=mock_resp)
294294

295295
with patch("nat_sandbox_agent.tools.host.web_fetch.httpx.AsyncClient") as MockClient:
296296
mock_ctx = AsyncMock()
@@ -365,5 +365,3 @@ def test_accepts_custom_max_output_chars(self):
365365
"""Test that max_output_chars parameter is accepted."""
366366
tool = create_web_fetch_tool(max_output_chars=5000)
367367
assert tool is not None
368-
369-

examples/sandbox_agent/tests/test_tools_image_describe.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,8 @@ async def test_describe_xlsx_unsupported(self, image_tool):
108108
@pytest.mark.asyncio
109109
async def test_describe_file_not_found(self, image_tool, mock_sandbox):
110110
"""Test handling when image file does not exist."""
111-
mock_sandbox.read_file_bytes = AsyncMock(side_effect=FileNotFoundError("File not found: /workspace/input/missing.png"))
111+
mock_sandbox.read_file_bytes = AsyncMock(
112+
side_effect=FileNotFoundError("File not found: /workspace/input/missing.png"))
112113

113114
result = await image_tool.describe("/workspace/input/missing.png")
114115

0 commit comments

Comments
 (0)