Fix CI: apply yapf formatting and resolve Vale vocabulary errors

Jerryguan777 · Jerryguan777 · commit 44809c73bd5e · 2026-04-24T19:36:29.000-07:00
- Wrap Python package names in backticks in sandbox_agent README so Vale
  treats them as inline code.
- Add FFmpeg (a product name, not a package) to Vale accept.txt.
- Apply yapf reformatting and a ruff fix (E501: split an overlong URL in
  the web_fetch tool description string), and drop a placeholder-less
  f-string prefix, in files that previously failed the pre-commit hooks.

Signed-off-by: Jerry Guan <jerryguan777@gmail.com>
diff --git a/ci/vale/styles/config/vocabularies/nemo-agent-toolkit-examples/accept.txt b/ci/vale/styles/config/vocabularies/nemo-agent-toolkit-examples/accept.txt
@@ -53,6 +53,7 @@ DB(s?)
 [Ee]val
 [Ee]xplainability
 Faiss
+FFmpeg
 [Gg]eneratable
 glog
 GPU(s?)
diff --git a/examples/sandbox_agent/README.md b/examples/sandbox_agent/README.md
@@ -192,15 +192,15 @@ The sandbox provides an isolated workspace:
 ```
 
 **Pre-installed in nat-sandbox image:**
-- Data processing: pandas, NumPy, matplotlib, seaborn, SymPy
-- Web: requests, httpx, beautifulsoup4
-- Browser: playwright (Chromium)
-- PDF: pdfplumber, pypdf, pdf2image, poppler-utils
-- OCR: pytesseract, tesseract-ocr
-- Computer vision: opencv-python-headless
-- Audio: faster-whisper (with pre-downloaded tiny model), FFmpeg
-- Documents: python-pptx, python-docx, reportlab
-- Utilities: pillow, pyyaml, openpyxl
+- Data processing: `pandas`, `numpy`, `matplotlib`, `seaborn`, `sympy`
+- Web: `requests`, `httpx`, `beautifulsoup4`
+- Browser: `playwright` (Chromium)
+- PDF: `pdfplumber`, `pypdf`, `pdf2image`, `poppler-utils`
+- OCR: `pytesseract`, `tesseract-ocr`
+- Computer vision: `opencv-python-headless`
+- Audio: `faster-whisper` (with pre-downloaded tiny model), FFmpeg
+- Documents: `python-pptx`, `python-docx`, `reportlab`
+- Utilities: `pillow`, `pyyaml`, `openpyxl`
 
 ## GAIA Benchmark Evaluation
 
diff --git a/examples/sandbox_agent/scripts/enrich_gaia_dataset.py b/examples/sandbox_agent/scripts/enrich_gaia_dataset.py
@@ -92,19 +92,21 @@ def enrich_dataset(input_path: str, output_path: str) -> None:
 
     # Show a sample
     sample = df[has_file].iloc[0]
-    print(f"\nSample enriched question (first 200 chars):")
+    print("\nSample enriched question (first 200 chars):")
     print(f"  {sample['Question'][:200]}")
 
 
 def main():
     parser = argparse.ArgumentParser(description="Enrich GAIA dataset with attachment file paths")
     parser.add_argument(
-        "--input", "-i",
+        "--input",
+        "-i",
         default=str(_DEFAULT_INPUT),
         help=f"Input parquet path (default: {_DEFAULT_INPUT})",
     )
     parser.add_argument(
-        "--output", "-o",
+        "--output",
+        "-o",
         default=str(_DEFAULT_OUTPUT),
         help=f"Output parquet path (default: {_DEFAULT_OUTPUT})",
     )
diff --git a/examples/sandbox_agent/src/nat_sandbox_agent/tools/host/image_describe.py b/examples/sandbox_agent/src/nat_sandbox_agent/tools/host/image_describe.py
@@ -52,15 +52,12 @@ class ImageDescribeInput(BaseModel):
     """Input schema for image_describe tool."""
 
     image_path: str = Field(
-        description="Path to the image file inside the sandbox (e.g. /workspace/input/photo.png).",
-    )
+        description="Path to the image file inside the sandbox (e.g. /workspace/input/photo.png).", )
     question: str = Field(
         default="Describe this image in detail.",
-        description=(
-            "A specific question or instruction about the image. "
-            "Examples: 'What text is visible?', 'Describe the geometric shapes.', "
-            "'What colors are used in this chart?'"
-        ),
+        description=("A specific question or instruction about the image. "
+                     "Examples: 'What text is visible?', 'Describe the geometric shapes.', "
+                     "'What colors are used in this chart?'"),
     )
 
 
@@ -120,12 +117,16 @@ async def describe(self, image_path: str, question: str = "Describe this image i
         data_uri = f"data:{mime_type};base64,{b64_data}"
 
         # 4. Build multimodal message (LangChain standard format)
-        message = HumanMessage(
-            content=[
-                {"type": "text", "text": question},
-                {"type": "image_url", "image_url": {"url": data_uri}},
-            ]
-        )
+        message = HumanMessage(content=[
+            {
+                "type": "text", "text": question
+            },
+            {
+                "type": "image_url", "image_url": {
+                    "url": data_uri
+                }
+            },
+        ])
 
         # 5. Call vision LLM
         try:
@@ -164,13 +165,11 @@ def create_image_describe_tool(sandbox: BaseSandbox, vision_llm: Any) -> Structu
     return StructuredTool.from_function(
         coroutine=tool.describe,
         name="image_describe",
-        description=(
-            "Analyze an image file using a vision model. "
-            "Reads the image from the sandbox and returns a text description. "
-            "Use this for understanding visual content: charts, diagrams, geometric shapes, "
-            "screenshots, handwritten text, musical notation, photos, etc. "
-            "For pixel-level processing (cropping, color extraction, OCR coordinates), "
-            "use the python tool with PIL/OpenCV instead."
-        ),
+        description=("Analyze an image file using a vision model. "
+                     "Reads the image from the sandbox and returns a text description. "
+                     "Use this for understanding visual content: charts, diagrams, geometric shapes, "
+                     "screenshots, handwritten text, musical notation, photos, etc. "
+                     "For pixel-level processing (cropping, color extraction, OCR coordinates), "
+                     "use the python tool with PIL/OpenCV instead."),
         args_schema=ImageDescribeInput,
     )
diff --git a/examples/sandbox_agent/src/nat_sandbox_agent/tools/host/web_fetch.py b/examples/sandbox_agent/src/nat_sandbox_agent/tools/host/web_fetch.py
@@ -34,9 +34,7 @@
 logger = logging.getLogger(__name__)
 
 # Default user agent for fetch requests
-DEFAULT_USER_AGENT = (
-    "Mozilla/5.0 (compatible; NATSandboxAgent/1.0; +https://github.com/NVIDIA/NeMo-Agent-Toolkit)"
-)
+DEFAULT_USER_AGENT = ("Mozilla/5.0 (compatible; NATSandboxAgent/1.0; +https://github.com/NVIDIA/NeMo-Agent-Toolkit)")
 
 # Default max content length (characters) returned per call
 DEFAULT_MAX_LENGTH = 5000
@@ -54,10 +52,8 @@ class WebFetchInput(BaseModel):
     )
     start_index: int = Field(
         default=0,
-        description=(
-            "Character position to start reading from. "
-            "Use this to paginate through long pages. Default is 0."
-        ),
+        description=("Character position to start reading from. "
+                     "Use this to paginate through long pages. Default is 0."),
         ge=0,
     )
     raw: bool = Field(
@@ -93,9 +89,9 @@ async def web_fetch(
 
     try:
         async with httpx.AsyncClient(
-            follow_redirects=True,
-            timeout=30.0,
-            headers={"User-Agent": DEFAULT_USER_AGENT},
+                follow_redirects=True,
+                timeout=30.0,
+                headers={"User-Agent": DEFAULT_USER_AGENT},
         ) as client:
             response = await client.get(url)
             response.raise_for_status()
@@ -154,10 +150,8 @@ async def web_fetch(
             result["next_start_index"] = start_index + max_length
             result["remaining"] = total_length - (start_index + max_length)
 
-        logger.info(
-            f"Web fetch returned {len(content)} chars "
-            f"(total={total_length}, start={start_index})"
-        )
+        logger.info(f"Web fetch returned {len(content)} chars "
+                    f"(total={total_length}, start={start_index})")
         return result
 
     except httpx.HTTPStatusError as e:
@@ -183,9 +177,7 @@ async def web_fetch(
         }
 
 
-def create_web_fetch_tool(
-    max_output_chars: int = DEFAULT_MAX_OUTPUT_CHARS,
-) -> StructuredTool:
+def create_web_fetch_tool(max_output_chars: int = DEFAULT_MAX_OUTPUT_CHARS, ) -> StructuredTool:
     """Create the web fetch tool.
 
     Args:
@@ -196,19 +188,17 @@ def create_web_fetch_tool(
     """
     return StructuredTool.from_function(
         coroutine=lambda url, max_length=DEFAULT_MAX_LENGTH, start_index=0, raw=False: web_fetch(
-            url, max_length, start_index, raw, max_output_chars
-        ),
+            url, max_length, start_index, raw, max_output_chars),
         name="web_fetch",
-        description=(
-            "Fetch a webpage and convert it to clean Markdown text. "
-            "Much faster than web_browse but does NOT render JavaScript. "
-            "Use this for static pages, articles, documentation, and API responses. "
-            "Use 'start_index' to paginate through long content. "
-            "Tip: also works with JSON APIs — useful URLs include: "
-            "Wikipedia edit history: https://en.wikipedia.org/w/api.php?action=query&titles=TITLE&prop=revisions&rvlimit=50&rvprop=timestamp|comment|user&format=json ; "
-            "GitHub issue events: https://api.github.com/repos/OWNER/REPO/issues/NUM/events ; "
-            "GitHub issue timeline: https://api.github.com/repos/OWNER/REPO/issues/NUM/timeline ; "
-            "arXiv monthly listings: https://arxiv.org/list/CATEGORY/YYMM"
-        ),
+        description=("Fetch a webpage and convert it to clean Markdown text. "
+                     "Much faster than web_browse but does NOT render JavaScript. "
+                     "Use this for static pages, articles, documentation, and API responses. "
+                     "Use 'start_index' to paginate through long content. "
+                     "Tip: also works with JSON APIs — useful URLs include: "
+                     "Wikipedia edit history: https://en.wikipedia.org/w/api.php?action=query"
+                     "&titles=TITLE&prop=revisions&rvlimit=50&rvprop=timestamp|comment|user&format=json ; "
+                     "GitHub issue events: https://api.github.com/repos/OWNER/REPO/issues/NUM/events ; "
+                     "GitHub issue timeline: https://api.github.com/repos/OWNER/REPO/issues/NUM/timeline ; "
+                     "arXiv monthly listings: https://arxiv.org/list/CATEGORY/YYMM"),
         args_schema=WebFetchInput,
     )
diff --git a/examples/sandbox_agent/tests/test_daytona_sandbox.py b/examples/sandbox_agent/tests/test_daytona_sandbox.py
@@ -97,9 +97,7 @@ def test_get_client_creates_client(self):
         ):
             sandbox._get_client()
 
-            mock_config.assert_called_once_with(
-                api_key="test-key",
-            )
+            mock_config.assert_called_once_with(api_key="test-key", )
             assert sandbox._client is not None
 
 
diff --git a/examples/sandbox_agent/tests/test_tools_host.py b/examples/sandbox_agent/tests/test_tools_host.py
@@ -288,9 +288,9 @@ async def test_http_error(self):
         mock_resp = MagicMock(spec=httpx.Response)
         mock_resp.status_code = 404
         mock_resp.reason_phrase = "Not Found"
-        mock_resp.raise_for_status.side_effect = httpx.HTTPStatusError(
-            "Not Found", request=MagicMock(), response=mock_resp
-        )
+        mock_resp.raise_for_status.side_effect = httpx.HTTPStatusError("Not Found",
+                                                                       request=MagicMock(),
+                                                                       response=mock_resp)
 
         with patch("nat_sandbox_agent.tools.host.web_fetch.httpx.AsyncClient") as MockClient:
             mock_ctx = AsyncMock()
@@ -365,5 +365,3 @@ def test_accepts_custom_max_output_chars(self):
         """Test that max_output_chars parameter is accepted."""
         tool = create_web_fetch_tool(max_output_chars=5000)
         assert tool is not None
-
-
diff --git a/examples/sandbox_agent/tests/test_tools_image_describe.py b/examples/sandbox_agent/tests/test_tools_image_describe.py
@@ -108,7 +108,8 @@ async def test_describe_xlsx_unsupported(self, image_tool):
     @pytest.mark.asyncio
     async def test_describe_file_not_found(self, image_tool, mock_sandbox):
         """Test handling when image file does not exist."""
-        mock_sandbox.read_file_bytes = AsyncMock(side_effect=FileNotFoundError("File not found: /workspace/input/missing.png"))
+        mock_sandbox.read_file_bytes = AsyncMock(
+            side_effect=FileNotFoundError("File not found: /workspace/input/missing.png"))
 
         result = await image_tool.describe("/workspace/input/missing.png")