|
5 | 5 | import asyncio |
6 | 6 | import logging |
7 | 7 | import subprocess |
| 8 | +import uuid |
8 | 9 | from datetime import UTC, datetime |
9 | 10 | from pathlib import Path |
10 | 11 |
|
@@ -259,6 +260,9 @@ async def fake_scrape( |
259 | 260 | invalid_extra_resp = await client.get("/alpha/read?bad!param=1") |
260 | 261 | assert invalid_extra_resp.status_code == 400 |
261 | 262 | assert invalid_extra_resp.json()["error"]["code"] == "INVALID_PARAMS" |
| 263 | + invalid_identifier_resp = await client.get("/alpha/read?model-id=1") |
| 264 | + assert invalid_identifier_resp.status_code == 400 |
| 265 | + assert invalid_identifier_resp.json()["error"]["code"] == "INVALID_PARAMS" |
262 | 266 |
|
263 | 267 | # Non-existent endpoint on a recipe (404 from FastAPI) |
264 | 268 | unknown_ep_resp = await client.get("/beta/search") |
@@ -527,6 +531,126 @@ async def test_recipe_management_uninstall_force_for_unmanaged_local( |
527 | 531 | assert "local-only" not in slugs |
528 | 532 |
|
529 | 533 |
|
@pytest.mark.asyncio
async def test_mcp_bridge_preserves_special_characters_in_params(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An MCP tool call must forward '&', '?', '=' etc. to scrape un-mangled.

    Installs a recipe with one parameterised endpoint, stubs out the real
    scraper to record what it receives, then posts a query and an extra
    param that both contain URL-special characters and asserts they arrive
    byte-for-byte intact.
    """
    recipe_root = tmp_path / "recipes"
    _write_recipe(
        recipe_root,
        "alpha",
        endpoints={
            "search": {
                "url": "https://example.com/search?q={query}&page={page}",
                "requires_query": True,
                "params": {
                    "tools_url": {
                        "description": "MCP bridge URL",
                        "required": False,
                    },
                },
                "items": {"container": ".item", "fields": {"title": {"selector": ".title"}}},
                "pagination": {"type": "page_param", "param": "page"},
            },
        },
    )

    seen: dict[str, object] = {}

    async def recording_scrape(
        *,
        pool: FakePool,
        recipe,
        endpoint: str,
        page: int = 1,
        query: str | None = None,
        extra_params: dict[str, str] | None = None,
        scrape_timeout: float = 30.0,
    ) -> ApiResponse:
        # Only the query and the extra params matter for this test.
        _ = pool, recipe, endpoint, page, scrape_timeout
        seen["query"] = query
        seen["extra_params"] = dict(extra_params or {})
        return _success_response(slug="alpha", endpoint="search", page=1, query=query)

    monkeypatch.setattr("web2api.main.scrape", recording_scrape)

    stub_pool = FakePool()
    app = create_app(recipes_dir=recipe_root, pool=stub_pool)

    async with app.router.lifespan_context(app):
        transport = ASGITransport(app=app)
        async with AsyncClient(transport=transport, base_url="http://testserver") as client:
            response = await client.post(
                "/mcp/tools/alpha__search",
                json={
                    "q": "cats & dogs",
                    "tools_url": "http://localhost:8100/mcp/tools?x=1&y=2",
                },
            )

    assert response.status_code == 200
    assert seen["query"] == "cats & dogs"
    assert seen["extra_params"] == {
        "tools_url": "http://localhost:8100/mcp/tools?x=1&y=2",
    }
| 598 | + |
@pytest.mark.asyncio
async def test_post_upload_rejects_path_traversal_filenames(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A '../'-prefixed upload filename must not escape the upload directory.

    Improvements over the previous version:
    * The escape sentinel is built from ``tempfile.gettempdir()`` instead of a
      hard-coded ``/tmp`` — on macOS/Windows the system temp dir is elsewhere,
      which made the old ``escaped_path.exists()`` check vacuously pass.
    * The sentinel is removed in a ``finally`` block, so a regressing server
      that *does* honour the traversal never leaves a stray file behind.
    """
    import tempfile  # function-scoped: only this test needs it

    recipes_dir = tmp_path / "recipes"
    _write_recipe(recipes_dir, "alpha")

    captured: dict[str, object] = {}
    escaped_name = f"web2api_escape_{uuid.uuid4().hex}.txt"
    # Where "../<name>" would land if the upload dir sits directly under the
    # system temp dir and traversal were honoured. NOTE(review): assumes the
    # server creates its upload dir via tempfile — confirm against main.py.
    escaped_path = Path(tempfile.gettempdir()) / escaped_name
    if escaped_path.exists():
        escaped_path.unlink()

    async def fake_scrape(
        *,
        pool: FakePool,
        recipe,
        endpoint: str,
        page: int = 1,
        query: str | None = None,
        extra_params: dict[str, str] | None = None,
        scrape_timeout: float = 30.0,
    ) -> ApiResponse:
        # Record only what the traversal assertions need to inspect.
        _ = pool, recipe, endpoint, page, query, scrape_timeout
        captured["extra_params"] = dict(extra_params or {})
        return _success_response(slug="alpha", endpoint="read", page=1)

    monkeypatch.setattr("web2api.main.scrape", fake_scrape)

    fake_pool = FakePool()
    app = create_app(recipes_dir=recipes_dir, pool=fake_pool)

    try:
        async with app.router.lifespan_context(app):
            transport = ASGITransport(app=app)
            async with AsyncClient(transport=transport, base_url="http://testserver") as client:
                response = await client.post(
                    "/alpha/read",
                    files={
                        "files": (f"../{escaped_name}", b"uploaded-content", "text/plain"),
                    },
                )

        assert response.status_code == 200
        # The sentinel must not exist: the filename was sanitised, not honoured.
        assert escaped_path.exists() is False
        extra_params = captured.get("extra_params")
        assert isinstance(extra_params, dict)
        file_paths = extra_params.get("file_paths", [])
        assert isinstance(file_paths, list)
        assert len(file_paths) == 1
        uploaded_path = Path(str(file_paths[0]))
        # The saved file keeps the basename but stays inside the upload dir.
        assert uploaded_path.name == escaped_name
        assert "/../" not in str(uploaded_path)
    finally:
        # Clean up the sentinel even when the server actually wrote it.
        escaped_path.unlink(missing_ok=True)
| 653 | + |
530 | 654 | @pytest.mark.asyncio |
531 | 655 | async def test_check_updates_endpoint( |
532 | 656 | tmp_path: Path, |
|
0 commit comments