diff --git a/AGENTS.md b/AGENTS.md index 310ff230..de633abe 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -241,6 +241,10 @@ Before generating or modifying code, read the relevant spec folders: - **[`middleware/api/spec/document-store/`](middleware/api/spec/document-store/)** — CouchDB persistence layer, race-condition-safe initialization, and content-hash idempotency. - **[`middleware/api/spec/harvest-manager/`](middleware/api/spec/harvest-manager/)** — Harvest run lifecycle, ownership validation, and progress tracking. +**API Client component** (`middleware/api_client/spec/`) — client internals: + +- **[`middleware/api_client/spec/harvest-client/`](middleware/api_client/spec/harvest-client/)** — Harvest lifecycle: parallel ARC submission, per-item error collection (`HarvestError`, `HarvestErrorType`), typed statistics (`HarvestStatistics`), and compatibility shim for issue #240. + For the AI agent workflow documentation, see [`docs/ai_workflow.md`](docs/ai_workflow.md). ### Spec-to-Code Mapping @@ -256,6 +260,7 @@ The `spec-to-code` agent uses this table in Step 3 to locate affected code. | `middleware/api/spec/harvest-manager/` | `middleware/api/src/middleware/api/business_logic/harvest_manager.py` | | `middleware/api/spec/arc-upload/` | `middleware/api/src/middleware/api/api/v3/arcs.py` | | `middleware/api/spec/harvest-arc-upload/` | `middleware/api/src/middleware/api/api/v3/harvests.py` | +| `middleware/api_client/spec/harvest-client/` | `middleware/api_client/src/middleware/api_client/api_client.py`, `models.py` | | `spec/` (project-level) | Follow links in **Architecture & Design** above to the affected component. 
| --- diff --git a/docker/Dockerfile.api b/docker/Dockerfile.api index 6999b475..3e103272 100644 --- a/docker/Dockerfile.api +++ b/docker/Dockerfile.api @@ -15,7 +15,7 @@ COPY pyproject.toml uv.lock ./ COPY middleware ./middleware # Upgrade pip and install uv -RUN pip install --no-cache-dir --upgrade pip==26.0.1 uv==0.11.7 +RUN pip install --no-cache-dir --upgrade pip==26.1.1 uv==0.11.16 # Build wheels RUN uv build --package fairagro-middleware-shared --wheel && \ @@ -38,7 +38,7 @@ RUN apk add --no-cache \ WORKDIR /build # Install uv and PyInstaller -RUN pip install --no-cache-dir --upgrade pip==26.0.1 uv==0.11.7 +RUN pip install --no-cache-dir --upgrade pip==26.1.1 uv==0.11.16 # Copy built wheel from package-builder stage COPY --from=package-builder /build/dist/*.whl /tmp/wheels/ @@ -100,7 +100,7 @@ ENV UVICORN_LOG_LEVEL=info # Create non-root user and group and fix permissions RUN apk add --no-cache --upgrade \ - curl=8.17.0-r1 \ + curl=8.19.0-r0 \ git=2.52.0-r0 \ zlib=1.3.2-r0 \ tzdata \ diff --git a/docs/architecture.md b/docs/architecture.md index 0ba3d2c3..e58f7f0e 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -9,13 +9,17 @@ architecture-beta group rdi2(database)[RDI 2] service rdi2db(database)[DB] in rdi2 - service csw(server)[csw] in rdi2 + service csw(server)["CSW / INSPIRE"] in rdi2 + + group rdi3(database)[RDI 3] + service rdi3db(database)[DB] in rdi3 + service web(internet)["Web / schema.org"] in rdi3 group middleware(cloud)[Middleware] service api(server)[API] in middleware service db(database)[CouchDB] in middleware service git(database)[DataHUB] in middleware - service inspire2arc(server)[inspire2arc] in middleware + service harvester(server)[Harvester] in middleware service searchhub(server)[SearchHUB] service sciwin(server)[SciWIn] @@ -25,8 +29,10 @@ architecture-beta sql2arc:L --> R:rdi1db sql2arc:R --> L:api csw:L --> R:rdi2db - inspire2arc:T --> B:api - inspire2arc:L --> R:csw + web:L --> R:rdi3db + harvester:T --> B:api + 
harvester:L --> R:csw + harvester:B --> T:web searchhub:L --> R:git sciwin:L --> B:git ``` diff --git a/middleware/api_client/spec/harvest-client/design.md b/middleware/api_client/spec/harvest-client/design.md new file mode 100644 index 00000000..d38eab96 --- /dev/null +++ b/middleware/api_client/spec/harvest-client/design.md @@ -0,0 +1,62 @@ +# Harvest Client — Design + +## Module Overview + +`ApiClient` (`api_client.py`) orchestrates the harvest lifecycle. +`HarvestResult`, `HarvestStatistics`, `HarvestError`, and `HarvestErrorType` +(`models.py`) are the stable public types exposed to harvesters. + +```text +harvester + └─→ ApiClient.harvest_arcs(rdi, arcs) + ├─→ create_harvest → HarvestResult (RUNNING) + ├─→ _submit_arcs_parallel + │ ├─→ duplicate check (client-side) → HarvestError(DUPLICATE) + │ └─→ POST v3/harvests/{id}/arcs → HarvestError(SUBMISSION_FAILED) on error + └─→ complete_harvest → HarvestResult (COMPLETED) + └─→ inject client_errors via model_copy → HarvestResult.errors +``` + +## Key Decisions + +1. **`HarvestStatistics` is a typed Pydantic model, not `dict`** + — The server serializes its internal `HarvestStatistics` via `model_dump()` + before sending it over the wire. The field names and types are stable and + known. A typed model gives consumers validated, IDE-navigable fields rather + than requiring dict key lookups with no type safety. + +2. **`HarvestError` is a client-facing type in `models.py`, independent of any server model** + — Per-item errors are currently generated client-side. When the server + persists them natively (issue #240), `_parse_harvest_response` will + populate `HarvestResult.errors` from the server response automatically — + the type and consumer interface remain unchanged. + +3. **`arc_id: str | None` in `HarvestError`** + — `DUPLICATE` errors always carry an ARC identifier; `SUBMISSION_FAILED` + errors carry one whenever it is extractable from the RO-Crate. 
Future + error categories — such as harvest-level timeouts or config failures — + may not be associated with any specific ARC. `None` is the semantically + correct representation; an empty string would be an invisible sentinel + value that callers would need to treat specially. + +4. **Client-side error collection as compatibility shim until issue #240** + — `harvest_arcs()` collects errors from `_submit_arcs_parallel()` and + merges them into the server response via `model_copy(update=...)`. + This shim will be removed once the server persists and returns per-item errors + natively. The `model_copy` merge is additive: if the server already + returns errors in its response (post-#240), client-side errors are + appended rather than overwriting them. + +5. **Duplicate detection is performed client-side before the HTTP request** + — Submitting both ARCs would cause the server to process two ARCs + with the same identifier in the same harvest run, resulting in an opaque + conflict. Client-side detection gives an explicit `DUPLICATE` error, + prevents the wasted round-trip, and avoids requiring the server to handle + intra-harvest identity conflicts. + +6. **Item-level failures are non-fatal; harvest-level failures are fatal** + — A submission failure for one ARC (e.g. server 422 on bad content) must + not abort the entire harvest because the remaining ARCs may be valid. A + catastrophic failure (e.g. 401 Unauthorized, harvest already closed) means + no further submissions will succeed, so the harvest is aborted, marked + `FAILED`, and the exception propagates to the caller. 
diff --git a/middleware/api_client/spec/harvest-client/spec.md b/middleware/api_client/spec/harvest-client/spec.md new file mode 100644 index 00000000..2e91ff20 --- /dev/null +++ b/middleware/api_client/spec/harvest-client/spec.md @@ -0,0 +1,50 @@ +# Harvest Client + +Manage the full lifecycle of a harvest run — creation, parallel ARC +submission, error collection, and finalization — on behalf of a harvester +process. The client returns a typed result that captures both statistics +and per-item errors so harvesters can produce complete reports. + +## Requirements + +- [ ] Create a harvest run for a given RDI, submit all ARCs from an async + source in bounded parallelism, and return the completed harvest result + as a single operation. +- [ ] Accept an optional expected-dataset count at the start of a harvest to + enable progress tracking on the server side. +- [ ] Return typed harvest statistics (submitted, new, updated, unchanged, + missing counts, and optional expected-dataset count) as structured + fields rather than an opaque mapping. +- [ ] Record per-item errors encountered during submission and include them + in the returned harvest result. +- [ ] Classify each per-item error into one of the following categories: + `duplicate` (two ARCs share the same identifier) or `submission_failed` + (the server rejected or could not process the ARC). +- [ ] Each per-item error carries: the error category, a human-readable + message, and an ISO 8601 timestamp of when the error occurred. +- [ ] Optionally associate a per-item error with an ARC identifier; errors + that do not relate to a specific ARC (e.g. harvest-level failures) may + omit the identifier. +- [ ] Detect duplicate ARC identifiers before submission and record them as + `duplicate` errors; do not submit the duplicate. +- [ ] Skip individual ARC submission failures and continue the harvest with + remaining items; record each failure as a `submission_failed` error. 
+- [ ] Abort the entire harvest on catastrophic errors (e.g. authentication + failure, invalid harvest state) and mark the harvest as failed before + propagating the exception to the caller. + +## Edge Cases + +ARC with no extractable RO-Crate identifier → submitted normally; any +resulting error records no ARC identifier (`null`). + +Two ARCs share the same identifier → the second is skipped; a `duplicate` +error is recorded for it; the first continues to be submitted normally. + +Catastrophic error during submission → remaining tasks are cancelled; the +harvest is transitioned to `FAILED`; the exception propagates to the caller. + +No per-item errors → the returned result contains an empty errors list. + +`expected_datasets` not provided → harvest is created without a progress +denominator; statistics show raw counts only. diff --git a/middleware/api_client/src/middleware/api_client/__init__.py b/middleware/api_client/src/middleware/api_client/__init__.py index 9b04869a..5cdfe376 100644 --- a/middleware/api_client/src/middleware/api_client/__init__.py +++ b/middleware/api_client/src/middleware/api_client/__init__.py @@ -2,7 +2,18 @@ from .api_client import ApiClient, ApiClientError from .config import Config -from .models import ArcEventSummary, ArcLifecycleStatus, ArcMetadata, ArcResult, ArcStatus, HarvestResult, HarvestStatus +from .models import ( + ArcEventSummary, + ArcLifecycleStatus, + ArcMetadata, + ArcResult, + ArcStatus, + HarvestError, + HarvestErrorType, + HarvestResult, + HarvestStatistics, + HarvestStatus, +) __all__ = [ "Config", @@ -14,5 +25,8 @@ "ArcMetadata", "ArcEventSummary", "HarvestResult", + "HarvestStatistics", "HarvestStatus", + "HarvestError", + "HarvestErrorType", ] diff --git a/middleware/api_client/src/middleware/api_client/api_client.py b/middleware/api_client/src/middleware/api_client/api_client.py index 4ce1ee23..13ac9172 100644 --- a/middleware/api_client/src/middleware/api_client/api_client.py +++ 
b/middleware/api_client/src/middleware/api_client/api_client.py @@ -7,6 +7,7 @@ import threading from collections.abc import AsyncGenerator, AsyncIterator from contextlib import asynccontextmanager +from datetime import UTC, datetime from http import HTTPStatus from typing import TYPE_CHECKING, Any, cast @@ -22,7 +23,7 @@ ) from .config import Config -from .models import ArcResult, HarvestResult +from .models import ArcResult, HarvestError, HarvestErrorType, HarvestResult, HarvestStatus if TYPE_CHECKING: from arctrl import ARC # type: ignore[import-untyped] @@ -246,29 +247,46 @@ def _process_completed_arc_tasks( self, harvest_id: str, done_tasks: set[asyncio.Task[None]], - ) -> tuple[int, Exception | None]: - """Return (failed_count, catastrophic_error) for completed submission tasks.""" - failed_submissions = 0 + task_identifiers: dict[asyncio.Task[None], str | None], + ) -> tuple[list[HarvestError], Exception | None]: + """Return (errors, catastrophic_error) for completed submission tasks.""" + errors: list[HarvestError] = [] + catastrophic_error: Exception | None = None for done_task in done_tasks: + arc_id = task_identifiers.pop(done_task, None) try: done_task.result() except Exception as e: # noqa: BLE001 if self._is_catastrophic_harvest_error(e): - return failed_submissions, e - failed_submissions += 1 - logger.warning("Skipping failed ARC submission in harvest %s: %s", harvest_id, e) + if catastrophic_error is None: + catastrophic_error = e + else: + errors.append( + HarvestError( + arc_id=arc_id, + error_type=HarvestErrorType.SUBMISSION_FAILED, + message=str(e), + timestamp=datetime.now(UTC).isoformat(), + ) + ) + logger.warning("Skipping failed ARC submission in harvest %s: %s", harvest_id, e) - return failed_submissions, None + return errors, catastrophic_error async def _submit_arcs_parallel( self, harvest_id: str, arcs: "AsyncGenerator[ARC | dict[str, Any] | str, None] | AsyncIterator[ARC | dict[str, Any] | str]", - ) -> int: - """Submit all ARCs in 
bounded parallelism and return number of skipped ARC submissions.""" + ) -> list[HarvestError]: + """Submit all ARCs in bounded parallelism and return per-item errors. + + Compatibility shim (issue #240): duplicate detection and submission + failures are recorded client-side until the server persists them natively. + """ pending_tasks: set[asyncio.Task[None]] = set() - failed_submissions = 0 + task_identifiers: dict[asyncio.Task[None], str | None] = {} + errors: list[HarvestError] = [] seen_identifiers: set[str] = set() async def submit_one(arc_item: dict[str, Any]) -> None: @@ -281,35 +299,43 @@ async def submit_one(arc_item: dict[str, Any]) -> None: if identifier is not None: if identifier in seen_identifiers: logger.error( - "Skipping duplicate ARC identifier '%s' in harvest %s. " - "Two ARCs share the same identifier — this is a client-side data error.", + "Duplicate ARC identifier '%s' in harvest %s — " + "two ARCs share the same identifier (client-side data error).", identifier, harvest_id, ) - failed_submissions += 1 + errors.append( + HarvestError( + arc_id=identifier, + error_type=HarvestErrorType.DUPLICATE, + message=f"Duplicate ARC identifier '{identifier}' — two ARCs share the same identifier", + timestamp=datetime.now(UTC).isoformat(), + ) + ) continue seen_identifiers.add(identifier) task = asyncio.create_task(submit_one(serialized)) + task_identifiers[task] = identifier pending_tasks.add(task) if len(pending_tasks) >= self._config.max_concurrency: done, pending = await asyncio.wait(pending_tasks, return_when=asyncio.FIRST_COMPLETED) pending_tasks = pending - failed_delta, catastrophic_error = self._process_completed_arc_tasks(harvest_id, done) - failed_submissions += failed_delta + new_errors, catastrophic_error = self._process_completed_arc_tasks(harvest_id, done, task_identifiers) + errors.extend(new_errors) if catastrophic_error is not None: await self._cancel_pending_arc_tasks(pending_tasks) raise catastrophic_error if pending_tasks: done, _ = 
await asyncio.wait(pending_tasks) - failed_delta, catastrophic_error = self._process_completed_arc_tasks(harvest_id, done) - failed_submissions += failed_delta + new_errors, catastrophic_error = self._process_completed_arc_tasks(harvest_id, done, task_identifiers) + errors.extend(new_errors) if catastrophic_error is not None: raise catastrophic_error - return failed_submissions + return errors def __init__(self, config: Config) -> None: """Initialize the ApiClient. @@ -572,25 +598,32 @@ async def create_harvest( data = await self._post("v3/harvests", request) return self._parse_harvest_response(data) - async def list_harvests(self, rdi: str | None = None) -> list[HarvestResult]: - """List harvest runs. + async def list_harvests( + self, + rdi: str | None = None, + status: HarvestStatus | None = None, + limit: int = 20, + offset: int = 0, + ) -> list[HarvestResult]: + """List harvest runs, newest first. - Uses ``GET /v3/harvests``. + .. note:: + Not yet implemented — requires server-side changes (status filter, + guaranteed newest-first sort order). Tracked in GitHub issue #242. Args: rdi: Optional RDI filter. + status: Optional status filter (e.g. ``HarvestStatus.RUNNING``). + limit: Maximum number of results to return (default 20). + offset: Number of records to skip for pagination (default 0). - Returns: - List of :class:`HarvestResult` objects. + Raises: + NotImplementedError: Always — pending server-side support. """ - params: dict[str, str] | None = None - if rdi: - params = {"rdi": rdi} - data = await self._get("v3/harvests", params=params) - try: - return [HarvestResult.model_validate(d) for d in data] - except ValidationError as e: - raise ApiClientError(f"Invalid harvest list response from API: {e}") from e + raise NotImplementedError( + "list_harvests requires server-side changes (status filter, guaranteed " + "newest-first sort order). See GitHub issue #242." 
+ ) async def get_harvest(self, harvest_id: str) -> HarvestResult: """Get a single harvest run by ID. @@ -721,7 +754,7 @@ async def my_arcs() -> AsyncGenerator[dict | str, None]: logger.info("[%s] Started harvest %s for RDI %s", rdi, harvest_id, rdi) try: - failed_submissions = await self._submit_arcs_parallel(harvest_id, arcs) + client_errors = await self._submit_arcs_parallel(harvest_id, arcs) except Exception: logger.warning( "[%s] Catastrophic error during ARC submission, marking harvest %s as failed", rdi, harvest_id @@ -729,15 +762,21 @@ async def my_arcs() -> AsyncGenerator[dict | str, None]: await self._fail_harvest_safely(rdi, harvest_id) raise - if failed_submissions > 0: + if client_errors: logger.warning( - "[%s] Harvest %s completed with %d skipped ARC submissions", + "[%s] Harvest %s has %d per-item error(s)", rdi, harvest_id, - failed_submissions, + len(client_errors), ) result = await self.complete_harvest(harvest_id) + # Compatibility shim (issue #240): inject client-side errors into the result + # until the server persists and returns them natively via the harvest response. + # When the server supports it, result.errors will already be populated here + # and this merge can be removed. 
+ if client_errors: + result = result.model_copy(update={"errors": result.errors + client_errors}) logger.info("[%s] Completed harvest %s", rdi, harvest_id) return result diff --git a/middleware/api_client/src/middleware/api_client/models.py b/middleware/api_client/src/middleware/api_client/models.py index 76e4e8cf..721166fa 100644 --- a/middleware/api_client/src/middleware/api_client/models.py +++ b/middleware/api_client/src/middleware/api_client/models.py @@ -40,6 +40,32 @@ class HarvestStatus(StrEnum): CANCELLED = "CANCELLED" +class HarvestErrorType(StrEnum): + """Category of a per-item error recorded during a harvest run.""" + + DUPLICATE = "duplicate" + SUBMISSION_FAILED = "submission_failed" + + +class HarvestError(BaseModel): + """A single per-item error recorded during a harvest run. + + Once the server persists errors natively (issue #240), this list is + populated directly from the server response returned by any harvest + query method. Until then, :meth:`~middleware.api_client.ApiClient.harvest_arcs` + collects errors client-side and injects them into the returned + :class:`HarvestResult` as a compatibility shim. + """ + + arc_id: Annotated[ + str | None, + Field(description="ARC identifier (RO-Crate identifier field), None if not applicable or not extractable"), + ] = None + error_type: Annotated[HarvestErrorType, Field(description="Category of the error")] + message: Annotated[str, Field(description="Human-readable error description")] + timestamp: Annotated[str, Field(description="ISO 8601 timestamp when the error occurred")] = "" + + class ArcEventSummary(BaseModel): """Summary of a single event recorded against an ARC.""" @@ -76,6 +102,26 @@ class ArcResult(BaseModel): client_id: Annotated[str | None, Field(description="Authenticated client identifier")] = None +class HarvestStatistics(BaseModel): + """Statistics for a completed harvest run. 
+ + Mirrors the server-side ``HarvestStatistics`` wire format so that + :attr:`~middleware.api_client.HarvestResult.statistics` is + validated and typed rather than an opaque ``dict``. + """ + + expected_datasets: Annotated[ + int | None, + Field(description="Number of datasets expected to be harvested, as reported by the client."), + ] = None + arcs_submitted: Annotated[int, Field(description="Total ARCs submitted")] = 0 + arcs_new: Annotated[int, Field(description="New ARCs created")] = 0 + arcs_updated: Annotated[int, Field(description="Existing ARCs updated")] = 0 + arcs_unchanged: Annotated[int, Field(description="ARCs with no changes")] = 0 + arcs_missing: Annotated[int, Field(description="ARCs marked as missing")] = 0 + errors: Annotated[int, Field(description="Number of errors encountered")] = 0 + + class HarvestResult(BaseModel): """Result returned by harvest-related methods on :class:`~middleware.api_client.ApiClient`. @@ -88,6 +134,16 @@ class HarvestResult(BaseModel): status: Annotated[HarvestStatus, Field(description="Current harvest status")] started_at: Annotated[str, Field(description="ISO 8601 start timestamp")] completed_at: Annotated[str | None, Field(description="ISO 8601 completion timestamp")] = None - statistics: Annotated[dict, Field(description="Harvest statistics")] = Field(default_factory=dict) + statistics: Annotated[HarvestStatistics, Field(description="Harvest statistics")] = Field( + default_factory=HarvestStatistics + ) + errors: Annotated[ + list[HarvestError], + Field( + description="Per-item errors encountered during the harvest run. " + "Populated client-side by harvest_arcs() until the server supports " + "error persistence natively (issue #240)." 
+ ), + ] = Field(default_factory=list) message: Annotated[str, Field(description="Human-readable result message")] = "" client_id: Annotated[str | None, Field(description="Authenticated client identifier")] = None diff --git a/middleware/api_client/tests/unit/test_client.py b/middleware/api_client/tests/unit/test_client.py index 0fee05ee..ebd0c7a9 100644 --- a/middleware/api_client/tests/unit/test_client.py +++ b/middleware/api_client/tests/unit/test_client.py @@ -13,7 +13,14 @@ import respx from arctrl import ARC, ArcInvestigation # type: ignore[import-untyped] -from middleware.api_client import ApiClient, ApiClientError, ArcResult, Config, HarvestResult +from middleware.api_client import ( + ApiClient, + ApiClientError, + ArcResult, + Config, + HarvestErrorType, + HarvestResult, +) # --------------------------------------------------------------------------- # Helpers @@ -359,12 +366,12 @@ async def slow_response(_: httpx.Request) -> httpx.Response: await asyncio.sleep(0.02) async with counter_lock: in_flight -= 1 - return httpx.Response(http.HTTPStatus.OK, json=[_HARVEST_RESPONSE]) + return httpx.Response(http.HTTPStatus.OK, json=_HARVEST_RESPONSE) - route = respx.get(f"{client_config.api_url}v3/harvests").mock(side_effect=slow_response) + route = respx.get(f"{client_config.api_url}v3/harvests/harvest-456").mock(side_effect=slow_response) async with ApiClient(client_config) as client: - await asyncio.gather(*(client.list_harvests() for _ in range(6))) + await asyncio.gather(*(client.get_harvest("harvest-456") for _ in range(6))) assert route.call_count == 6 # noqa: PLR2004 assert peak_in_flight <= 2 # noqa: PLR2004 @@ -426,31 +433,6 @@ async def test_create_harvest_503_not_retried(client_config: Config) -> None: assert route.call_count == 1 -@pytest.mark.asyncio -@respx.mock -async def test_list_harvests(client_config: Config) -> None: - """Test listing harvest runs.""" - respx.get(f"{client_config.api_url}v3/harvests").mock( - 
return_value=httpx.Response(http.HTTPStatus.OK, json=[_HARVEST_RESPONSE, _HARVEST_RESPONSE]) - ) - async with ApiClient(client_config) as client: - harvests = await client.list_harvests() - assert len(harvests) == 2 # noqa: PLR2004 - assert all(isinstance(h, HarvestResult) for h in harvests) - - -@pytest.mark.asyncio -@respx.mock -async def test_list_harvests_with_rdi_filter(client_config: Config) -> None: - """Test listing harvest runs filtered by RDI.""" - route = respx.get(f"{client_config.api_url}v3/harvests").mock( - return_value=httpx.Response(http.HTTPStatus.OK, json=[_HARVEST_RESPONSE]) - ) - async with ApiClient(client_config) as client: - await client.list_harvests(rdi="test-rdi") - assert "rdi=test-rdi" in str(route.calls.last.request.url) - - @pytest.mark.asyncio @respx.mock async def test_get_harvest(client_config: Config) -> None: @@ -688,6 +670,10 @@ async def test_harvest_arcs_continues_on_item_error(client_config: Config) -> No assert route_submit.call_count == EXPECTED_ARC_UPLOADS assert complete_route.called assert not cancel_route.called + assert len(result.errors) == 1 + assert result.errors[0].error_type == HarvestErrorType.SUBMISSION_FAILED + assert result.errors[0].arc_id is None + assert "HTTP error 400" in result.errors[0].message @pytest.mark.asyncio @@ -747,6 +733,10 @@ async def test_harvest_arcs_skips_duplicate_identifier(client_config: Config) -> assert result.status == "COMPLETED" assert arc_route.call_count == 1 # duplicate was skipped, not submitted assert complete_route.called + assert len(result.errors) == 1 + assert result.errors[0].error_type == HarvestErrorType.DUPLICATE + assert result.errors[0].arc_id == "duplicate-arc" + assert "duplicate-arc" in result.errors[0].message @pytest.mark.asyncio diff --git a/scripts/update-apk-dependencies.sh b/scripts/update-apk-dependencies.sh index 60e67d7b..08bcad35 100755 --- a/scripts/update-apk-dependencies.sh +++ b/scripts/update-apk-dependencies.sh @@ -73,7 +73,7 @@ while IFS= read -r 
match; do # Escape dots in version string (literal in versions, but special in sed regex) escaped_current="${current//./\\.}" - sed -i "s|\(^\|[[:space:]]\)${pkg}=${escaped_current}\([[:space:]]\|$\)|\1${pkg}=${latest}\2|g" "$DOCKERFILE" + sed -i "s/\(^\|[[:space:]]\)${pkg}=${escaped_current}\([[:space:]]\|$\)/\1${pkg}=${latest}\2/g" "$DOCKERFILE" done < <(grep -oE '[a-z0-9][a-z0-9_-]*=[0-9][a-z0-9._]+-r[0-9]+' "$DOCKERFILE" || true) @@ -101,7 +101,7 @@ while IFS= read -r match; do echo "⬆️ $pkg: $current → $latest" escaped_current="${current//./\\.}" - sed -i "s|\(^\|[[:space:]]\)${pkg}==${escaped_current}\([[:space:]]\|$\)|\1${pkg}==${latest}\2|g" "$DOCKERFILE" + sed -i "s/\(^\|[[:space:]]\)${pkg}==${escaped_current}\([[:space:]]\|$\)/\1${pkg}==${latest}\2/g" "$DOCKERFILE" done < <(grep -oE '[a-zA-Z0-9][a-zA-Z0-9_-]*==[0-9][a-z0-9._]*' "$DOCKERFILE" || true)