add some more tests

vdusek · vdusek · commit d301c605ca55 · 2026-05-22T15:41:16.000+02:00
diff --git a/tests/integration/test_model_compatibility.py b/tests/integration/test_model_compatibility.py
@@ -0,0 +1,287 @@
+"""Regression tests for OpenAPI spec vs. live API mismatches.
+
+Every test here points at a specific public Actor known to exhibit a data shape that has previously
+broken Pydantic model validation in `apify-client-python`. The point is *not* to test client behavior
+but to catch regressions where the auto-generated models drift away from the live API contract — the
+same class of bug as [#811](https://github.com/apify/apify-client-python/issues/811).
+
+Each test documents the historical bug it guards against and the actor chosen to exercise it. Where
+the fixture's data shape is critical for the regression to be exercised, the test asserts on that
+shape explicitly so fixture drift fails loudly rather than silently turning the test into a no-op.
+
+Scope note: every generated model uses `ConfigDict(extra='allow')`, so silently-added new fields in
+the API response are absorbed into `model_extra` and not flagged here. These tests catch
+required-field-missing, enum-violation, regex-violation, and discriminator regressions — not
+newly-introduced optional fields.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, get_args
+
+from ._utils import maybe_await
+from apify_client._literals import RunOrigin
+from apify_client._models import (
+    Actor,
+    ActorChargeEvent,
+    Build,
+    FlatPricePerMonthActorPricingInfo,
+    ListOfBuilds,
+    ListOfStoreActors,
+    PayPerEventActorPricingInfo,
+    PricePerDatasetItemActorPricingInfo,
+)
+
+if TYPE_CHECKING:
+    from apify_client import ApifyClient, ApifyClientAsync
+
+
+# ============================================================================
+# Test fixtures — public actors known to exhibit specific edge-case data shapes.
+# ============================================================================
+
+# Apify-owned actor whose `latest` build sets `minMemoryMbytes: 128` (well below the spec's
+# previously-required minimum of 256) and `actorDefinition.version: "0.0.1"` (the semver-triplet
+# regex fixed in apify-docs#2555). Used by both the min-memory and the version-regex tests.
+SMALL_MIN_MEMORY_ACTOR = 'apify/instagram-profile-scraper'
+
+# Apify-owned actor whose builds list includes entries with `meta.origin: "CI"`
+# from the internal CI pipeline. The test requests a 100-build page (`limit=100, desc=True`)
+# because CI builds are infrequent and rotate out of the most-recent window.
+CI_ORIGIN_ACTOR = 'apify/cheerio-scraper'
+
+# The actor from the original #811 bug report — carries pricing-info entries for every
+# non-trivial variant: FLAT_PRICE_PER_MONTH, both flat and tiered PRICE_PER_DATASET_ITEM,
+# and tiered PAY_PER_EVENT with `isPrimaryEvent` / `isOneTimeEvent` fields.
+ALL_PRICING_VARIANTS_ACTOR = 'apify/facebook-pages-scraper'
+
+
+def _pick_build_id(actor: Actor) -> str:
+    """Choose one `build_id` from `actor.tagged_builds`, preferring the `latest` tag.
+
+    Preferring `latest` avoids depending on API-side dict ordering; other tags are only a fallback.
+    """
+    assert actor.tagged_builds, f'{actor.username}/{actor.name} has no tagged builds'
+    preferred = actor.tagged_builds.get('latest')
+    if preferred is not None and preferred.build_id is not None:
+        return preferred.build_id
+    chosen = None
+    for candidate in actor.tagged_builds.values():
+        if candidate and candidate.build_id:
+            chosen = candidate.build_id
+            break
+    assert chosen is not None, f'{actor.username}/{actor.name} has no tagged build with a build_id'
+    return chosen
+
+
+# ============================================================================
+# Tests
+# ============================================================================
+
+
+async def test_build_get_accepts_small_min_memory_mbytes(client: ApifyClient | ApifyClientAsync) -> None:
+    """Check that `build.get()` validates a build whose `minMemoryMbytes` is below 256.
+
+    Guards the fix from apify-docs PR #2559. The spec used to declare `minimum: 256` for both
+    `minMemoryMbytes` and `maxMemoryMbytes`, whereas the platform's real minimum is 128 MB and
+    many community actors use exactly that; before the fix, ~20 store actors failed
+    `build.get()` with a Pydantic `greater_than_equal` ValidationError.
+
+    Only the `min` side is exercised: no fixture with a stable `maxMemoryMbytes < 256` was
+    available at the time of writing, so `max` is trusted to share the same spec change.
+    """
+    fixture_actor = await maybe_await(client.actor(SMALL_MIN_MEMORY_ACTOR).get())
+    assert isinstance(fixture_actor, Actor)
+    build_id = _pick_build_id(fixture_actor)
+
+    # A ValidationError raised by this call means the spec has regressed.
+    fetched_build = await maybe_await(client.build(build_id).get())
+    assert isinstance(fetched_build, Build)
+    definition = fetched_build.actor_definition
+    assert definition is not None, 'expected actorDefinition on a SUCCEEDED build'
+
+    # Fixture-drift guard: the regression is exercised only when the selected build really
+    # reports a value under the former 256 minimum.
+    actual_min = definition.min_memory_mbytes
+    assert actual_min is not None
+    assert actual_min < 256, (
+        f'{SMALL_MIN_MEMORY_ACTOR} latest build has min_memory_mbytes={actual_min!r} '
+        '(expected <256). Pick a different fixture to keep this regression test meaningful.'
+    )
+
+
+async def test_actor_builds_list_accepts_ci_origin(client: ApifyClient | ApifyClientAsync) -> None:
+    """Check that `actor.builds().list()` validates builds carrying `meta.origin: "CI"`.
+
+    Guards the fix from apify-docs PR #2559: the `RunOrigin` enum lacked `"CI"` although the
+    internal CI pipeline routinely rebuilds Apify-owned actors. A deep page is requested
+    (`limit=100, desc=True`) because CI builds are rare relative to WEB/CLI builds.
+    """
+    build_page = await maybe_await(client.actor(CI_ORIGIN_ACTOR).builds().list(limit=100, desc=True))
+    assert isinstance(build_page, ListOfBuilds)
+    assert build_page.items, f'{CI_ORIGIN_ACTOR} should have builds'
+
+    # Fixture-drift guard: Pydantic already checked every `meta.origin` against `RunOrigin` during
+    # deserialization, so the regression is exercised only if this page holds a CI-origin build.
+    # A single match suffices, so scanning stops at the first one.
+    has_ci_build = any(b.meta is not None and b.meta.origin == 'CI' for b in build_page.items)
+    assert has_ci_build, (
+        f'{CI_ORIGIN_ACTOR}: no builds with meta.origin == "CI" in the most-recent 100. '
+        'CI builds may have rotated out of the window — pick a different actor or paginate deeper.'
+    )
+
+
+def test_run_origin_enum_includes_ci() -> None:
+    """Check that the `RunOrigin` literal lists `"CI"` among its allowed values.
+
+    Static guard against a model regeneration dropping a value the live API returns on CI builds.
+    """
+    allowed_origins = get_args(RunOrigin)
+    assert 'CI' in allowed_origins
+
+
+async def test_actor_get_parses_tiered_price_per_dataset_item(client: ApifyClient | ApifyClientAsync) -> None:
+    """Check that `actor.get()` validates `PRICE_PER_DATASET_ITEM` entries using `tieredPricing`.
+
+    Guards apify-client-python #811 / apify-docs PR #2555. A tiered-PPD actor reports
+    `tieredPricing: {FREE, BRONZE, SILVER, GOLD, PLATINUM, DIAMOND}` rather than a flat
+    `pricePerUnitUsd`, which the spec formerly required on every PPD entry.
+    """
+    fixture_actor = await maybe_await(client.actor(ALL_PRICING_VARIANTS_ACTOR).get())
+    assert isinstance(fixture_actor, Actor)
+    infos = fixture_actor.pricing_infos
+    assert infos
+
+    # Narrow to PRICE_PER_DATASET_ITEM entries first, then keep only the ones that actually
+    # carry a tiered-pricing payload.
+    ppd_entries = [info for info in infos if isinstance(info, PricePerDatasetItemActorPricingInfo)]
+    tiered_ppd_entries = [ppd for ppd in ppd_entries if ppd.tiered_pricing is not None]
+    assert tiered_ppd_entries, (
+        f'{ALL_PRICING_VARIANTS_ACTOR} should have at least one tiered PRICE_PER_DATASET_ITEM entry — '
+        'pick a different actor if pricing changed.'
+    )
+
+    # Fixture-drift guard: a tiered payload is a meaningful fixture only when it holds several
+    # tiers whose prices are not all equal. A single-tier or uniform-price payload would be
+    # indistinguishable from flat pricing.
+    for entry in tiered_ppd_entries:
+        assert entry.tiered_pricing is not None  # for type narrowing
+        assert len(entry.tiered_pricing) > 1, (
+            f'{ALL_PRICING_VARIANTS_ACTOR} tiered PPD entry has only {len(entry.tiered_pricing)} tier(s); '
+            'expected multiple tiers (e.g. FREE/BRONZE/SILVER/GOLD/PLATINUM/DIAMOND).'
+        )
+        distinct_prices = {tier.tiered_price_per_unit_usd for tier in entry.tiered_pricing.values()}
+        assert len(distinct_prices) > 1, (
+            f'{ALL_PRICING_VARIANTS_ACTOR} tiered PPD entry has all-identical prices ({distinct_prices}); '
+            'tiers should differ.'
+        )
+
+
+async def test_actor_get_parses_tiered_pay_per_event(client: ApifyClient | ApifyClientAsync) -> None:
+    """Check that `actor.get()` validates tiered `PAY_PER_EVENT` charge events.
+
+    Guards apify-client-python #818 / apify-docs PR #2555: tiered PPE events carry
+    `eventTieredPricingUsd` in place of `eventPriceUsd`, and the spec gained the
+    `isPrimaryEvent` / `isOneTimeEvent` fields.
+    NOTE(review): this test previously cited both #811 and #818 for these fields — confirm which.
+    """
+    fixture_actor = await maybe_await(client.actor(ALL_PRICING_VARIANTS_ACTOR).get())
+    assert isinstance(fixture_actor, Actor)
+    infos = fixture_actor.pricing_infos
+    assert infos
+
+    ppe_infos = [info for info in infos if isinstance(info, PayPerEventActorPricingInfo)]
+    tiered_ppe_events: list[ActorChargeEvent] = []
+    for ppe_info in ppe_infos:
+        charge_events = ppe_info.pricing_per_event.actor_charge_events or {}
+        tiered_ppe_events += [e for e in charge_events.values() if e.event_tiered_pricing_usd is not None]
+
+    assert tiered_ppe_events, (
+        f'{ALL_PRICING_VARIANTS_ACTOR} should have at least one tiered PAY_PER_EVENT event — '
+        'pick a different actor if pricing changed.'
+    )
+    # Every model uses `extra='allow'`, so a regenerated model that loses the `isPrimaryEvent` or
+    # `isOneTimeEvent` alias would quietly absorb the JSON key into `model_extra`. Checking that
+    # the typed attributes are populated is what surfaces that kind of drift.
+    assert any(event.is_primary_event is True for event in tiered_ppe_events), (
+        f'{ALL_PRICING_VARIANTS_ACTOR}: no tiered PPE event has is_primary_event == True. '
+        'The isPrimaryEvent alias may have been dropped from the model.'
+    )
+    assert any(event.is_one_time_event is not None for event in tiered_ppe_events), (
+        f'{ALL_PRICING_VARIANTS_ACTOR}: no tiered PPE event has is_one_time_event populated. '
+        'The isOneTimeEvent alias may have been dropped from the model.'
+    )
+
+
+async def test_actor_pricing_infos_includes_expected_variants(client: ApifyClient | ApifyClientAsync) -> None:
+    """Check that `apify/facebook-pages-scraper` still carries every bug-prone pricing variant.
+
+    Fixture sanity check for the discriminated-union variants PPE, PPD, and FLAT_PRICE_PER_MONTH.
+    If the actor stops publishing one of them, the sibling `tiered_*` regression tests no longer
+    exercise their code paths, and this test fails loudly to prompt picking a different fixture.
+
+    Note: a hypothetical new pricing variant added to the API (e.g. `PAY_PER_RESULT`) cannot
+    be detected here. Pydantic's discriminated union raises `ValidationError` at `actor.get()`
+    time with a discriminator error, so the test crashes before reaching this code.
+    Variants are matched with `isinstance`, so subclasses of an expected model also count.
+    """
+    actor = await maybe_await(client.actor(ALL_PRICING_VARIANTS_ACTOR).get())
+    assert isinstance(actor, Actor)
+    assert actor.pricing_infos
+
+    # Match with `isinstance` (as the sibling tests do) instead of exact `type()` membership.
+    expected = (
+        PayPerEventActorPricingInfo,
+        PricePerDatasetItemActorPricingInfo,
+        FlatPricePerMonthActorPricingInfo,
+    )
+    missing = [cls.__name__ for cls in expected if not any(isinstance(e, cls) for e in actor.pricing_infos)]
+    assert not missing, (
+        f'{ALL_PRICING_VARIANTS_ACTOR} no longer carries pricing_infos for {missing}; '
+        'the sibling regression tests for those variants are now unguarded. Pick a different fixture.'
+    )
+
+
+async def test_actor_definition_version_accepts_semver_triplet(client: ApifyClient | ApifyClientAsync) -> None:
+    """Check that `ActorDefinition.version` validates semver-style triplets such as `0.0.1`.
+
+    Guards the fix from apify-docs PR #2555: the old pattern `^[0-9]+\\.[0-9]+$` rejected any
+    version containing more than one dot, while `apify/instagram-profile-scraper` publishes
+    its actor.json with `version: "0.0.1"`.
+    """
+    fixture_actor = await maybe_await(client.actor(SMALL_MIN_MEMORY_ACTOR).get())
+    assert isinstance(fixture_actor, Actor)
+
+    # If build.get() succeeds on a triplet version, the regex accepted it.
+    fetched_build = await maybe_await(client.build(_pick_build_id(fixture_actor)).get())
+    assert isinstance(fetched_build, Build)
+    definition = fetched_build.actor_definition
+    assert definition is not None
+    # Fixture-drift guard: the regression is exercised only when the selected build's version
+    # really contains more than one dot. The old pattern allowed exactly one dot, so at least
+    # two dots is the shape that tells the two patterns apart.
+    version = definition.version
+    assert version is not None
+    assert version.count('.') >= 2, (
+        f'{SMALL_MIN_MEMORY_ACTOR} no longer publishes a multi-dot version (got {version!r}) — '
+        'pick a different fixture to keep the regex regression test meaningful.'
+    )
+
+
+async def test_store_list_parses_full_first_page(client: ApifyClient | ApifyClientAsync) -> None:
+    """Check that `store.list()` validates every item on a 100-actor page.
+
+    Broad sweep over the first page of the store. It catches enum-violation,
+    required-field-missing, and discriminator regressions for whichever actors appear on
+    page 1 (long-tail actors at deeper ranks are not covered). As every model uses
+    `extra='allow'`, silently-added or silently-renamed fields go undetected here.
+    """
+    page = await maybe_await(client.store().list(limit=100))
+    assert isinstance(page, ListOfStoreActors)
+    # Pydantic validated each item while building the page, so the checks below only make
+    # the result concrete and confirm that the page is not empty.
+    assert page.items, f'{type(page).__name__} returned an empty items list — unexpected for the public store'
+    # Every listed actor must expose a truthy id, name, and username.
+    for store_actor in page.items:
+        for field_value in (store_actor.id, store_actor.name, store_actor.username):
+            assert field_value