|
| 1 | +"""Regression tests for OpenAPI spec vs. live API mismatches. |
| 2 | +
|
| 3 | +Every test here points at a specific public Actor known to exhibit a data shape that has previously |
| 4 | +broken Pydantic model validation in `apify-client-python`. The point is *not* to test client behavior |
| 5 | +but to catch regressions where the auto-generated models drift away from the live API contract — the |
| 6 | +same class of bug as [#811](https://github.com/apify/apify-client-python/issues/811). |
| 7 | +
|
| 8 | +Each test documents the historical bug it guards against and the actor chosen to exercise it. Where |
| 9 | +the fixture's data shape is critical for the regression to be exercised, the test asserts on that |
| 10 | +shape explicitly so fixture drift fails loudly rather than silently turning the test into a no-op. |
| 11 | +
|
| 12 | +Scope note: every generated model uses `ConfigDict(extra='allow')`, so silently-added new fields in |
| 13 | +the API response are absorbed into `model_extra` and not flagged here. These tests catch |
| 14 | +required-field-missing, enum-violation, regex-violation, and discriminator regressions — not |
| 15 | +newly-introduced optional fields. |
| 16 | +""" |
| 17 | + |
| 18 | +from __future__ import annotations |
| 19 | + |
| 20 | +from typing import TYPE_CHECKING, get_args |
| 21 | + |
| 22 | +from ._utils import maybe_await |
| 23 | +from apify_client._literals import RunOrigin |
| 24 | +from apify_client._models import ( |
| 25 | + Actor, |
| 26 | + ActorChargeEvent, |
| 27 | + Build, |
| 28 | + FlatPricePerMonthActorPricingInfo, |
| 29 | + ListOfBuilds, |
| 30 | + ListOfStoreActors, |
| 31 | + PayPerEventActorPricingInfo, |
| 32 | + PricePerDatasetItemActorPricingInfo, |
| 33 | +) |
| 34 | + |
| 35 | +if TYPE_CHECKING: |
| 36 | + from apify_client import ApifyClient, ApifyClientAsync |
| 37 | + |
| 38 | + |
# ============================================================================
# Test fixtures — public actors known to exhibit specific edge-case data shapes.
# ============================================================================

# Apify-owned actor whose `latest` build sets `minMemoryMbytes: 128` (well below the spec's
# previously-required minimum of 256). Its `actorDefinition.version` is `"0.0.1"`, so the same
# fixture also exercises the semver-triplet regex fixed in apify-docs#2555.
SMALL_MIN_MEMORY_ACTOR = 'apify/instagram-profile-scraper'

# Apify-owned actor whose builds list includes entries with `meta.origin: "CI"` produced by the
# internal CI pipeline. The tests request a deep `desc=True` page because CI builds are infrequent
# and rotate out of the most-recent window.
CI_ORIGIN_ACTOR = 'apify/cheerio-scraper'

# The actor from the original #811 bug report — carries pricing-info entries for every
# non-trivial variant: FLAT_PRICE_PER_MONTH, both flat and tiered PRICE_PER_DATASET_ITEM,
# and tiered PAY_PER_EVENT with `isPrimaryEvent` / `isOneTimeEvent` fields.
ALL_PRICING_VARIANTS_ACTOR = 'apify/facebook-pages-scraper'
| 57 | + |
| 58 | + |
def _pick_build_id(actor: Actor) -> str:
    """Return a deterministic `build_id` from `actor.tagged_builds`, preferring the `latest` tag.

    The `latest` tag wins whenever it carries a non-empty `build_id`. Otherwise the remaining
    tags are scanned in sorted tag-name order, so the result never depends on the order in
    which the API happens to serialize the tagged-builds mapping.

    Args:
        actor: Actor whose `tagged_builds` mapping (tag name -> build info) is inspected.

    Returns:
        The selected non-empty build ID.

    Raises:
        AssertionError: If the actor has no tagged builds, or none of them carries a `build_id`.
    """
    tagged_builds = actor.tagged_builds
    assert tagged_builds, f'{actor.username}/{actor.name} has no tagged builds'

    latest = tagged_builds.get('latest')
    if latest is not None and latest.build_id:
        return latest.build_id

    # Walk the tags in sorted order: a plain `.values()` scan would return whichever usable tag
    # the API serialized first, which is exactly the instability this helper exists to avoid.
    for tag in sorted(tagged_builds):
        info = tagged_builds[tag]
        if info and info.build_id:
            return info.build_id

    raise AssertionError(f'{actor.username}/{actor.name} has no tagged build with a build_id')
| 75 | + |
| 76 | + |
| 77 | +# ============================================================================ |
| 78 | +# Tests |
| 79 | +# ============================================================================ |
| 80 | + |
| 81 | + |
async def test_build_get_accepts_small_min_memory_mbytes(client: ApifyClient | ApifyClientAsync) -> None:
    """Check that `build.get()` deserializes `actorDefinition.minMemoryMbytes` values under 256.

    Guards against a regression of apify-docs PR #2559. The spec used to declare `minimum: 256`
    for both `minMemoryMbytes` and `maxMemoryMbytes`, whereas the platform's real minimum is
    128 MB and many community actors use exactly that. Before the fix, ~20 store actors failed
    `build.get()` with a Pydantic `greater_than_equal` ValidationError.

    Only the `min` side is asserted: no fixture with a stable `maxMemoryMbytes < 256` was
    available when the test was written, so the `max` side (changed identically in the spec)
    is implicitly trusted to share fate with `min`.
    """
    fetched_actor = await maybe_await(client.actor(SMALL_MIN_MEMORY_ACTOR).get())
    assert isinstance(fetched_actor, Actor)

    # A ValidationError raised by this call means the spec has regressed.
    fetched_build = await maybe_await(client.build(_pick_build_id(fetched_actor)).get())
    assert isinstance(fetched_build, Build)

    definition = fetched_build.actor_definition
    assert definition is not None, 'expected actorDefinition on a SUCCEEDED build'

    # Fixture-drift guard: the test is only a meaningful regression check while the chosen
    # build really carries a value under the old 256 threshold.
    min_memory = definition.min_memory_mbytes
    assert min_memory is not None
    assert min_memory < 256, (
        f'{SMALL_MIN_MEMORY_ACTOR} latest build has min_memory_mbytes={min_memory!r} '
        '(expected <256). Pick a different fixture to keep this regression test meaningful.'
    )
| 111 | + |
| 112 | + |
async def test_actor_builds_list_accepts_ci_origin(client: ApifyClient | ApifyClientAsync) -> None:
    """Check that `actor.builds().list()` deserializes builds whose `meta.origin` is `"CI"`.

    Guards against a regression of apify-docs PR #2559: the `RunOrigin` enum lacked `"CI"` even
    though Apify-owned actors are routinely rebuilt by the internal CI pipeline. The request
    asks for a deep `desc=True` page (limit=100) because CI builds are rare relative to
    WEB/CLI builds.
    """
    builds_page = await maybe_await(client.actor(CI_ORIGIN_ACTOR).builds().list(limit=100, desc=True))
    assert isinstance(builds_page, ListOfBuilds)
    assert builds_page.items, f'{CI_ORIGIN_ACTOR} should have builds'

    # Fixture-drift guard: every `meta.origin` was already validated against `RunOrigin` during
    # deserialization, so the regression is exercised only if the page holds a CI-origin build.
    has_ci_build = any(
        listed_build.meta is not None and listed_build.meta.origin == 'CI' for listed_build in builds_page.items
    )
    assert has_ci_build, (
        f'{CI_ORIGIN_ACTOR}: no builds with meta.origin == "CI" in the most-recent 100. '
        'CI builds may have rotated out of the window — pick a different actor or paginate deeper.'
    )
| 132 | + |
| 133 | + |
def test_run_origin_enum_includes_ci() -> None:
    """Check that the `RunOrigin` literal lists `"CI"` among its allowed values.

    Purely static guard (no API call) against a future model regeneration dropping the value.
    The live API has been returning `"CI"` on builds triggered by the internal CI pipeline
    since at least 2024.
    """
    allowed_origins = get_args(RunOrigin)
    assert 'CI' in allowed_origins
| 141 | + |
| 142 | + |
async def test_actor_get_parses_tiered_price_per_dataset_item(client: ApifyClient | ApifyClientAsync) -> None:
    """Check that `actor.get()` deserializes `PRICE_PER_DATASET_ITEM` entries using `tieredPricing`.

    Guards against a regression of apify-client-python #811 / apify-docs PR #2555. Tiered-PPD
    actors return `tieredPricing: {FREE, BRONZE, SILVER, GOLD, PLATINUM, DIAMOND}` rather than
    a flat `pricePerUnitUsd`, yet the spec used to require `pricePerUnitUsd` on every PPD entry.
    """
    fetched_actor = await maybe_await(client.actor(ALL_PRICING_VARIANTS_ACTOR).get())
    assert isinstance(fetched_actor, Actor)
    assert fetched_actor.pricing_infos

    tiered_entries = [
        pricing
        for pricing in fetched_actor.pricing_infos
        if isinstance(pricing, PricePerDatasetItemActorPricingInfo) and pricing.tiered_pricing is not None
    ]
    assert tiered_entries, (
        f'{ALL_PRICING_VARIANTS_ACTOR} should have at least one tiered PRICE_PER_DATASET_ITEM entry — '
        'pick a different actor if pricing changed.'
    )

    # Fixture-drift guard: a tiered payload only exercises the regression when it holds several
    # tiers whose prices actually differ; a single-tier or all-equal payload would be
    # indistinguishable from flat pricing.
    for tiered_entry in tiered_entries:
        tiers = tiered_entry.tiered_pricing
        assert tiers is not None  # for type narrowing
        tier_count = len(tiers)
        assert tier_count >= 2, (
            f'{ALL_PRICING_VARIANTS_ACTOR} tiered PPD entry has only {tier_count} tier(s); '
            'expected multiple tiers (e.g. FREE/BRONZE/SILVER/GOLD/PLATINUM/DIAMOND).'
        )
        distinct_prices = {tier.tiered_price_per_unit_usd for tier in tiers.values()}
        assert len(distinct_prices) >= 2, (
            f'{ALL_PRICING_VARIANTS_ACTOR} tiered PPD entry has all-identical prices ({distinct_prices}); '
            'tiers should differ.'
        )
| 178 | + |
| 179 | + |
async def test_actor_get_parses_tiered_pay_per_event(client: ApifyClient | ApifyClientAsync) -> None:
    """Check that `actor.get()` deserializes tiered `PAY_PER_EVENT` charge events.

    Covers events carrying `eventTieredPricingUsd`, `isPrimaryEvent` and `isOneTimeEvent`.

    Guards against a regression of apify-client-python #818 / apify-docs PR #2555: tiered PPE
    events use `eventTieredPricingUsd` in place of `eventPriceUsd`, and the spec gained the
    `isPrimaryEvent` / `isOneTimeEvent` fields.

    NOTE(review): the inline comment below and the `ALL_PRICING_VARIANTS_ACTOR` fixture comment
    cite issue #811 rather than #818 — confirm which issue number is correct.
    """
    fetched_actor = await maybe_await(client.actor(ALL_PRICING_VARIANTS_ACTOR).get())
    assert isinstance(fetched_actor, Actor)
    assert fetched_actor.pricing_infos

    # Collect every tiered charge event across all PAY_PER_EVENT pricing entries.
    tiered_events: list[ActorChargeEvent] = [
        charge_event
        for pricing in fetched_actor.pricing_infos
        if isinstance(pricing, PayPerEventActorPricingInfo)
        for charge_event in (pricing.pricing_per_event.actor_charge_events or {}).values()
        if charge_event.event_tiered_pricing_usd is not None
    ]

    assert tiered_events, (
        f'{ALL_PRICING_VARIANTS_ACTOR} should have at least one tiered PAY_PER_EVENT event — '
        'pick a different actor if pricing changed.'
    )
    # `isPrimaryEvent` and `isOneTimeEvent` were the specific additions in #811. Since every
    # model uses `extra='allow'`, a regeneration that drops either alias would quietly push the
    # JSON key into `model_extra`; checking the typed attributes is what exposes that drift.
    assert any(charge_event.is_primary_event is True for charge_event in tiered_events), (
        f'{ALL_PRICING_VARIANTS_ACTOR}: no tiered PPE event has is_primary_event == True. '
        'The isPrimaryEvent alias may have been dropped from the model.'
    )
    assert any(charge_event.is_one_time_event is not None for charge_event in tiered_events), (
        f'{ALL_PRICING_VARIANTS_ACTOR}: no tiered PPE event has is_one_time_event populated. '
        'The isOneTimeEvent alias may have been dropped from the model.'
    )
| 214 | + |
| 215 | + |
async def test_actor_pricing_infos_includes_expected_variants(client: ApifyClient | ApifyClientAsync) -> None:
    """Check that the pricing fixture still exposes PPE, PPD and FLAT_PRICE_PER_MONTH entries.

    Fixture sanity check for `apify/facebook-pages-scraper`. Should the actor stop publishing
    one of these bug-prone discriminated-union variants, the sibling `tiered_*` regression
    tests would no longer exercise their code paths; this test then fails loudly and tells
    maintainers to pick a different fixture.

    Note: a hypothetical new pricing variant added to the API (e.g. `PAY_PER_RESULT`) cannot be
    detected here. Pydantic's discriminated union raises `ValidationError` with a discriminator
    error at `actor.get()` time, so the test crashes before reaching the check below.
    """
    fetched_actor = await maybe_await(client.actor(ALL_PRICING_VARIANTS_ACTOR).get())
    assert isinstance(fetched_actor, Actor)
    assert fetched_actor.pricing_infos

    expected = {
        PayPerEventActorPricingInfo,
        PricePerDatasetItemActorPricingInfo,
        FlatPricePerMonthActorPricingInfo,
    }
    # Exact classes present in the response; anything expected but absent is reported.
    seen = set(map(type, fetched_actor.pricing_infos))
    missing = expected.difference(seen)
    assert not missing, (
        f'{ALL_PRICING_VARIANTS_ACTOR} no longer carries pricing_infos for {missing}; '
        'the sibling regression tests for those variants are now unguarded. Pick a different fixture.'
    )
| 243 | + |
| 244 | + |
async def test_actor_definition_version_accepts_semver_triplet(client: ApifyClient | ApifyClientAsync) -> None:
    """Check that `ActorDefinition.version` accepts semver-style triplets such as `0.0.1`.

    Guards against a regression of apify-docs PR #2555: the pattern used to be
    `^[0-9]+\\.[0-9]+$`, which rejected any version with more than one dot.
    `apify/instagram-profile-scraper` publishes its actor.json with `version: "0.0.1"`.
    """
    fetched_actor = await maybe_await(client.actor(SMALL_MIN_MEMORY_ACTOR).get())
    assert isinstance(fetched_actor, Actor)

    # `build.get()` succeeding on a triplet version is the proof that the regex accepted it.
    fetched_build = await maybe_await(client.build(_pick_build_id(fetched_actor)).get())
    assert isinstance(fetched_build, Build)

    definition = fetched_build.actor_definition
    assert definition is not None

    # Fixture-drift guard: the old broken regex allowed exactly one dot, so a version with at
    # least two dots is the shape that distinguishes the fixed pattern from the broken one.
    version = definition.version
    assert version is not None
    dot_count = version.count('.')
    assert dot_count >= 2, (
        f'{SMALL_MIN_MEMORY_ACTOR} no longer publishes a multi-dot version (got {version!r}) — '
        'pick a different fixture to keep the regex regression test meaningful.'
    )
| 269 | + |
| 270 | + |
async def test_store_list_parses_full_first_page(client: ApifyClient | ApifyClientAsync) -> None:
    """Check that `store.list()` deserializes a full 100-actor page without ValidationError.

    Broad sweep over the most-popular slice of the store. It catches enum-violation,
    required-field-missing and discriminator regressions for whichever actors currently sit on
    page 1; long-tail actors at deeper ranks are not covered. Because every model uses
    `extra='allow'`, silently-added or silently-renamed fields go undetected.
    """
    store_page = await maybe_await(client.store().list(limit=100))
    assert isinstance(store_page, ListOfStoreActors)

    # Pydantic validated every item during `model_validate`; the checks below make the
    # assertion concrete and rule out an empty page.
    assert store_page.items, (
        f'{type(store_page).__name__} returned an empty items list — unexpected for the public store'
    )
    for store_actor in store_page.items:
        assert store_actor.id
        assert store_actor.name
        assert store_actor.username
0 commit comments