Skip to content

Commit d301c60

Browse files
committed
add some more tests
1 parent 67887fd commit d301c60

1 file changed

Lines changed: 287 additions & 0 deletions

File tree

Lines changed: 287 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,287 @@
1+
"""Regression tests for OpenAPI spec vs. live API mismatches.
2+
3+
Every test here points at a specific public Actor known to exhibit a data shape that has previously
4+
broken Pydantic model validation in `apify-client-python`. The point is *not* to test client behavior
5+
but to catch regressions where the auto-generated models drift away from the live API contract — the
6+
same class of bug as [#811](https://github.com/apify/apify-client-python/issues/811).
7+
8+
Each test documents the historical bug it guards against and the actor chosen to exercise it. Where
9+
the fixture's data shape is critical for the regression to be exercised, the test asserts on that
10+
shape explicitly so fixture drift fails loudly rather than silently turning the test into a no-op.
11+
12+
Scope note: every generated model uses `ConfigDict(extra='allow')`, so silently-added new fields in
13+
the API response are absorbed into `model_extra` and not flagged here. These tests catch
14+
required-field-missing, enum-violation, regex-violation, and discriminator regressions — not
15+
newly-introduced optional fields.
16+
"""
17+
18+
from __future__ import annotations
19+
20+
from typing import TYPE_CHECKING, get_args
21+
22+
from ._utils import maybe_await
23+
from apify_client._literals import RunOrigin
24+
from apify_client._models import (
25+
Actor,
26+
ActorChargeEvent,
27+
Build,
28+
FlatPricePerMonthActorPricingInfo,
29+
ListOfBuilds,
30+
ListOfStoreActors,
31+
PayPerEventActorPricingInfo,
32+
PricePerDatasetItemActorPricingInfo,
33+
)
34+
35+
if TYPE_CHECKING:
36+
from apify_client import ApifyClient, ApifyClientAsync
37+
38+
39+
# ============================================================================
40+
# Test fixtures — public actors known to exhibit specific edge-case data shapes.
41+
# ============================================================================
42+
43+
# Apify-owned actor. Its `latest` build declares `minMemoryMbytes: 128`, well under the 256
# minimum the spec used to require. It also has `actorDefinition.version: "0.0.1"`, so it
# exercises the semver-triplet regex fixed in apify-docs#2555 as well.
SMALL_MIN_MEMORY_ACTOR = 'apify/instagram-profile-scraper'

# Apify-owned actor whose builds list contains entries with `meta.origin: "CI"`, produced by
# the internal CI pipeline. CI builds are infrequent and rotate out of the most-recent window,
# which is why a deep `desc=True` pagination is needed to find one.
CI_ORIGIN_ACTOR = 'apify/cheerio-scraper'

# Actor from the original #811 bug report. It carries pricing-info entries for every
# non-trivial variant: FLAT_PRICE_PER_MONTH, PRICE_PER_DATASET_ITEM (both flat and tiered),
# and tiered PAY_PER_EVENT with the `isPrimaryEvent` / `isOneTimeEvent` fields.
ALL_PRICING_VARIANTS_ACTOR = 'apify/facebook-pages-scraper'
57+
58+
59+
def _pick_build_id(actor: Actor) -> str:
    """Return a stable `build_id` from `actor.tagged_builds`, preferring the `latest` tag.

    The `latest` tag wins whenever it carries a non-empty `build_id`; otherwise the first other
    tag with a non-empty `build_id` is used. Looking `latest` up by key avoids relying on
    API-side dict ordering (`next(iter(...))` would otherwise pick whichever tag the API decides
    to serialize first).

    Args:
        actor: Actor whose `tagged_builds` mapping is inspected.

    Returns:
        A non-empty build ID.

    Raises:
        AssertionError: If the actor has no tagged builds, or no tagged build has a `build_id`.
    """
    assert actor.tagged_builds, f'{actor.username}/{actor.name} has no tagged builds'

    latest = actor.tagged_builds.get('latest')
    # Truthiness rather than `is not None`: an empty-string `build_id` on `latest` must be
    # rejected exactly like the fallback scan below rejects it, not returned as a usable ID.
    if latest is not None and latest.build_id:
        return latest.build_id

    fallback = next(
        (info.build_id for info in actor.tagged_builds.values() if info and info.build_id),
        None,
    )
    assert fallback is not None, f'{actor.username}/{actor.name} has no tagged build with a build_id'
    return fallback
75+
76+
77+
# ============================================================================
78+
# Tests
79+
# ============================================================================
80+
81+
82+
async def test_build_get_accepts_small_min_memory_mbytes(client: ApifyClient | ApifyClientAsync) -> None:
    """`build.get()` must accept `actorDefinition.minMemoryMbytes` values under 256.

    Guards the fix from apify-docs PR #2559. The spec used to declare `minimum: 256` for both
    `minMemoryMbytes` and `maxMemoryMbytes`, while the platform's real minimum is 128 MB — a
    value many community actors use. Before the fix, ~20 store actors made `build.get()` fail
    with a Pydantic `greater_than_equal` ValidationError.

    Only the `min` side is exercised. The spec change was identical for `maxMemoryMbytes`, but
    no fixture with a stable `maxMemoryMbytes < 256` was available at the time of writing, so
    the `max` side is implicitly trusted to share fate with `min`.
    """
    fixture_actor = await maybe_await(client.actor(SMALL_MIN_MEMORY_ACTOR).get())
    assert isinstance(fixture_actor, Actor)
    chosen_build_id = _pick_build_id(fixture_actor)

    # A ValidationError raised by this call means the spec has regressed.
    fetched_build = await maybe_await(client.build(chosen_build_id).get())
    assert isinstance(fetched_build, Build)

    definition = fetched_build.actor_definition
    assert definition is not None, 'expected actorDefinition on a SUCCEEDED build'

    # Fixture-drift guard: the regression is only exercised when the chosen build really
    # carries a value under the old 256 threshold.
    min_memory = definition.min_memory_mbytes
    assert min_memory is not None
    assert min_memory < 256, (
        f'{SMALL_MIN_MEMORY_ACTOR} latest build has min_memory_mbytes={min_memory!r} '
        '(expected <256). Pick a different fixture to keep this regression test meaningful.'
    )
111+
112+
113+
async def test_actor_builds_list_accepts_ci_origin(client: ApifyClient | ApifyClientAsync) -> None:
    """`actor.builds().list()` must accept builds whose `meta.origin` is `"CI"`.

    Guards the fix from apify-docs PR #2559: the `RunOrigin` enum lacked `"CI"` even though
    Apify-owned actors are routinely rebuilt by the internal CI pipeline. The request uses a
    deep `desc=True` page (limit=100) because CI builds are rare relative to WEB/CLI builds.
    """
    builds_page = await maybe_await(client.actor(CI_ORIGIN_ACTOR).builds().list(limit=100, desc=True))
    assert isinstance(builds_page, ListOfBuilds)
    assert builds_page.items, f'{CI_ORIGIN_ACTOR} should have builds'

    # Fixture-drift guard. Pydantic validated every `meta.origin` against `RunOrigin` while
    # deserializing, so the regression is exercised iff the page holds at least one CI build.
    page_has_ci_build = any(
        build.meta is not None and build.meta.origin == 'CI' for build in builds_page.items
    )
    assert page_has_ci_build, (
        f'{CI_ORIGIN_ACTOR}: no builds with meta.origin == "CI" in the most-recent 100. '
        'CI builds may have rotated out of the window — pick a different actor or paginate deeper.'
    )
132+
133+
134+
def test_run_origin_enum_includes_ci() -> None:
    """The `RunOrigin` literal must list `"CI"` among its values.

    Static guard so that a future model regeneration cannot drop the value unnoticed. The live
    API has been returning `"CI"` on builds triggered by the internal CI pipeline since at
    least 2024.
    """
    allowed_origins = get_args(RunOrigin)
    assert 'CI' in allowed_origins
141+
142+
143+
async def test_actor_get_parses_tiered_price_per_dataset_item(client: ApifyClient | ApifyClientAsync) -> None:
    """`actor.get()` must accept `PRICE_PER_DATASET_ITEM` entries that carry `tieredPricing`.

    Guards the fix for apify-client-python #811 / apify-docs PR #2555. Tiered-PPD actors return
    `tieredPricing: {FREE, BRONZE, SILVER, GOLD, PLATINUM, DIAMOND}` in place of a flat
    `pricePerUnitUsd`, whereas the spec used to require `pricePerUnitUsd` on every PPD entry.
    """
    actor = await maybe_await(client.actor(ALL_PRICING_VARIANTS_ACTOR).get())
    assert isinstance(actor, Actor)
    assert actor.pricing_infos

    # Keep only the PPD entries that actually use tiered pricing.
    tiered_entries: list[PricePerDatasetItemActorPricingInfo] = []
    for pricing_info in actor.pricing_infos:
        if not isinstance(pricing_info, PricePerDatasetItemActorPricingInfo):
            continue
        if pricing_info.tiered_pricing is None:
            continue
        tiered_entries.append(pricing_info)

    assert tiered_entries, (
        f'{ALL_PRICING_VARIANTS_ACTOR} should have at least one tiered PRICE_PER_DATASET_ITEM entry — '
        'pick a different actor if pricing changed.'
    )

    # Fixture-drift guard: tiered pricing exercises the regression meaningfully only when there
    # is more than one tier and the tiers differ in price. A degenerate single-tier or all-zero
    # payload would silently look like flat pricing.
    for tiered_entry in tiered_entries:
        tiers = tiered_entry.tiered_pricing
        assert tiers is not None  # for type narrowing
        assert len(tiers) >= 2, (
            f'{ALL_PRICING_VARIANTS_ACTOR} tiered PPD entry has only {len(tiers)} tier(s); '
            'expected multiple tiers (e.g. FREE/BRONZE/SILVER/GOLD/PLATINUM/DIAMOND).'
        )
        unique_prices = {tier.tiered_price_per_unit_usd for tier in tiers.values()}
        assert len(unique_prices) >= 2, (
            f'{ALL_PRICING_VARIANTS_ACTOR} tiered PPD entry has all-identical prices ({unique_prices}); '
            'tiers should differ.'
        )
178+
179+
180+
async def test_actor_get_parses_tiered_pay_per_event(client: ApifyClient | ApifyClientAsync) -> None:
    """`actor.get()` must accept `PAY_PER_EVENT` events carrying `eventTieredPricingUsd`,
    `isPrimaryEvent`, and `isOneTimeEvent`.

    Guards the fix for apify-client-python #818 / apify-docs PR #2555. Tiered PPE events use
    `eventTieredPricingUsd` in place of `eventPriceUsd`, and the spec gained the
    `isPrimaryEvent` / `isOneTimeEvent` fields.

    NOTE(review): elsewhere in this module the same fields are attributed to #811 — confirm
    which issue number is the right one to cite here.
    """
    actor = await maybe_await(client.actor(ALL_PRICING_VARIANTS_ACTOR).get())
    assert isinstance(actor, Actor)
    assert actor.pricing_infos

    # Gather every charge event that uses tiered pricing, across all PPE pricing entries.
    tiered_events: list[ActorChargeEvent] = []
    for pricing_info in actor.pricing_infos:
        if isinstance(pricing_info, PayPerEventActorPricingInfo):
            charge_events = pricing_info.pricing_per_event.actor_charge_events or {}
            tiered_events += [
                charge_event
                for charge_event in charge_events.values()
                if charge_event.event_tiered_pricing_usd is not None
            ]

    assert tiered_events, (
        f'{ALL_PRICING_VARIANTS_ACTOR} should have at least one tiered PAY_PER_EVENT event — '
        'pick a different actor if pricing changed.'
    )

    # Every model uses `extra='allow'`, so a regenerator that drops the `isPrimaryEvent` or
    # `isOneTimeEvent` alias would silently absorb the JSON key into `model_extra`. Checking
    # that the typed attributes are populated catches that drift.
    assert any(charge_event.is_primary_event is True for charge_event in tiered_events), (
        f'{ALL_PRICING_VARIANTS_ACTOR}: no tiered PPE event has is_primary_event == True. '
        'The isPrimaryEvent alias may have been dropped from the model.'
    )
    assert any(charge_event.is_one_time_event is not None for charge_event in tiered_events), (
        f'{ALL_PRICING_VARIANTS_ACTOR}: no tiered PPE event has is_one_time_event populated. '
        'The isOneTimeEvent alias may have been dropped from the model.'
    )
214+
215+
216+
async def test_actor_pricing_infos_includes_expected_variants(client: ApifyClient | ApifyClientAsync) -> None:
    """`apify/facebook-pages-scraper` must continue to carry pricing entries for every
    bug-prone discriminated-union variant: PPE, PPD, and FLAT_PRICE_PER_MONTH.

    Fixture sanity check. If the actor stops publishing one of these variants, the sibling
    `tiered_*` regression tests no longer exercise their respective code paths and this test
    fails loudly — pointing maintainers to pick a different fixture.

    Note: a hypothetical new pricing variant added to the API (e.g. `PAY_PER_RESULT`) cannot
    be detected here. Pydantic's discriminated union raises `ValidationError` at `actor.get()`
    time with a discriminator error, so the test crashes before reaching this code.
    """
    actor = await maybe_await(client.actor(ALL_PRICING_VARIANTS_ACTOR).get())
    assert isinstance(actor, Actor)
    assert actor.pricing_infos

    expected_variants = (
        PayPerEventActorPricingInfo,
        PricePerDatasetItemActorPricingInfo,
        FlatPricePerMonthActorPricingInfo,
    )
    # Match with `isinstance`, as the sibling tests do, so a subclass of a variant still counts
    # as that variant. Report sorted class names so the failure message is stable and readable.
    missing = sorted(
        variant.__name__
        for variant in expected_variants
        if not any(isinstance(entry, variant) for entry in actor.pricing_infos)
    )
    assert not missing, (
        f'{ALL_PRICING_VARIANTS_ACTOR} no longer carries pricing_infos for {missing}; '
        'the sibling regression tests for those variants are now unguarded. Pick a different fixture.'
    )
243+
244+
245+
async def test_actor_definition_version_accepts_semver_triplet(client: ApifyClient | ApifyClientAsync) -> None:
    """`ActorDefinition.version` must accept semver-style triplets such as `0.0.1`.

    Guards the fix from apify-docs PR #2555: the pattern used to be `^[0-9]+\\.[0-9]+$`, which
    rejected any version with more than one dot. `apify/instagram-profile-scraper` publishes
    its actor.json with `version: "0.0.1"`.
    """
    fixture_actor = await maybe_await(client.actor(SMALL_MIN_MEMORY_ACTOR).get())
    assert isinstance(fixture_actor, Actor)
    chosen_build_id = _pick_build_id(fixture_actor)

    # If build.get() succeeds on a triplet version, the regex accepted it.
    fetched_build = await maybe_await(client.build(chosen_build_id).get())
    assert isinstance(fetched_build, Build)

    definition = fetched_build.actor_definition
    assert definition is not None

    # Fixture-drift guard: the old broken regex required exactly one dot, so the regression is
    # only exercised when the chosen build's version has two or more dots.
    published_version = definition.version
    assert published_version is not None
    dot_count = published_version.count('.')
    assert dot_count >= 2, (
        f'{SMALL_MIN_MEMORY_ACTOR} no longer publishes a multi-dot version (got {published_version!r}) — '
        'pick a different fixture to keep the regex regression test meaningful.'
    )
269+
270+
271+
async def test_store_list_parses_full_first_page(client: ApifyClient | ApifyClientAsync) -> None:
    """`store.list()` must parse every item of a 100-actor page without a ValidationError.

    Broad sweep over the most-popular slice of the store. It catches enum-violation,
    required-field-missing, and discriminator regressions on whichever actors happen to be on
    page 1; long-tail actors at deeper ranks are not covered. Because every model uses
    `extra='allow'`, silently-added or silently-renamed fields cannot be detected here.
    """
    first_page = await maybe_await(client.store().list(limit=100))
    assert isinstance(first_page, ListOfStoreActors)

    # Pydantic has already validated every item via `model_validate`. The checks below make
    # sure the page was not empty and touch a few fields to keep the assertion concrete.
    assert first_page.items, (
        f'{type(first_page).__name__} returned an empty items list — unexpected for the public store'
    )
    for store_actor in first_page.items:
        assert store_actor.id
        assert store_actor.name
        assert store_actor.username

0 commit comments

Comments
 (0)