Skip to content

Commit 6df40d3

Browse files
authored
fix(snooze): early-kill pure-pumper sources at >=80% ad_rate / >=10 sample [FFM-847] (#221)
The standard 30% / 30-sample gate (#214) misses sources that pump ~100% ads at low volume: they take 1-7 days to accumulate enough processed memes to self-snooze. Cohort sources 148/149/155/156 (FFM-847) are stuck mid-band for this reason. This commit adds criterion 3a as an additive sub-gate: ad_rate >= 80% with >= 10 processed memes -> snooze with reason 'extreme_ad_rate' (distinct from 'high_ad_rate' for monitoring attribution). The standard 3b gate stays intact. The sample-size floor of 10 protects against noise; the 80% threshold is well above any noise band, so the existing 30-meme floor is unnecessary for extreme cases. Per CEO greenlight on FFM-847 comment 097e1f25.
1 parent d7eb3bf commit 6df40d3

2 files changed

Lines changed: 85 additions & 3 deletions

File tree

src/storage/service.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,9 @@ async def maybe_auto_snooze_source(
6969
Snoozes the source if any criterion is met:
7070
1. 3 consecutive parse attempts returned 0 posts.
7171
2. like_rate < 10% with at least 100 total reactions.
72-
3. ad_rate > 30% over rolling 7d window with >= 30 processed memes.
72+
3a. ad_rate >= 80% over rolling 7d window with >= 10 processed memes
73+
(early-kill for extreme pumpers).
74+
3b. ad_rate > 30% over rolling 7d window with >= 30 processed memes.
7375
Returns the snooze reason string if snoozed, None otherwise.
7476
"""
7577
source = await fetch_one(select(meme_source).where(meme_source.c.id == meme_source_id))
@@ -140,9 +142,24 @@ async def maybe_auto_snooze_source(
140142
),
141143
{"sid": meme_source_id},
142144
)
143-
if ad_stats and ad_stats["n_processed"] >= 30:
145+
if ad_stats and ad_stats["n_processed"] >= 10:
144146
ad_rate = ad_stats["n_ads"] / ad_stats["n_processed"]
145-
if ad_rate > 0.30:
147+
# Criterion 3a: pure-pumper early-kill. A 100%-ad source has no legitimate
148+
# signal regardless of volume; the 30-meme floor only protects against
149+
# noise, which doesn't apply at >=80% ad_rate.
150+
if ad_rate >= 0.80:
151+
await update_meme_source(
152+
meme_source_id,
153+
status=MemeSourceStatus.SNOOZED.value,
154+
data={
155+
**updated_data,
156+
"snoozed_reason": "extreme_ad_rate",
157+
"snoozed_at": now_iso,
158+
},
159+
)
160+
return "extreme_ad_rate"
161+
# Criterion 3b: standard high-ad-rate gate.
162+
if ad_stats["n_processed"] >= 30 and ad_rate > 0.30:
146163
await update_meme_source(
147164
meme_source_id,
148165
status=MemeSourceStatus.SNOOZED.value,

tests/test_auto_snooze.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,3 +286,68 @@ async def test_failed_pipeline_memes_excluded_from_denominator(conn: AsyncConnec
286286
result = await maybe_auto_snooze_source(SOURCE_ID, new_posts_count=5)
287287

288288
assert result == "high_ad_rate"
289+
290+
291+
# Criterion 3a: extreme_ad_rate early-kill (FFM-847 follow-up).
292+
# Pure-pumper sources that post few but ~100% ad memes never reach the 30-meme
293+
# sample threshold on volume alone. Catch them at >=80% ad_rate with >=10 processed.
294+
295+
296+
@pytest.mark.asyncio
297+
async def test_snooze_on_extreme_ad_rate_below_standard_sample(conn: AsyncConnection):
298+
"""100% ad_rate with 12 processed memes (below the 30-meme standard threshold)
299+
must early-kill via 'extreme_ad_rate'."""
300+
await create_meme_source(conn, id=SOURCE_ID, status="parsing_enabled")
301+
await _seed_memes(conn, source_id=SOURCE_ID, n_ad=12, n_ok=0)
302+
await conn.commit()
303+
304+
result = await maybe_auto_snooze_source(SOURCE_ID, new_posts_count=5)
305+
306+
assert result == "extreme_ad_rate"
307+
source = await fetch_one(meme_source.select().where(meme_source.c.id == SOURCE_ID))
308+
assert source["status"] == MemeSourceStatus.SNOOZED.value
309+
assert source["data"]["snoozed_reason"] == "extreme_ad_rate"
310+
311+
312+
@pytest.mark.asyncio
313+
async def test_snooze_on_extreme_ad_rate_at_80_pct_boundary(conn: AsyncConnection):
314+
"""Threshold is `>= 0.80`. Exactly 80% (8 ad / 10 processed) must snooze."""
315+
await create_meme_source(conn, id=SOURCE_ID, status="parsing_enabled")
316+
await _seed_memes(conn, source_id=SOURCE_ID, n_ad=8, n_ok=2)
317+
await conn.commit()
318+
319+
result = await maybe_auto_snooze_source(SOURCE_ID, new_posts_count=5)
320+
321+
assert result == "extreme_ad_rate"
322+
323+
324+
@pytest.mark.asyncio
325+
async def test_no_snooze_on_extreme_ad_rate_below_min_sample(conn: AsyncConnection):
326+
"""9 processed memes (9 ad = 100% ad_rate) is below the 10-meme minimum sample
327+
for the early-kill. Don't snooze — wait for one more meme."""
328+
await create_meme_source(conn, id=SOURCE_ID, status="parsing_enabled")
329+
await _seed_memes(conn, source_id=SOURCE_ID, n_ad=9, n_ok=0)
330+
await conn.commit()
331+
332+
result = await maybe_auto_snooze_source(SOURCE_ID, new_posts_count=5)
333+
334+
assert result is None
335+
source = await fetch_one(meme_source.select().where(meme_source.c.id == SOURCE_ID))
336+
assert source["status"] == MemeSourceStatus.PARSING_ENABLED.value
337+
338+
339+
@pytest.mark.asyncio
340+
async def test_no_snooze_just_below_extreme_threshold(conn: AsyncConnection):
341+
"""~78.6% ad_rate, 11 ad / 14 processed (mid-band: too high to be healthy, too low for early-kill, sample
342+
too small for standard gate) must NOT snooze. Wait for the standard gate to
343+
kick in once n_processed >= 30."""
344+
await create_meme_source(conn, id=SOURCE_ID, status="parsing_enabled")
345+
# 11 ad + 3 ok = 14 processed, ad_rate ≈ 78.6% — under 80%, under 30 sample
346+
await _seed_memes(conn, source_id=SOURCE_ID, n_ad=11, n_ok=3)
347+
await conn.commit()
348+
349+
result = await maybe_auto_snooze_source(SOURCE_ID, new_posts_count=5)
350+
351+
assert result is None
352+
source = await fetch_one(meme_source.select().where(meme_source.c.id == SOURCE_ID))
353+
assert source["status"] == MemeSourceStatus.PARSING_ENABLED.value

0 commit comments

Comments
 (0)