Skip to content

Commit e12d849

Browse files
committed
feat: personalized 'For you' feed (topic follow/mute + affinity)
Adds opt-in feed personalization built on the topic vocabulary: - Schema (migration 0039, additive): user_topic_pref (follow/mute) and user_topic_affinity (implicit interest, time-decayed). - feedRanking: pure, unit-tested scoring — a transparent weighted blend of recency, quality, and topic affinity, with muted topics filtered out. - topicAffinity: derive per-user affinity from votes/bookmarks/comments through post_topic edges with decay; recomputed for active users by the nightly cron. - profile.getTopicPrefs / setTopicPref: manage follows and mutes. - content.getForYouFeed: re-rank a recent candidate window for the user; cold start (no signal) falls back to recency, so the existing feed is untouched. Also trims verbose comments across the content-pipeline modules.
1 parent 4136505 commit e12d849

13 files changed

Lines changed: 7127 additions & 51 deletions

app/api/cron/daily-review/route.ts

Lines changed: 34 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -30,31 +30,25 @@ import {
3030
type TopicVocabEntry,
3131
} from "@/server/lib/contentAnalysis";
3232
import { autoReview } from "@/server/lib/autoReview";
33+
import {
34+
findRecentlyActiveUsers,
35+
recomputeUserAffinity,
36+
} from "@/server/lib/topicAffinity";
3337
import sendEmail from "@/utils/sendEmail";
3438

35-
// Nightly review cron. Auth via Bearer CRON_SECRET (a headless scheduler can't
36-
// use admin-session auth); unset secret refuses to run (500), wrong/missing
37-
// token 401. Wired via AWS Lambda + EventBridge (cdk/lib/cron-stack.ts).
38-
//
39-
// Four incremental passes:
40-
// 1. topic + sentiment tagging (posts)
41-
// 2. quality / spam scoring (posts) — passes 1+2 share one Bedrock call
42-
// 3. re-screen moderation (posts + comments) -> reports queue (source=system)
43-
// 4. daily digest -> email the founder only when something needs attention
44-
//
45-
// Everything is incremental (per-row analyzedAt / moderatedAt watermark) and
46-
// capped per run, so an empty worklist is a near-zero-cost no-op and a backfill
47-
// can't blow the Lambda timeout. Each item is isolated (try/catch + Sentry) so
48-
// one bad row never kills the batch.
39+
// Nightly review cron (auth via CRON_SECRET; invoked by EventBridge — see
40+
// cdk/lib/cron-stack.ts). Incremental, capped passes that no-op on an empty
41+
// worklist: topic/sentiment tagging, quality scoring, post+comment moderation
42+
// re-screen, affinity recompute, and a digest email. Each item is isolated
43+
// (try/catch + Sentry) so one bad row never kills the batch.
4944

5045
export const dynamic = "force-dynamic";
5146
export const maxDuration = 300;
5247

5348
const POST_CAP = 100;
5449
const COMMENT_CAP = 200;
55-
// Sentinel modelId for rows scored by the cheap heuristic (Bedrock disabled), so
56-
// they're distinguishable from human-curated rows (modelId IS NULL) and can be
57-
// upgraded once Bedrock is enabled.
50+
// Sentinel modelId for heuristic-scored rows (Bedrock off), so they're distinct
51+
// from human-curated rows (modelId IS NULL) and can be upgraded once it's on.
5852
const HEURISTIC_MODEL = "heuristic";
5953

6054
function isAuthorized(request: Request): boolean {
@@ -124,10 +118,8 @@ async function reviewPosts(
124118
const bedrock = isBedrockEnabled();
125119
const now = new Date().toISOString();
126120

127-
// Incremental worklist: published posts that have never been analysed, whose
128-
// AI metadata is stale (post edited / schema bumped), or that only have a
129-
// heuristic placeholder now that Bedrock is available. Rows with modelId IS
130-
// NULL are human-curated and deliberately skipped.
121+
// Worklist: published posts never analysed, stale (edited / schema bumped), or
122+
// a heuristic placeholder now Bedrock is on. modelId IS NULL = human-curated, skip.
131123
const staleBranches = [
132124
gt(posts.updatedAt, post_metadata.analyzedAt),
133125
lt(post_metadata.schemaVersion, ANALYSIS_SCHEMA_VERSION),
@@ -384,6 +376,25 @@ async function sendDigest(summary: {
384376
return true;
385377
}
386378

379+
const AFFINITY_USER_CAP = 500;
380+
381+
/**
 * Recompute implicit topic affinity for users who interacted in the last 24h.
 *
 * Looks up at most AFFINITY_USER_CAP recently active users, then recomputes
 * them one at a time. A failure while recomputing a single user is reported
 * to Sentry and does not stop the rest of the batch; a failure of the user
 * lookup itself is not caught here and propagates to the caller.
 *
 * @returns the number of users whose recompute completed without throwing.
 */
async function reviewAffinity(): Promise<{ usersUpdated: number }> {
  // Read the clock once so the 24h activity window and the reference instant
  // handed to recomputeUserAffinity are derived from the same moment.
  const now = Date.now();
  const since = new Date(now - 24 * 60 * 60 * 1000).toISOString();
  const users = await findRecentlyActiveUsers(db, since, AFFINITY_USER_CAP);
  let usersUpdated = 0;
  for (const userId of users) {
    try {
      await recomputeUserAffinity(db, userId, now);
      usersUpdated += 1;
    } catch (err) {
      // Isolate per-user failures: report and continue with the next user.
      Sentry.captureException(err);
    }
  }
  return { usersUpdated };
}
397+
387398
async function loadVocab(): Promise<{
388399
vocab: TopicVocabEntry[];
389400
slugToId: Map<string, number>;
@@ -414,13 +425,15 @@ async function handle(request: Request) {
414425
const { vocab, slugToId } = await loadVocab();
415426
const postResult = await reviewPosts(vocab, slugToId);
416427
const commentResult = await reviewComments();
428+
const affinityResult = await reviewAffinity();
417429

418430
const summary = {
419431
postsAnalyzed: postResult.analyzed,
420432
postsFlagged: postResult.flagged,
421433
proposedTopics: postResult.proposed,
422434
commentsModerated: commentResult.moderated,
423435
commentsFlagged: commentResult.flagged,
436+
affinityUsersUpdated: affinityResult.usersUpdated,
424437
};
425438

426439
const digestSent = await sendDigest(summary);
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
CREATE TYPE "public"."topic_pref" AS ENUM('follow', 'mute');--> statement-breakpoint
2+
CREATE TABLE "user_topic_affinity" (
3+
"user_id" text NOT NULL,
4+
"topic_id" integer NOT NULL,
5+
"score" real NOT NULL,
6+
"updated_at" timestamp(3) with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
7+
CONSTRAINT "user_topic_affinity_user_id_topic_id_pk" PRIMARY KEY("user_id","topic_id")
8+
);
9+
--> statement-breakpoint
10+
CREATE TABLE "user_topic_pref" (
11+
"user_id" text NOT NULL,
12+
"topic_id" integer NOT NULL,
13+
"pref" "topic_pref" NOT NULL,
14+
"created_at" timestamp(3) with time zone DEFAULT CURRENT_TIMESTAMP NOT NULL,
15+
CONSTRAINT "user_topic_pref_user_id_topic_id_pk" PRIMARY KEY("user_id","topic_id")
16+
);
17+
--> statement-breakpoint
18+
ALTER TABLE "user_topic_affinity" ADD CONSTRAINT "user_topic_affinity_user_id_user_id_fk" FOREIGN KEY ("user_id") REFERENCES "public"."user"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
19+
ALTER TABLE "user_topic_affinity" ADD CONSTRAINT "user_topic_affinity_topic_id_topic_id_fk" FOREIGN KEY ("topic_id") REFERENCES "public"."topic"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
20+
ALTER TABLE "user_topic_pref" ADD CONSTRAINT "user_topic_pref_user_id_user_id_fk" FOREIGN KEY ("user_id") REFERENCES "public"."user"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
21+
ALTER TABLE "user_topic_pref" ADD CONSTRAINT "user_topic_pref_topic_id_topic_id_fk" FOREIGN KEY ("topic_id") REFERENCES "public"."topic"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
22+
CREATE INDEX "user_topic_affinity_user_id_idx" ON "user_topic_affinity" USING btree ("user_id");--> statement-breakpoint
23+
CREATE INDEX "user_topic_pref_user_id_idx" ON "user_topic_pref" USING btree ("user_id");

0 commit comments

Comments
 (0)