fix(core): split SEO fetch from loadEntry to avoid D1 result-set column limit on wide collections

Vallhalen · claude · Vallhalen · commit a4bfc3c7e3d8 · 2026-06-26T11:31:49.000+02:00
The content loader's single-query LEFT JOIN _emdash_seo added 5 alias columns to every result set, which pushed per-collection ec_* tables with ~95+ flat user fields past D1's per-query column limit (~100). The join failed with `D1_ERROR: too many columns in result set`, the error was wrapped as a generic `Failed to load entry`, and the call site surfaced a silent null. SEO is now fetched as a separate follow-up query and folded onto the row using the same alias names extractSeo() reads, so the public API is unchanged. The content query's result set no longer carries the 5 SEO alias columns, so SEO no longer adds to its width however wide the collection schema gets. This costs one extra round trip per loadEntry, with no behavior change at the API boundary. loadCollection was already join-free. Adds a regression test that exercises a 95-user-field collection with and without a SEO row, on both dialects.

Closes #1600
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
diff --git a/.changeset/fix-d1-result-set-column-limit.md b/.changeset/fix-d1-result-set-column-limit.md
@@ -0,0 +1,5 @@
+---
+"emdash": patch
+---
+
+Fixes silent `null` entries on wide-schema collections under Cloudflare D1. The content loader's single-query `LEFT JOIN _emdash_seo` added 5 alias columns to every result set, which pushed collections with ~95+ flat user fields past D1's per-query column limit (~100). The query failed with `D1_ERROR: too many columns in result set`, the error was wrapped as a generic `Failed to load entry`, and the call site surfaced `null`. SEO is now fetched as a separate follow-up query and folded onto the row, so the SEO columns no longer add to the content query's result-set width however wide the collection schema gets.
diff --git a/packages/core/src/loader.ts b/packages/core/src/loader.ts
@@ -994,40 +994,39 @@ export function emdashLoader(): LiveLoader<EntryData, EntryFilter, CollectionFil
 				// When locale is specified, prefer locale-scoped slug match,
 				// but IDs are globally unique so always check id without locale scope.
 				//
-				// LEFT JOIN _emdash_seo folds per-entry SEO (canonical, noindex,
-				// etc.) into this single query at zero extra round-trip cost. The
-				// joined columns are surfaced as a nested data.seo object via
-				// extractSeo() and excluded from the generic field mapping. SEO is
-				// 1:1 with content (PK on collection+content_id), so the join never
-				// multiplies rows.
-				const seoSelect = sql.join(
-					Object.entries(SEO_COLUMN_ALIASES).map(
-						([col, alias]) => sql`${sql.ref(`s.${col}`)} AS ${sql.ref(alias)}`,
-					),
-				);
-				// Fold byline + taxonomy hydration into the content query (see
-				// foldedHydrationSelects), removing the two separate hydration
-				// round trips per fetch.
+				// Per-entry SEO (canonical, noindex, etc.) is fetched as a
+				// follow-up query and folded onto the row via SEO_COLUMN_ALIASES,
+				// preserving the data.seo shape that extractSeo() returns.
+				//
+				// We intentionally do NOT LEFT JOIN _emdash_seo here: that adds
+				// 5 alias columns to every result set, which can push wide
+				// flat-schema collections (common when porting from WordPress /
+				// ACF) past D1's per-result-set column limit (~100). The join
+				// failed with `D1_ERROR: too many columns in result set` and
+				// surfaced as a silent `null` entry at the call site. A separate
+				// SEO query is one extra round trip but is bounded in shape and
+				// works at any collection width.
+				//
+				// Byline + taxonomy hydration stays folded into the content
+				// query (see foldedHydrationSelects) because each is a single
+				// aggregated JSON column, so they add only two columns to the
+				// result set regardless of how many terms/credits an entry has.
 				const { terms: termsSelect, bylines: bylinesSelect } = foldedHydrationSelects(
 					db,
 					type,
 					"c",
 				);
 				const result = locale
 					? await sql<Record<string, unknown>>`
-							SELECT c.*, ${seoSelect}, ${termsSelect}, ${bylinesSelect}
+							SELECT c.*, ${termsSelect}, ${bylinesSelect}
 							FROM ${sql.ref(tableName)} AS c
-							LEFT JOIN ${sql.ref("_emdash_seo")} AS s
-								ON s.collection = ${type} AND s.content_id = c.id
 							WHERE c.deleted_at IS NULL
 							AND ((c.slug = ${id} AND c.locale = ${locale}) OR c.id = ${id})
 							LIMIT 1
 						`.execute(db)
 					: await sql<Record<string, unknown>>`
-							SELECT c.*, ${seoSelect}, ${termsSelect}, ${bylinesSelect}
+							SELECT c.*, ${termsSelect}, ${bylinesSelect}
 							FROM ${sql.ref(tableName)} AS c
-							LEFT JOIN ${sql.ref("_emdash_seo")} AS s
-								ON s.collection = ${type} AND s.content_id = c.id
 							WHERE c.deleted_at IS NULL
 							AND (c.slug = ${id} OR c.id = ${id})
 							LIMIT 1
@@ -1038,6 +1037,23 @@ export function emdashLoader(): LiveLoader<EntryData, EntryFilter, CollectionFil
 					return undefined;
 				}
 
+				// Fold SEO onto the row using the same aliases the join used,
+				// so extractSeo() reads it transparently. Missing SEO row is
+				// expected (LEFT JOIN behavior preserved): extractSeo() returns
+				// null when the noIndex column is missing.
+				const seoResult = await sql<Record<string, unknown>>`
+					SELECT seo_title, seo_description, seo_image, seo_canonical, seo_no_index
+					FROM ${sql.ref("_emdash_seo")}
+					WHERE collection = ${type} AND content_id = ${row.id}
+					LIMIT 1
+				`.execute(db);
+				const seoRow = seoResult.rows[0];
+				if (seoRow) {
+					for (const [col, alias] of Object.entries(SEO_COLUMN_ALIASES)) {
+						row[alias] = seoRow[col];
+					}
+				}
+
 				const i18nConfig = virtualConfig?.i18n;
 				const i18nEnabled = i18nConfig && i18nConfig.locales.length > 1;
 				const entrySlug = rowStr(row, "slug") || rowStr(row, "id");
diff --git a/packages/core/tests/unit/loader-wide-collection.test.ts b/packages/core/tests/unit/loader-wide-collection.test.ts
@@ -0,0 +1,156 @@
+import { it, expect, beforeEach, afterEach } from "vitest";
+
+import { handleContentCreate } from "../../src/api/index.js";
+import { SchemaRegistry } from "../../src/schema/registry.js";
+import { SeoRepository } from "../../src/database/repositories/seo.js";
+import { emdashLoader } from "../../src/loader.js";
+import { runWithContext } from "../../src/request-context.js";
+import {
+	describeEachDialect,
+	setupForDialect,
+	teardownForDialect,
+	type DialectTestContext,
+} from "../utils/test-db.js";
+
+/**
+ * Regression test for #1600: loadEntry's SELECT shape on wide collections.
+ *
+ * When a per-collection `ec_*` table has many flat scalar columns (common when
+ * porting from WordPress / ACF or other builders where every section is a
+ * top-level field), the previous implementation did:
+ *
+ *   SELECT c.*, <5 SEO alias columns> FROM ec_table c LEFT JOIN _emdash_seo s
+ *
+ * On Cloudflare D1 the per-query result-set column limit (~100) made this
+ * fail with `D1_ERROR: too many columns in result set` for collections
+ * around 95+ user columns. The loader's try/catch wrapped it as a generic
+ * `Failed to load entry` error and the call site returned a silent `null`.
+ *
+ * The fix splits the query: fetch the row from the collection table without
+ * a SEO join, then fetch SEO separately and fold it onto the row using the
+ * same alias names extractSeo() reads. The SEO columns therefore no longer
+ * add to the content query's result-set width on wide collections.
+ *
+ * Run on both dialects to keep parity with loader-seo.test.ts.
+ */
+describeEachDialect("Loader on wide-schema collections (#1600)", (dialect) => {
+	let ctx: DialectTestContext;
+	let seoRepo: SeoRepository;
+	const COLLECTION = "wide_collection";
+	const USER_FIELD_COUNT = 95;
+
+	beforeEach(async () => {
+		ctx = await setupForDialect(dialect);
+		const registry = new SchemaRegistry(ctx.db);
+
+		// Create a collection with SEO enabled and a large number of flat
+		// scalar fields. 95 user fields + 14 system columns + 5 SEO aliases
+		// would have been ~114 result-set columns under the old LEFT JOIN
+		// shape, well past D1's per-query limit.
+		await registry.createCollection({
+			slug: COLLECTION,
+			label: "Wide Collection",
+			labelSingular: "Wide Entry",
+		});
+		await registry.createField(COLLECTION, {
+			slug: "title",
+			label: "Title",
+			type: "string",
+		});
+		for (let i = 1; i <= USER_FIELD_COUNT; i++) {
+			await registry.createField(COLLECTION, {
+				slug: `field_${i}`,
+				label: `Field ${i}`,
+				type: "string",
+			});
+		}
+		// Enable SEO so extractSeo() has somewhere to read from.
+		await ctx.db
+			.updateTable("_emdash_collections")
+			.set({ has_seo: 1 })
+			.where("slug", "=", COLLECTION)
+			.execute();
+
+		seoRepo = new SeoRepository(ctx.db);
+	});
+
+	afterEach(async () => {
+		await teardownForDialect(ctx);
+	});
+
+	function load(idOrSlug: string) {
+		const loader = emdashLoader();
+		return runWithContext({ db: ctx.db }, () =>
+			loader.loadEntry!({ filter: { type: COLLECTION, id: idOrSlug } }),
+		);
+	}
+
+	it("loads an entry from a collection with 95+ flat user columns", async () => {
+		const data: Record<string, string> = { title: "Wide Entry" };
+		for (let i = 1; i <= USER_FIELD_COUNT; i++) {
+			data[`field_${i}`] = `value-${i}`;
+		}
+		const result = await handleContentCreate(ctx.db, COLLECTION, {
+			data,
+			status: "published",
+		});
+		if (!result.success) throw new Error("Failed to create entry");
+		const slug = result.data!.item.slug!;
+
+		const loaded = await load(slug);
+
+		expect(loaded).toBeDefined();
+		expect((loaded as { data: Record<string, unknown> }).data.title).toBe("Wide Entry");
+		// Spot-check a handful of user fields across the range.
+		const loadedData = (loaded as { data: Record<string, unknown> }).data;
+		expect(loadedData.field_1).toBe("value-1");
+		expect(loadedData.field_50).toBe("value-50");
+		expect(loadedData.field_95).toBe("value-95");
+	});
+
+	it("still attaches data.seo on wide collections (SEO follow-up query)", async () => {
+		const data: Record<string, string> = { title: "Wide With SEO" };
+		for (let i = 1; i <= USER_FIELD_COUNT; i++) {
+			data[`field_${i}`] = `value-${i}`;
+		}
+		const result = await handleContentCreate(ctx.db, COLLECTION, {
+			data,
+			status: "published",
+		});
+		if (!result.success) throw new Error("Failed to create entry");
+		const item = result.data!.item;
+
+		await seoRepo.upsert(COLLECTION, item.id, {
+			noIndex: true,
+			canonical: "https://example.com/wide",
+			title: "Wide SEO Title",
+		});
+
+		const loaded = await load(item.slug!);
+		const loadedData = (loaded as { data: Record<string, unknown> }).data;
+		const seo = loadedData.seo as Record<string, unknown> | undefined;
+
+		expect(seo).toBeDefined();
+		expect(seo!.noIndex).toBe(true);
+		expect(seo!.canonical).toBe("https://example.com/wide");
+		expect(seo!.title).toBe("Wide SEO Title");
+	});
+
+	it("omits data.seo when no SEO row exists, even on wide collections", async () => {
+		const data: Record<string, string> = { title: "No SEO" };
+		for (let i = 1; i <= USER_FIELD_COUNT; i++) {
+			data[`field_${i}`] = `value-${i}`;
+		}
+		const result = await handleContentCreate(ctx.db, COLLECTION, {
+			data,
+			status: "published",
+		});
+		if (!result.success) throw new Error("Failed to create entry");
+		const slug = result.data!.item.slug!;
+
+		const loaded = await load(slug);
+		const loadedData = (loaded as { data: Record<string, unknown> }).data;
+
+		expect(loadedData.seo).toBeUndefined();
+	});
+});

-Original file line number
+Diff line change
@@ @@ -0,0 +1,5 @@ @@
 +---
 +"emdash": patch
 +---
++
 +Fixes silent `null` entries on wide-schema collections under Cloudflare D1. The content loader's single-query `LEFT JOIN _emdash_seo` added 5 alias columns to every result set, which pushed collections with ~95+ flat user fields past D1's per-query column limit (~100). The query failed with `D1_ERROR: too many columns in result set`, the error was wrapped as a generic `Failed to load entry`, and the call site surfaced `null`. SEO is now fetched as a separate follow-up query and folded onto the row, keeping the result-set width bounded regardless of how wide the collection schema gets.