diff --git a/AGENTS.md b/AGENTS.md index 317cac9c73..5760d1f383 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -89,6 +89,7 @@ The migration generator compares entities against the local database schema. Ens - When a GraphQL field must be nulled based on viewer permissions, define the rule in `src/graphorm/index.ts` as a shared transform/helper (for example `nullIfNotLoggedIn`). If a resolver cannot use GraphORM for the full query, reuse that GraphORM field mapping from the manual path instead of re-implementing the permission rule in a schema field resolver. - For offset-paginated GraphQL reads that only need `pageInfo.hasNextPage`, prefer overfetching one extra row and slicing it in the page generator. Avoid separate `COUNT(*)`/`COUNT(DISTINCT ...)` queries unless the client explicitly needs a total. - For sitemap pagination, prefer oldest-first ordering with a deterministic tie-breaker so lower-numbered sitemap files stay as static as possible and pages do not skip or duplicate rows. +- For post-based sitemaps, keep ordering consistent across sitemap types: use oldest-first ordering with a deterministic tie-breaker (`createdAt`, then `id`) unless there is a documented reason not to. **Data Layer:** diff --git a/__tests__/sitemaps.ts b/__tests__/sitemaps.ts index c49cffa300..c3bdc22479 100644 --- a/__tests__/sitemaps.ts +++ b/__tests__/sitemaps.ts @@ -7,6 +7,7 @@ import { DataSource, DeepPartial } from 'typeorm'; import createOrGetConnection from '../src/db'; import { AGENTS_DIGEST_SOURCE, + CollectionPost, Keyword, KeywordStatus, Post, @@ -246,6 +247,111 @@ describe('GET /sitemaps/posts-:page.xml', () => { }); }); +describe('GET /sitemaps/collections.xml', () => { + it('should return only qualified public collections ordered by time as xml', async () => { + const updatedAt = new Date('2024-02-01T12:00:00.123Z'); + + await con.getRepository(CollectionPost).insert([ + { + id: 'qc1', + shortId: 'qc1', + title: 'Qualified Collection', + sourceId: 'a', + type: PostType.Collection, + visible: true, + upvotes: 3, + collectionSources: ['a', 'b', 'c'], + metadataChangedAt: updatedAt, + createdAt: new Date('2024-02-01T12:00:02.000Z'), + }, + { + id: 'qc0', + shortId: 'qc0', + title: 'Earlier Qualified Collection', + sourceId: 'a', + type: PostType.Collection, + visible: true, + upvotes: 50, + collectionSources: ['a', 'b', 'c'], + metadataChangedAt: updatedAt, + createdAt: new Date('2024-02-01T12:00:01.000Z'), + }, + { + id: 'low-upvote-collection', + shortId: 'luc', + title: 'Low Upvote Collection', + sourceId: 'a', + type: PostType.Collection, + visible: true, + upvotes: 0, + collectionSources: ['a', 'b', 'c'], + }, + { + id: 'small-collection', + shortId: 'sc1', + title: 'Small Collection', + sourceId: 'a', + type: PostType.Collection, + visible: true, + upvotes: 4, + collectionSources: ['a', 'b'], + }, + { + id: 'hidden-collection', + shortId: 'hc1', + title: 'Hidden Collection', + sourceId: 'a', + type: PostType.Collection, + visible: false, + upvotes: 4, + collectionSources: ['a', 'b', 'c'], + }, + { + id: 'deleted-collection', + shortId: 'dc1', + title: 'Deleted Collection', + sourceId: 'a', + type: PostType.Collection, + visible: true, + deleted: true, + upvotes: 4, + collectionSources: ['a', 'b', 'c'], + }, + ]); + + const res = await request(app.server) + .get('/sitemaps/collections.xml') + .expect(200); + + expect(res.header['content-type']).toContain('application/xml'); + expect(res.header['cache-control']).toBeTruthy(); + expect(res.text).toContain( + '', + ); + expect(res.text).toContain( + 'http://localhost:5002/posts/qualified-collection-qc1', + ); + expect(res.text).toContain( + 'http://localhost:5002/posts/earlier-qualified-collection-qc0', + ); + expect(res.text).toContain('2024-02-01T12:00:00.123Z'); + expect(res.text).not.toContain('/posts/low-upvote-collection-luc'); + expect(res.text).not.toContain('/posts/small-collection-sc1'); + expect(res.text).not.toContain('/posts/hidden-collection-hc1'); + expect(res.text).not.toContain('/posts/deleted-collection-dc1'); + expect(res.text).not.toContain('/posts/p3-p3'); + expect( + res.text.indexOf( + 'http://localhost:5002/posts/earlier-qualified-collection-qc0', + ), + ).toBeLessThan( + res.text.indexOf( + 'http://localhost:5002/posts/qualified-collection-qc1', + ), + ); + }); +}); + describe('GET /sitemaps/tags.txt', () => { it('should return tags ordered alphabetically', async () => { const res = await request(app.server).get('/sitemaps/tags.txt').expect(200); @@ -343,6 +449,9 @@ describe('GET /sitemaps/index.xml', () => { expect(res.text).toContain( 'http://localhost:5002/api/sitemaps/evergreen-2.xml', ); + expect(res.text).toContain( + 'http://localhost:5002/api/sitemaps/collections.xml', + ); expect(res.text).toContain( 'http://localhost:5002/api/sitemaps/agents.xml', ); diff --git a/src/routes/sitemaps.ts b/src/routes/sitemaps.ts index 54c8f0c737..272fdd4c6b 100644 --- a/src/routes/sitemaps.ts +++ b/src/routes/sitemaps.ts @@ -182,6 +182,16 @@ const buildPaginatedPostSitemapStream = async ( ); }; +const buildSitemapXmlStream = async ( + con: DataSource, + buildQuery: (source: EntityManager) => SelectQueryBuilder, + getUrl: (row: Record) => string, +): Promise => { + const input = await streamReplicaQuery(con, buildQuery); + + return toSitemapUrlSetStream(input, getUrl, getSitemapRowLastmod); +}; + const getSitemapPageCount = (totalPosts: number): number => Math.max(1, Math.ceil(totalPosts / getPaginatedSitemapLimit())); @@ -252,6 +262,30 @@ const buildEvergreenSitemapQuery = ( page, ); +const buildCollectionsSitemapQuery = ( + source: DataSource | EntityManager, +): SelectQueryBuilder => + applyPostsSitemapOrder( + source + .createQueryBuilder() + .select('p.slug', 'slug') + .addSelect('p."metadataChangedAt"', 'lastmod') + .from(Post, 'p') + .where('p.type = :type', { type: PostType.Collection }) + .andWhere('NOT p.private') + .andWhere('NOT p.banned') + .andWhere('NOT p.deleted') + .andWhere('p.visible = true') + .andWhere('p.upvotes >= :minUpvotes', { minUpvotes: 1 }) + .andWhere( + 'COALESCE(array_length(p."collectionSources", 1), 0) >= :minSources', + { + minSources: 3, + }, + ) + .limit(DEFAULT_SITEMAP_LIMIT), + ); + const buildTagsSitemapQuery = ( source: DataSource | EntityManager, ): SelectQueryBuilder => @@ -399,6 +433,9 @@ const getSitemapIndexXml = ( ${postsSitemaps} ${evergreenSitemaps} + + ${escapeXml(`${prefix}/api/sitemaps/collections.xml`)} + ${escapeXml(`${prefix}/api/sitemaps/tags.xml`)} @@ -521,6 +558,20 @@ export default async function (fastify: FastifyInstance): Promise { .send(await buildEvergreenSitemapStream(con, page)); }); + fastify.get('/collections.xml', async (_, res) => { + const con = await createOrGetConnection(); + const prefix = getSitemapUrlPrefix(); + + return res + .type('application/xml') + .header('cache-control', SITEMAP_CACHE_CONTROL) + .send( + await buildSitemapXmlStream(con, buildCollectionsSitemapQuery, (row) => + getPostSitemapUrl(prefix, row.slug), + ), + ); + }); + fastify.get('/tags.txt', async (_, res) => { const con = await createOrGetConnection(); const prefix = getSitemapUrlPrefix(); @@ -538,16 +589,13 @@ export default async function (fastify: FastifyInstance): Promise { fastify.get('/tags.xml', async (_, res) => { const con = await createOrGetConnection(); const prefix = getSitemapUrlPrefix(); - const input = await streamReplicaQuery(con, buildTagsSitemapQuery); return res .type('application/xml') .header('cache-control', SITEMAP_CACHE_CONTROL) .send( - toSitemapUrlSetStream( - input, - (row) => getTagSitemapUrl(prefix, row.value), - getSitemapRowLastmod, + await buildSitemapXmlStream(con, buildTagsSitemapQuery, (row) => + getTagSitemapUrl(prefix, row.value), ), ); }); @@ -555,16 +603,13 @@ export default async function (fastify: FastifyInstance): Promise { fastify.get('/agents.xml', async (_, res) => { const con = await createOrGetConnection(); const prefix = getSitemapUrlPrefix(); - const input = await streamReplicaQuery(con, buildAgentsSitemapQuery); return res .type('application/xml') .header('cache-control', SITEMAP_CACHE_CONTROL) .send( - toSitemapUrlSetStream( - input, - (row) => getAgentSitemapUrl(prefix, row.entity), - getSitemapRowLastmod, + await buildSitemapXmlStream(con, buildAgentsSitemapQuery, (row) => + getAgentSitemapUrl(prefix, row.entity), ), ); }); @@ -572,16 +617,13 @@ export default async function (fastify: FastifyInstance): Promise { fastify.get('/agents-digest.xml', async (_, res) => { const con = await createOrGetConnection(); const prefix = getSitemapUrlPrefix(); - const input = await streamReplicaQuery(con, buildAgentsDigestSitemapQuery); return res .type('application/xml') .header('cache-control', SITEMAP_CACHE_CONTROL) .send( - toSitemapUrlSetStream( - input, - (row) => getPostSitemapUrl(prefix, row.slug), - getSitemapRowLastmod, + await buildSitemapXmlStream(con, buildAgentsDigestSitemapQuery, (row) => + getPostSitemapUrl(prefix, row.slug), ), ); }); @@ -589,16 +631,13 @@ export default async function (fastify: FastifyInstance): Promise { fastify.get('/sources.xml', async (_, res) => { const con = await createOrGetConnection(); const prefix = getSitemapUrlPrefix(); - const input = await streamReplicaQuery(con, buildSourcesSitemapQuery); return res .type('application/xml') .header('cache-control', SITEMAP_CACHE_CONTROL) .send( - toSitemapUrlSetStream( - input, - (row) => getSourceSitemapUrl(prefix, row.handle), - getSitemapRowLastmod, + await buildSitemapXmlStream(con, buildSourcesSitemapQuery, (row) => + getSourceSitemapUrl(prefix, row.handle), ), ); }); @@ -606,32 +645,26 @@ export default async function (fastify: FastifyInstance): Promise { fastify.get('/squads.xml', async (_, res) => { const con = await createOrGetConnection(); const prefix = getSitemapUrlPrefix(); - const input = await streamReplicaQuery(con, buildSquadsSitemapQuery); return res .type('application/xml') .header('cache-control', SITEMAP_CACHE_CONTROL) .send( - toSitemapUrlSetStream( - input, - (row) => getSquadSitemapUrl(prefix, row.handle), - getSitemapRowLastmod, + await buildSitemapXmlStream(con, buildSquadsSitemapQuery, (row) => + getSquadSitemapUrl(prefix, row.handle), ), ); }); fastify.get('/users.xml', async (_, res) => { const con = await createOrGetConnection(); - const input = await streamReplicaQuery(con, buildUsersSitemapQuery); return res .type('application/xml') .header('cache-control', SITEMAP_CACHE_CONTROL) .send( - toSitemapUrlSetStream( - input, - (row) => getUserProfileUrl(row.username), - getSitemapRowLastmod, + await buildSitemapXmlStream(con, buildUsersSitemapQuery, (row) => + getUserProfileUrl(row.username), ), ); });