From 0a6fbaf947a6558e5186eb01bc61f4ef9df47a53 Mon Sep 17 00:00:00 2001 From: idoshamun Date: Tue, 31 Mar 2026 11:09:29 +0000 Subject: [PATCH 1/2] feat: lower evergreen sitemap threshold --- __tests__/sitemaps.ts | 111 ++++++++++++++++++++++++++++++++++++++++- src/routes/sitemaps.ts | 107 +++++++++++++++++++++++++++++++-------- 2 files changed, 196 insertions(+), 22 deletions(-) diff --git a/__tests__/sitemaps.ts b/__tests__/sitemaps.ts index f4757c4847..7229eec931 100644 --- a/__tests__/sitemaps.ts +++ b/__tests__/sitemaps.ts @@ -269,6 +269,38 @@ describe('GET /sitemaps/tags.xml', () => { describe('GET /sitemaps/index.xml', () => { it('should return sitemap index xml with all paginated post sitemaps', async () => { + const oldDate = new Date(now.getTime() - 91 * ONE_DAY_IN_SECONDS * 1000); + + await con.getRepository(Post).insert([ + { + id: 'evergreen-index-1', + shortId: 'ei1', + title: 'Evergreen Index 1', + sourceId: 'a', + createdAt: oldDate, + type: PostType.Article, + upvotes: 10, + }, + { + id: 'evergreen-index-2', + shortId: 'ei2', + title: 'Evergreen Index 2', + sourceId: 'a', + createdAt: new Date(oldDate.getTime() - 1000), + type: PostType.Article, + upvotes: 11, + }, + { + id: 'evergreen-index-3', + shortId: 'ei3', + title: 'Evergreen Index 3', + sourceId: 'a', + createdAt: new Date(oldDate.getTime() - 2000), + type: PostType.Article, + upvotes: 12, + }, + ]); + const res = await request(app.server) .get('/sitemaps/index.xml') .expect(200); @@ -285,7 +317,10 @@ describe('GET /sitemaps/index.xml', () => { 'http://localhost:5002/api/sitemaps/posts-2.xml', ); expect(res.text).toContain( - 'http://localhost:5002/api/sitemaps/tags.xml', + 'http://localhost:5002/api/sitemaps/evergreen.xml', + ); + expect(res.text).toContain( + 'http://localhost:5002/api/sitemaps/evergreen-2.xml', ); expect(res.text).toContain( 'http://localhost:5002/api/sitemaps/agents.xml', @@ -296,6 +331,9 @@ describe('GET /sitemaps/index.xml', () => { expect(res.text).toContain( 'http://localhost:5002/api/sitemaps/squads.xml', ); + expect(res.text).toContain( + 'http://localhost:5002/api/sitemaps/tags.xml', + ); }); }); @@ -431,6 +469,77 @@ describe('GET /sitemaps/squads.xml', () => { }); describe('GET /sitemaps/evergreen.xml', () => { + it('should include posts with at least 10 upvotes and paginate older posts', async () => { + const oldDate = new Date(now.getTime() - 91 * ONE_DAY_IN_SECONDS * 1000); + + await con.getRepository(Post).insert([ + { + id: 'evergreen-min-threshold', + shortId: 'emt', + title: 'Evergreen Min Threshold', + sourceId: 'a', + createdAt: oldDate, + type: PostType.Article, + upvotes: 10, + }, + { + id: 'evergreen-next-page', + shortId: 'enp', + title: 'Evergreen Next Page', + sourceId: 'a', + createdAt: new Date(oldDate.getTime() - 1000), + type: PostType.Article, + upvotes: 11, + }, + { + id: 'evergreen-third-page', + shortId: 'etp', + title: 'Evergreen Third Page', + sourceId: 'a', + createdAt: new Date(oldDate.getTime() - 2000), + type: PostType.Article, + upvotes: 12, + }, + { + id: 'evergreen-below-threshold', + shortId: 'ebt', + title: 'Evergreen Below Threshold', + sourceId: 'a', + createdAt: new Date(oldDate.getTime() - 3000), + type: PostType.Article, + upvotes: 9, + }, + ]); + + const firstPage = await request(app.server) + .get('/sitemaps/evergreen.xml') + .expect(200); + const secondPage = await request(app.server) + .get('/sitemaps/evergreen-2.xml') + .expect(200); + + expect(firstPage.header['content-type']).toContain('application/xml'); + expect(firstPage.text).toContain( + '/posts/evergreen-third-page-evergreen-third-page', + ); + expect(firstPage.text).toContain( + '/posts/evergreen-next-page-evergreen-next-page', + ); + expect(firstPage.text).not.toContain( + '/posts/evergreen-min-threshold-evergreen-min-threshold', + ); + expect(firstPage.text).not.toContain( + '/posts/evergreen-below-threshold-ebt', + ); + + expect(secondPage.text).toContain( + '/posts/evergreen-min-threshold-evergreen-min-threshold', + ); + expect(secondPage.text).not.toContain( + '/posts/evergreen-below-threshold-ebt', + ); + }); + it('should exclude posts by low-reputation authors', async () => { await con.getRepository(User).save({ id: 'low-rep-sitemap', diff --git a/src/routes/sitemaps.ts b/src/routes/sitemaps.ts index 409bb0feaa..c180051abc 100644 --- a/src/routes/sitemaps.ts +++ b/src/routes/sitemaps.ts @@ -174,13 +174,11 @@ const buildPostSitemapStream = async ( ); }; -const buildEvergreenSitemapQuery = ( +const buildEvergreenSitemapBaseQuery = ( source: DataSource | EntityManager, ): SelectQueryBuilder => source .createQueryBuilder() - .select('p.slug', 'slug') - .addSelect('p."metadataChangedAt"', 'lastmod') .from(Post, 'p') .leftJoin(User, 'u', 'p."authorId" = u.id') .where('p.type NOT IN (:...types)', { types: [PostType.Welcome] }) @@ -188,10 +186,20 @@ const buildEvergreenSitemapQuery = ( .andWhere('NOT p.banned') .andWhere('NOT p.deleted') .andWhere('p."createdAt" <= current_timestamp - interval \'90 day\'') - .andWhere('p.upvotes >= :minUpvotes', { minUpvotes: 50 }) - .andWhere('(u.id is null or u.reputation > 10)') - .orderBy('p.upvotes', 'DESC') - .limit(DEFAULT_SITEMAP_LIMIT); + .andWhere('p.upvotes >= :minUpvotes', { minUpvotes: 10 }) + .andWhere('(u.id is null or u.reputation > 10)'); + +const buildEvergreenSitemapQuery = ( + source: DataSource | EntityManager, + page: number, +): SelectQueryBuilder => + buildEvergreenSitemapBaseQuery(source) + .select('p.slug', 'slug') + .addSelect('p."metadataChangedAt"', 'lastmod') + .orderBy('p."createdAt"', 'ASC') + .addOrderBy('p.id', 'ASC') + .limit(getPostsSitemapLimit()) + .offset((page - 1) * getPostsSitemapLimit()); const buildTagsSitemapQuery = ( source: DataSource | EntityManager, @@ -253,6 +261,14 @@ const getPostsSitemapPath = (page: number): string => const getPostsSitemapPageCount = (totalPosts: number): number => Math.max(1, Math.ceil(totalPosts / getPostsSitemapLimit())); +const getEvergreenSitemapPath = (page: number): string => + page === 1 + ? '/api/sitemaps/evergreen.xml' + : `/api/sitemaps/evergreen-${page}.xml`; + +const getEvergreenSitemapPageCount = (totalPosts: number): number => + Math.max(1, Math.ceil(totalPosts / getPostsSitemapLimit())); + const getPostsSitemapCount = async (con: DataSource): Promise => { const queryRunner = con.createQueryRunner('slave'); @@ -263,7 +279,36 @@ const getPostsSitemapCount = async (con: DataSource): Promise => { } }; -const getSitemapIndexXml = (postsSitemapCount: number): string => { +const getEvergreenSitemapCount = async (con: DataSource): Promise => { + const queryRunner = con.createQueryRunner('slave'); + + try { + return await buildEvergreenSitemapBaseQuery(queryRunner.manager).getCount(); + } finally { + await queryRunner.release(); + } +}; + +const buildEvergreenSitemapStream = async ( + con: DataSource, + page: number, +): Promise => { + const prefix = getSitemapUrlPrefix(); + const input = await streamReplicaQuery(con, (source) => + buildEvergreenSitemapQuery(source, page), + ); + + return toSitemapUrlSetStream( + input, + (row) => getPostSitemapUrl(prefix, row.slug), + getSitemapRowLastmod, + ); +}; + +const getSitemapIndexXml = ( + postsSitemapCount: number, + evergreenSitemapCount: number, +): string => { const prefix = getSitemapUrlPrefix(); const postsSitemaps = Array.from( { length: postsSitemapCount }, @@ -275,13 +320,21 @@ const getSitemapIndexXml = (postsSitemapCount: number): string => { `; }, ).join('\n'); + const evergreenSitemaps = Array.from( + { length: evergreenSitemapCount }, + (_, index) => { + const page = index + 1; + + return ` + ${escapeXml(`${prefix}${getEvergreenSitemapPath(page)}`)} + `; + }, + ).join('\n'); return ` ${postsSitemaps} - - ${escapeXml(`${prefix}/api/sitemaps/evergreen.xml`)} - +${evergreenSitemaps} ${escapeXml(`${prefix}/api/sitemaps/tags.xml`)} @@ -374,19 +427,28 @@ export default async function (fastify: FastifyInstance): Promise { fastify.get('/evergreen.xml', async (_, res) => { const con = await createOrGetConnection(); - const prefix = getSitemapUrlPrefix(); - const input = await streamReplicaQuery(con, buildEvergreenSitemapQuery); return res .type('application/xml') .header('cache-control', SITEMAP_CACHE_CONTROL) - .send( - toSitemapUrlSetStream( - input, - (row) => getPostSitemapUrl(prefix, row.slug), - getSitemapRowLastmod, - ), - ); + .send(await buildEvergreenSitemapStream(con, 1)); + }); + + fastify.get<{ + Params: { page: string }; + }>('/evergreen-:page.xml', async (req, res) => { + const page = Number.parseInt(req.params.page, 10); + + if (!Number.isInteger(page) || page < 1) { + return res.code(404).send(); + } + + const con = await createOrGetConnection(); + + return res + .type('application/xml') + .header('cache-control', SITEMAP_CACHE_CONTROL) + .send(await buildEvergreenSitemapStream(con, page)); }); fastify.get('/tags.txt', async (_, res) => { @@ -476,10 +538,13 @@ export default async function (fastify: FastifyInstance): Promise { const postsSitemapCount = getPostsSitemapPageCount( await getPostsSitemapCount(con), ); + const evergreenSitemapCount = getEvergreenSitemapPageCount( + await getEvergreenSitemapCount(con), + ); return res .type('application/xml') .header('cache-control', SITEMAP_CACHE_CONTROL) - .send(getSitemapIndexXml(postsSitemapCount)); + .send(getSitemapIndexXml(postsSitemapCount, evergreenSitemapCount)); }); } From fd3d33f724b6877307498f8a67075fe8d95526d2 Mon Sep 17 00:00:00 2001 From: idoshamun Date: Tue, 31 Mar 2026 11:13:31 +0000 Subject: [PATCH 2/2] refactor: simplify sitemap pagination helpers --- src/routes/sitemaps.ts | 168 ++++++++++++++++++++--------------------- 1 file changed, 84 insertions(+), 84 deletions(-) diff --git a/src/routes/sitemaps.ts b/src/routes/sitemaps.ts index c180051abc..af9583909c 100644 --- a/src/routes/sitemaps.ts +++ b/src/routes/sitemaps.ts @@ -27,7 +27,7 @@ const ARENA_SITEMAP_GROUP_IDS = [ '970ab2c9-f845-4822-82f0-02169713b814', ]; -const getPostsSitemapLimit = (): number => { +const getPaginatedSitemapLimit = (): number => { const limit = Number.parseInt(process.env.SITEMAP_LIMIT || '', 10); return Number.isInteger(limit) && limit > 0 ? limit : DEFAULT_SITEMAP_LIMIT; @@ -139,32 +139,35 @@ const applyPostsSitemapOrder = ( ): SelectQueryBuilder => query.orderBy('p."createdAt"', 'ASC').addOrderBy('p.id', 'ASC'); -const buildPostsSitemapQuery = ( - source: DataSource | EntityManager, +const applyPaginatedSitemapWindow = ( + query: SelectQueryBuilder, page: number, ): SelectQueryBuilder => - applyPostsSitemapOrder( - buildPostsSitemapBaseQuery(source) - .select('p.slug', 'slug') - .addSelect('p."metadataChangedAt"', 'lastmod') - .limit(getPostsSitemapLimit()) - .offset((page - 1) * getPostsSitemapLimit()), - ); + query + .limit(getPaginatedSitemapLimit()) + .offset((page - 1) * getPaginatedSitemapLimit()); -const buildPostsSitemapTextQuery = ( +const buildPostsSitemapQuery = ( source: DataSource | EntityManager, + page: number, ): SelectQueryBuilder => - applyPostsSitemapOrder( - buildPostsSitemapBaseQuery(source).select('p.slug', 'slug'), + applyPaginatedSitemapWindow( + applyPostsSitemapOrder( + buildPostsSitemapBaseQuery(source) + .select('p.slug', 'slug') + .addSelect('p."metadataChangedAt"', 'lastmod'), + ), + page, ); -const buildPostSitemapStream = async ( +const buildPaginatedPostSitemapStream = async ( con: DataSource, page: number, + buildQuery: (source: EntityManager, page: number) => SelectQueryBuilder, ): Promise => { const prefix = getSitemapUrlPrefix(); const input = await streamReplicaQuery(con, (source) => - buildPostsSitemapQuery(source, page), + buildQuery(source, page), ); return toSitemapUrlSetStream( @@ -174,6 +177,48 @@ const buildPostSitemapStream = async ( ); }; +const getSitemapPageCount = (totalPosts: number): number => + Math.max(1, Math.ceil(totalPosts / getPaginatedSitemapLimit())); + +const getReplicaQueryCount = async ( + con: DataSource, + buildQuery: (source: EntityManager) => SelectQueryBuilder, +): Promise => { + const queryRunner = con.createQueryRunner('slave'); + + try { + return await buildQuery(queryRunner.manager).getCount(); + } finally { + await queryRunner.release(); + } +}; + +const buildSitemapIndexEntries = ( + prefix: string, + sitemapCount: number, + getPath: (page: number) => string, +): string => + Array.from({ length: sitemapCount }, (_, index) => { + const page = index + 1; + + return ` + ${escapeXml(`${prefix}${getPath(page)}`)} + `; + }).join('\n'); + +const buildPostsSitemapTextQuery = ( + source: DataSource | EntityManager, +): SelectQueryBuilder => + applyPostsSitemapOrder( + buildPostsSitemapBaseQuery(source).select('p.slug', 'slug'), + ); + +const buildPostSitemapStream = async ( + con: DataSource, + page: number, +): Promise => + buildPaginatedPostSitemapStream(con, page, buildPostsSitemapQuery); + const buildEvergreenSitemapBaseQuery = ( source: DataSource | EntityManager, ): SelectQueryBuilder => @@ -193,13 +238,14 @@ const buildEvergreenSitemapQuery = ( source: DataSource | EntityManager, page: number, ): SelectQueryBuilder => - buildEvergreenSitemapBaseQuery(source) - .select('p.slug', 'slug') - .addSelect('p."metadataChangedAt"', 'lastmod') - .orderBy('p."createdAt"', 'ASC') - .addOrderBy('p.id', 'ASC') - .limit(getPostsSitemapLimit()) - .offset((page - 1) * getPostsSitemapLimit()); + applyPaginatedSitemapWindow( + buildEvergreenSitemapBaseQuery(source) + .select('p.slug', 'slug') + .addSelect('p."metadataChangedAt"', 'lastmod') + .orderBy('p."createdAt"', 'ASC') + .addOrderBy('p.id', 'ASC'), + page, + ); const buildTagsSitemapQuery = ( source: DataSource | EntityManager, @@ -258,78 +304,32 @@ const buildSquadsSitemapQuery = ( const getPostsSitemapPath = (page: number): string => page === 1 ? '/api/sitemaps/posts-1.xml' : `/api/sitemaps/posts-${page}.xml`; -const getPostsSitemapPageCount = (totalPosts: number): number => - Math.max(1, Math.ceil(totalPosts / getPostsSitemapLimit())); - const getEvergreenSitemapPath = (page: number): string => page === 1 ? '/api/sitemaps/evergreen.xml' : `/api/sitemaps/evergreen-${page}.xml`; -const getEvergreenSitemapPageCount = (totalPosts: number): number => - Math.max(1, Math.ceil(totalPosts / getPostsSitemapLimit())); - -const getPostsSitemapCount = async (con: DataSource): Promise => { - const queryRunner = con.createQueryRunner('slave'); - - try { - return await buildPostsSitemapBaseQuery(queryRunner.manager).getCount(); - } finally { - await queryRunner.release(); - } -}; - -const getEvergreenSitemapCount = async (con: DataSource): Promise => { - const queryRunner = con.createQueryRunner('slave'); - - try { - return await buildEvergreenSitemapBaseQuery(queryRunner.manager).getCount(); - } finally { - await queryRunner.release(); - } -}; - const buildEvergreenSitemapStream = async ( con: DataSource, page: number, -): Promise => { - const prefix = getSitemapUrlPrefix(); - const input = await streamReplicaQuery(con, (source) => - buildEvergreenSitemapQuery(source, page), - ); - - return toSitemapUrlSetStream( - input, - (row) => getPostSitemapUrl(prefix, row.slug), - getSitemapRowLastmod, - ); -}; +): Promise => + buildPaginatedPostSitemapStream(con, page, buildEvergreenSitemapQuery); const getSitemapIndexXml = ( postsSitemapCount: number, evergreenSitemapCount: number, ): string => { const prefix = getSitemapUrlPrefix(); - const postsSitemaps = Array.from( - { length: postsSitemapCount }, - (_, index) => { - const page = index + 1; - - return ` - ${escapeXml(`${prefix}${getPostsSitemapPath(page)}`)} - `; - }, - ).join('\n'); - const evergreenSitemaps = Array.from( - { length: evergreenSitemapCount }, - (_, index) => { - const page = index + 1; - - return ` - ${escapeXml(`${prefix}${getEvergreenSitemapPath(page)}`)} - `; - }, - ).join('\n'); + const postsSitemaps = buildSitemapIndexEntries( + prefix, + postsSitemapCount, + getPostsSitemapPath, + ); + const evergreenSitemaps = buildSitemapIndexEntries( + prefix, + evergreenSitemapCount, + getEvergreenSitemapPath, + ); return ` @@ -535,11 +535,11 @@ export default async function (fastify: FastifyInstance): Promise { fastify.get('/index.xml', async (_, res) => { const con = await createOrGetConnection(); - const postsSitemapCount = getPostsSitemapPageCount( - await getPostsSitemapCount(con), + const postsSitemapCount = getSitemapPageCount( + await getReplicaQueryCount(con, buildPostsSitemapBaseQuery), ); - const evergreenSitemapCount = getEvergreenSitemapPageCount( - await getEvergreenSitemapCount(con), + const evergreenSitemapCount = getSitemapPageCount( + await getReplicaQueryCount(con, buildEvergreenSitemapBaseQuery), ); return res