From 1c27423228023f5f274dc7d6fc1b1f0b811cefc9 Mon Sep 17 00:00:00 2001 From: idoshamun Date: Tue, 31 Mar 2026 12:02:31 +0000 Subject: [PATCH 1/2] feat(seo): add sources sitemap --- __tests__/sitemaps.ts | 163 +++++++++++++++++++++++++++++++++++++++++ src/routes/sitemaps.ts | 69 +++++++++++++++++ 2 files changed, 232 insertions(+) diff --git a/__tests__/sitemaps.ts b/__tests__/sitemaps.ts index 1a38075225..cc9813eb97 100644 --- a/__tests__/sitemaps.ts +++ b/__tests__/sitemaps.ts @@ -14,6 +14,7 @@ import { SentimentEntity, SentimentGroup, Source, + SourceType, User, } from '../src/entity'; import { getSitemapRowLastmod } from '../src/routes/sitemaps'; @@ -328,6 +329,9 @@ describe('GET /sitemaps/index.xml', () => { expect(res.text).toContain( 'http://localhost:5002/api/sitemaps/agents-digest.xml', ); + expect(res.text).toContain( + 'http://localhost:5002/api/sitemaps/sources.xml', + ); expect(res.text).toContain( 'http://localhost:5002/api/sitemaps/squads.xml', ); @@ -340,6 +344,165 @@ describe('GET /sitemaps/index.xml', () => { }); }); +describe('GET /sitemaps/sources.xml', () => { + it('should include only qualified public machine sources', async () => { + const sourceCreatedAt = new Date('2023-10-01T10:00:00.000Z'); + const recentActivityDate = new Date(); + const staleActivityDate = new Date('2023-01-01T00:00:00.000Z'); + const publicPostBase = { + createdAt: staleActivityDate, + type: PostType.Article, + visible: true, + private: false, + deleted: false, + banned: false, + }; + + await con.getRepository(Source).save([ + { + id: 'qualified-source', + name: 'Qualified Source', + image: 'https://daily.dev/qualified-source.jpg', + handle: 'qualifiedsource', + type: SourceType.Machine, + active: true, + private: false, + createdAt: sourceCreatedAt, + }, + { + id: 'not-enough-posts-source', + name: 'Not Enough Posts Source', + image: 'https://daily.dev/not-enough-posts-source.jpg', + handle: 'notenoughposts', + type: SourceType.Machine, + active: true, + private: false, + }, + { + id: 'stale-source', + name: 'Stale Source', + image: 'https://daily.dev/stale-source.jpg', + handle: 'stalesource', + type: SourceType.Machine, + active: true, + private: false, + }, + { + id: 'private-source', + name: 'Private Source', + image: 'https://daily.dev/private-source.jpg', + handle: 'privatesource', + type: SourceType.Machine, + active: true, + private: true, + }, + { + id: 'inactive-source', + name: 'Inactive Source', + image: 'https://daily.dev/inactive-source.jpg', + handle: 'inactivesource', + type: SourceType.Machine, + active: false, + private: false, + }, + { + id: 'squad-source', + name: 'Squad Source', + image: 'https://daily.dev/squad-source.jpg', + handle: 'squadsource', + type: SourceType.Squad, + active: true, + private: false, + }, + ]); + + await con.getRepository(Post).insert([ + ...Array.from({ length: 9 }, (_, index) => ({ + ...publicPostBase, + id: `qualified-old-${index}`, + shortId: `qso${index}`, + title: `Qualified Old ${index}`, + sourceId: 'qualified-source', + })), + { + ...publicPostBase, + id: 'qualified-recent', + shortId: 'qsr', + title: 'Qualified Recent', + sourceId: 'qualified-source', + createdAt: recentActivityDate, + }, + ...Array.from({ length: 9 }, (_, index) => ({ + ...publicPostBase, + id: `notenough-${index}`, + shortId: `nes${index}`, + title: `Not Enough ${index}`, + sourceId: 'not-enough-posts-source', + })), + { + ...publicPostBase, + id: 'notenough-private', + shortId: 'nsp', + title: 'Not Enough Private', + sourceId: 'not-enough-posts-source', + private: true, + }, + ...Array.from({ length: 10 }, (_, index) => ({ + ...publicPostBase, + id: `stale-${index}`, + shortId: `sts${index}`, + title: `Stale ${index}`, + sourceId: 'stale-source', + })), + ...Array.from({ length: 10 }, (_, index) => ({ + ...publicPostBase, + id: `private-${index}`, + shortId: `prs${index}`, + title: `Private ${index}`, + sourceId: 'private-source', + createdAt: recentActivityDate, + })), + ...Array.from({ length: 10 }, (_, index) => ({ + ...publicPostBase, + id: `inactive-${index}`, + shortId: `ins${index}`, + title: `Inactive ${index}`, + sourceId: 'inactive-source', + createdAt: recentActivityDate, + })), + ...Array.from({ length: 10 }, (_, index) => ({ + ...publicPostBase, + id: `squad-${index}`, + shortId: `sqs${index}`, + title: `Squad ${index}`, + sourceId: 'squad-source', + createdAt: recentActivityDate, + })), + ]); + + const res = await request(app.server) + .get('/sitemaps/sources.xml') + .expect(200); + + expect(res.header['content-type']).toContain('application/xml'); + expect(res.header['cache-control']).toEqual( + 'public, max-age=7200, s-maxage=7200', + ); + expect(res.text).toContain( + '', + ); + expect(res.text).toContain( + 'http://localhost:5002/sources/qualifiedsource', + ); + expect(res.text).toContain('2023-10-01T10:00:00.000Z'); + expect(res.text).not.toContain('/sources/notenoughposts'); + expect(res.text).not.toContain('/sources/stalesource'); + expect(res.text).not.toContain('/sources/privatesource'); + expect(res.text).not.toContain('/sources/inactivesource'); + expect(res.text).not.toContain('/sources/squadsource'); + }); +}); + describe('GET /sitemaps/users.xml', () => { it('should include only qualified author profiles', async () => { const updatedAt = new Date('2024-01-01T12:00:00.123Z'); diff --git a/src/routes/sitemaps.ts b/src/routes/sitemaps.ts index fba8ee7598..73887c2e83 100644 --- a/src/routes/sitemaps.ts +++ b/src/routes/sitemaps.ts @@ -23,6 +23,7 @@ import { const SITEMAP_CACHE_CONTROL = `public, max-age=${2 * ONE_HOUR_IN_SECONDS}, s-maxage=${2 * ONE_HOUR_IN_SECONDS}`; const DEFAULT_SITEMAP_LIMIT = 50_000; +const QUALIFIED_SOURCE_MIN_PUBLIC_POSTS = 10; const ARENA_SITEMAP_GROUP_IDS = [ '385404b4-f0f4-4e81-a338-bdca851eca31', '970ab2c9-f845-4822-82f0-02169713b814', @@ -88,6 +89,9 @@ const getTagSitemapUrl = (prefix: string, value: string): string => const getAgentSitemapUrl = (prefix: string, entity: string): string => `${prefix}/agents/${encodeURIComponent(entity)}`; +const getSourceSitemapUrl = (prefix: string, handle: string): string => + `${prefix}/sources/${encodeURIComponent(handle)}`; + const getSquadSitemapUrl = (prefix: string, handle: string): string => `${prefix}/squads/${encodeURIComponent(handle)}`; @@ -287,6 +291,51 @@ const buildAgentsDigestSitemapQuery = ( .orderBy('p."createdAt"', 'DESC') .limit(DEFAULT_SITEMAP_LIMIT); +const buildSourcesSitemapQuery = ( + source: DataSource | EntityManager, +): SelectQueryBuilder => + source + .createQueryBuilder() + .select('s.handle', 'handle') + .addSelect('s."createdAt"', 'lastmod') + .from(Source, 's') + .where('s.type = :type', { type: SourceType.Machine }) + .andWhere('s.active = true') + .andWhere('s.private = false') + .andWhere((qb) => { + const publicPostsCountSubQuery = qb + .subQuery() + .select('COUNT(*)') + .from(Post, 'p') + .where('p."sourceId" = s.id') + .andWhere('p.deleted = false') + .andWhere('p.visible = true') + .andWhere('p.private = false') + .andWhere('p.banned = false') + .getQuery(); + + return `${publicPostsCountSubQuery} >= :minPublicPosts`; + }) + .andWhere((qb) => { + const recentPublicPostSubQuery = qb + .subQuery() + .select('1') + .from(Post, 'p') + .where('p."sourceId" = s.id') + .andWhere('p.deleted = false') + .andWhere('p.visible = true') + .andWhere('p.private = false') + .andWhere('p.banned = false') + .andWhere('p."createdAt" >= current_timestamp - interval \'12 months\'') + .getQuery(); + + return `EXISTS ${recentPublicPostSubQuery}`; + }) + .orderBy('s."createdAt"', 'DESC') + .addOrderBy('s.handle', 'ASC') + .limit(DEFAULT_SITEMAP_LIMIT) + .setParameter('minPublicPosts', QUALIFIED_SOURCE_MIN_PUBLIC_POSTS); + const buildSquadsSitemapQuery = ( source: DataSource | EntityManager, ): SelectQueryBuilder => @@ -374,6 +423,9 @@ ${evergreenSitemaps} ${escapeXml(`${prefix}/api/sitemaps/agents-digest.xml`)} + + ${escapeXml(`${prefix}/api/sitemaps/sources.xml`)} + ${escapeXml(`${prefix}/api/sitemaps/squads.xml`)} @@ -549,6 +601,23 @@ export default async function (fastify: FastifyInstance): Promise { ); }); + fastify.get('/sources.xml', async (_, res) => { + const con = await createOrGetConnection(); + const prefix = getSitemapUrlPrefix(); + const input = await streamReplicaQuery(con, buildSourcesSitemapQuery); + + return res + .type('application/xml') + .header('cache-control', SITEMAP_CACHE_CONTROL) + .send( + toSitemapUrlSetStream( + input, + (row) => getSourceSitemapUrl(prefix, row.handle), + getSitemapRowLastmod, + ), + ); + }); + fastify.get('/squads.xml', async (_, res) => { const con = await createOrGetConnection(); const prefix = getSitemapUrlPrefix(); From 30e02a368d083b25bdcdd45c0375edc5f7b49cfe Mon Sep 17 00:00:00 2001 From: idoshamun Date: Tue, 31 Mar 2026 12:04:52 +0000 Subject: [PATCH 2/2] refactor(seo): simplify sources sitemap query --- __tests__/sitemaps.ts | 114 ++++++++++++++++++----------------------- src/routes/sitemaps.ts | 43 +++++----------- 2 files changed, 64 insertions(+), 93 deletions(-) diff --git a/__tests__/sitemaps.ts b/__tests__/sitemaps.ts index cc9813eb97..c49cffa300 100644 --- a/__tests__/sitemaps.ts +++ b/__tests__/sitemaps.ts @@ -110,6 +110,26 @@ const sentimentEntitiesFixture: DeepPartial[] = [ }, ]; +const createSourcePostFixtures = ( + sourceId: string, + count: number, + prefix: string, + overrides?: (index: number) => DeepPartial, +): DeepPartial[] => + Array.from({ length: count }, (_, index) => ({ + id: `${prefix}-${index}`, + shortId: `${prefix.replace(/[^a-z0-9]/gi, '').slice(0, 10)}${index}`, + title: `${prefix} ${index}`, + sourceId, + createdAt: new Date('2023-01-01T00:00:00.000Z'), + type: PostType.Article, + visible: true, + private: false, + deleted: false, + banned: false, + ...overrides?.(index), + })); + beforeAll(async () => { process.env.SITEMAP_LIMIT = '2'; con = await createOrGetConnection(); @@ -348,15 +368,6 @@ describe('GET /sitemaps/sources.xml', () => { it('should include only qualified public machine sources', async () => { const sourceCreatedAt = new Date('2023-10-01T10:00:00.000Z'); const recentActivityDate = new Date(); - const staleActivityDate = new Date('2023-01-01T00:00:00.000Z'); - const publicPostBase = { - createdAt: staleActivityDate, - type: PostType.Article, - visible: true, - private: false, - deleted: false, - banned: false, - }; await con.getRepository(Source).save([ { @@ -417,65 +428,40 @@ describe('GET /sitemaps/sources.xml', () => { ]); await con.getRepository(Post).insert([ - ...Array.from({ length: 9 }, (_, index) => ({ - ...publicPostBase, - id: `qualified-old-${index}`, - shortId: `qso${index}`, - title: `Qualified Old ${index}`, - sourceId: 'qualified-source', - })), - { - ...publicPostBase, - id: 'qualified-recent', - shortId: 'qsr', - title: 'Qualified Recent', - sourceId: 'qualified-source', - createdAt: recentActivityDate, - }, - ...Array.from({ length: 9 }, (_, index) => ({ - ...publicPostBase, - id: `notenough-${index}`, - shortId: `nes${index}`, - title: `Not Enough ${index}`, - sourceId: 'not-enough-posts-source', - })), - { - ...publicPostBase, - id: 'notenough-private', - shortId: 'nsp', - title: 'Not Enough Private', - sourceId: 'not-enough-posts-source', - private: true, - }, - ...Array.from({ length: 10 }, (_, index) => ({ - ...publicPostBase, - id: `stale-${index}`, - shortId: `sts${index}`, - title: `Stale ${index}`, - sourceId: 'stale-source', - })), - ...Array.from({ length: 10 }, (_, index) => ({ - ...publicPostBase, - id: `private-${index}`, - shortId: `prs${index}`, - title: `Private ${index}`, - sourceId: 'private-source', + ...createSourcePostFixtures( + 'qualified-source', + 9, + 'qualified-old', + () => ({}), + ), + ...createSourcePostFixtures( + 'qualified-source', + 1, + 'qualified-recent', + () => ({ + createdAt: recentActivityDate, + }), + ), + ...createSourcePostFixtures( + 'not-enough-posts-source', + 9, + 'notenough', + () => ({}), + ), + ...createSourcePostFixtures( + 'not-enough-posts-source', + 1, + 'notenough-private', + () => ({ private: true }), + ), + ...createSourcePostFixtures('stale-source', 10, 'stale', () => ({})), + ...createSourcePostFixtures('private-source', 10, 'private', () => ({ createdAt: recentActivityDate, })), - ...Array.from({ length: 10 }, (_, index) => ({ - ...publicPostBase, - id: `inactive-${index}`, - shortId: `ins${index}`, - title: `Inactive ${index}`, - sourceId: 'inactive-source', + ...createSourcePostFixtures('inactive-source', 10, 'inactive', () => ({ createdAt: recentActivityDate, })), - ...Array.from({ length: 10 }, (_, index) => ({ - ...publicPostBase, - id: `squad-${index}`, - shortId: `sqs${index}`, - title: `Squad ${index}`, - sourceId: 'squad-source', + ...createSourcePostFixtures('squad-source', 10, 'squad', () => ({ createdAt: recentActivityDate, })), ]); diff --git a/src/routes/sitemaps.ts b/src/routes/sitemaps.ts index 73887c2e83..54c8f0c737 100644 --- a/src/routes/sitemaps.ts +++ b/src/routes/sitemaps.ts @@ -299,38 +299,23 @@ const buildSourcesSitemapQuery = ( .select('s.handle', 'handle') .addSelect('s."createdAt"', 'lastmod') .from(Source, 's') + .innerJoin( + Post, + 'p', + `p."sourceId" = s.id + AND p.deleted = false + AND p.visible = true + AND p.private = false + AND p.banned = false`, + ) .where('s.type = :type', { type: SourceType.Machine }) .andWhere('s.active = true') .andWhere('s.private = false') - .andWhere((qb) => { - const publicPostsCountSubQuery = qb - .subQuery() - .select('COUNT(*)') - .from(Post, 'p') - .where('p."sourceId" = s.id') - .andWhere('p.deleted = false') - .andWhere('p.visible = true') - .andWhere('p.private = false') - .andWhere('p.banned = false') - .getQuery(); - - return `${publicPostsCountSubQuery} >= :minPublicPosts`; - }) - .andWhere((qb) => { - const recentPublicPostSubQuery = qb - .subQuery() - .select('1') - .from(Post, 'p') - .where('p."sourceId" = s.id') - .andWhere('p.deleted = false') - .andWhere('p.visible = true') - .andWhere('p.private = false') - .andWhere('p.banned = false') - .andWhere('p."createdAt" >= current_timestamp - interval \'12 months\'') - .getQuery(); - - return `EXISTS ${recentPublicPostSubQuery}`; - }) + .groupBy('s.id') + .addGroupBy('s.handle') + .addGroupBy('s."createdAt"') + .having('COUNT(*) >= :minPublicPosts') + .andHaving(`MAX(p."createdAt") >= current_timestamp - interval '12 months'`) .orderBy('s."createdAt"', 'DESC') .addOrderBy('s.handle', 'ASC') .limit(DEFAULT_SITEMAP_LIMIT)