diff --git a/__tests__/sitemaps.ts b/__tests__/sitemaps.ts index 7229eec931..1a38075225 100644 --- a/__tests__/sitemaps.ts +++ b/__tests__/sitemaps.ts @@ -331,12 +331,223 @@ describe('GET /sitemaps/index.xml', () => { expect(res.text).toContain( 'http://localhost:5002/api/sitemaps/squads.xml', ); + expect(res.text).toContain( + 'http://localhost:5002/api/sitemaps/users.xml', + ); expect(res.text).toContain( 'http://localhost:5002/api/sitemaps/tags.xml', ); }); }); +describe('GET /sitemaps/users.xml', () => { + it('should include only qualified author profiles', async () => { + const updatedAt = new Date('2024-01-01T12:00:00.123Z'); + const userBase = { + createdAt: now, + infoConfirmed: true, + reputation: 20, + }; + const publicPostBase = { + sourceId: 'a', + createdAt: now, + type: PostType.Article, + visible: true, + private: false, + deleted: false, + }; + + await con.getRepository(User).save([ + { + ...userBase, + id: 'qualified-user', + name: 'Qualified User', + image: 'https://daily.dev/qualified.jpg', + username: 'qualifieduser', + email: 'qualified@test.com', + updatedAt, + reputation: 42, + bio: 'Writes public posts', + }, + { + ...userBase, + id: 'low-rep-user', + name: 'Low Rep User', + image: 'https://daily.dev/low-rep.jpg', + username: 'lowrepuser', + email: 'lowrep@test.com', + reputation: 10, + bio: 'Below threshold', + }, + { + ...userBase, + id: 'empty-bio-user', + name: 'Empty Bio User', + image: 'https://daily.dev/empty-bio.jpg', + username: 'emptybio', + email: 'emptybio@test.com', + bio: '', + }, + { + ...userBase, + id: 'null-bio-user', + name: 'Null Bio User', + image: 'https://daily.dev/null-bio.jpg', + username: 'nullbio', + email: 'nullbio@test.com', + bio: null, + }, + { + ...userBase, + id: 'blank-bio-user', + name: 'Blank Bio User', + image: 'https://daily.dev/blank-bio.jpg', + username: 'blankbio', + email: 'blankbio@test.com', + bio: ' ', + }, + { + ...userBase, + id: 'missing-username-user', + name: 'Missing Username User', + image: 'https://daily.dev/no-username.jpg', + email: 'nousername@test.com', + bio: 'Has no username', + }, + { + ...userBase, + id: 'private-post-user', + name: 'Private Post User', + image: 'https://daily.dev/private-post.jpg', + username: 'privatepost', + email: 'privatepost@test.com', + bio: 'Only private posts', + }, + { + ...userBase, + id: 'deleted-post-user', + name: 'Deleted Post User', + image: 'https://daily.dev/deleted-post.jpg', + username: 'deletedpost', + email: 'deletedpost@test.com', + bio: 'Only deleted posts', + }, + { + ...userBase, + id: 'hidden-post-user', + name: 'Hidden Post User', + image: 'https://daily.dev/hidden-post.jpg', + username: 'hiddenpost', + email: 'hiddenpost@test.com', + bio: 'Only hidden posts', + }, + { + ...userBase, + id: 'no-posts-user', + name: 'No Posts User', + image: 'https://daily.dev/no-posts.jpg', + username: 'noposts', + email: 'noposts@test.com', + bio: 'Has no posts', + }, + ]); + + await con.getRepository(Post).insert([ + { + ...publicPostBase, + id: 'qualified-user-post', + shortId: 'qup', + title: 'Qualified User Post', + metadataChangedAt: updatedAt, + authorId: 'qualified-user', + }, + { + ...publicPostBase, + id: 'low-rep-post', + shortId: 'lrp', + title: 'Low Rep Post', + authorId: 'low-rep-user', + }, + { + ...publicPostBase, + id: 'empty-bio-post', + shortId: 'ebp', + title: 'Empty Bio Post', + authorId: 'empty-bio-user', + }, + { + ...publicPostBase, + id: 'null-bio-post', + shortId: 'nbp', + title: 'Null Bio Post', + authorId: 'null-bio-user', + }, + { + ...publicPostBase, + id: 'blank-bio-post', + shortId: 'bbp', + title: 'Blank Bio Post', + authorId: 'blank-bio-user', + }, + { + ...publicPostBase, + id: 'missing-username-post', + shortId: 'mup', + title: 'Missing Username Post', + authorId: 'missing-username-user', + }, + { + ...publicPostBase, + id: 'private-post-only', + shortId: 'ppo', + title: 'Private Post Only', + authorId: 'private-post-user', + private: true, + }, + { + ...publicPostBase, + id: 'deleted-post-only', + shortId: 'dpo', + title: 'Deleted Post Only', + authorId: 'deleted-post-user', + deleted: true, + }, + { + ...publicPostBase, + id: 'hidden-post-only', + shortId: 'hpo', + title: 'Hidden Post Only', + authorId: 'hidden-post-user', + visible: false, + }, + ]); + + const res = await request(app.server) + .get('/sitemaps/users.xml') + .expect(200); + + expect(res.header['content-type']).toContain('application/xml'); + expect(res.header['cache-control']).toEqual( + 'public, max-age=7200, s-maxage=7200', + ); + expect(res.text).toContain( + '', + ); + expect(res.text).toContain( + 'http://localhost:5002/qualifieduser', + ); + expect(res.text).toContain('2024-01-01T12:00:00.123Z'); + expect(res.text).not.toContain('/lowrepuser'); + expect(res.text).not.toContain('/emptybio'); + expect(res.text).not.toContain('/nullbio'); + expect(res.text).not.toContain('/blankbio'); + expect(res.text).not.toContain('/privatepost'); + expect(res.text).not.toContain('/deletedpost'); + expect(res.text).not.toContain('/hiddenpost'); + expect(res.text).not.toContain('/noposts'); + }); +}); + describe('GET /sitemaps/agents.xml', () => { it('should return arena entity pages sitemap as xml', async () => { const res = await request(app.server) diff --git a/src/routes/sitemaps.ts b/src/routes/sitemaps.ts index af9583909c..fba8ee7598 100644 --- a/src/routes/sitemaps.ts +++ b/src/routes/sitemaps.ts @@ -10,6 +10,7 @@ import { User, } from '../entity'; import { AGENTS_DIGEST_SOURCE } from '../entity/Source'; +import { getUserProfileUrl } from '../common/users'; import createOrGetConnection from '../db'; import { Readable } from 'stream'; import { ONE_HOUR_IN_SECONDS } from '../common/constants'; @@ -301,6 +302,35 @@ const buildSquadsSitemapQuery = ( .orderBy('s."createdAt"', 'DESC') .limit(DEFAULT_SITEMAP_LIMIT); +const buildUsersSitemapQuery = ( + source: DataSource | EntityManager, +): SelectQueryBuilder => + source + .createQueryBuilder() + .select('u.username', 'username') + .addSelect('u."updatedAt"', 'lastmod') + .from(User, 'u') + .where('u.reputation > :minRep', { minRep: 10 }) + .andWhere('u.bio IS NOT NULL') + .andWhere(`btrim(u.bio) != ''`) + .andWhere('u.username IS NOT NULL') + .andWhere((qb) => { + const subQuery = qb + .subQuery() + .select('1') + .from(Post, 'p') + .where('p."authorId" = u.id') + .andWhere('p.deleted = false') + .andWhere('p.visible = true') + .andWhere('p.private = false') + .getQuery(); + + return `EXISTS ${subQuery}`; + }) + .orderBy('u.reputation', 'DESC') + .addOrderBy('u.username', 'ASC') + .limit(DEFAULT_SITEMAP_LIMIT); + const getPostsSitemapPath = (page: number): string => page === 1 ? '/api/sitemaps/posts-1.xml' : `/api/sitemaps/posts-${page}.xml`; @@ -347,6 +377,9 @@ ${evergreenSitemaps} ${escapeXml(`${prefix}/api/sitemaps/squads.xml`)} + + ${escapeXml(`${prefix}/api/sitemaps/users.xml`)} + `; }; @@ -533,6 +566,22 @@ export default async function (fastify: FastifyInstance): Promise { ); }); + fastify.get('/users.xml', async (_, res) => { + const con = await createOrGetConnection(); + const input = await streamReplicaQuery(con, buildUsersSitemapQuery); + + return res + .type('application/xml') + .header('cache-control', SITEMAP_CACHE_CONTROL) + .send( + toSitemapUrlSetStream( + input, + (row) => getUserProfileUrl(row.username), + getSitemapRowLastmod, + ), + ); + }); + fastify.get('/index.xml', async (_, res) => { const con = await createOrGetConnection(); const postsSitemapCount = getSitemapPageCount(