diff --git a/__tests__/sitemaps.ts b/__tests__/sitemaps.ts
index f4757c4847..7229eec931 100644
--- a/__tests__/sitemaps.ts
+++ b/__tests__/sitemaps.ts
@@ -269,6 +269,38 @@ describe('GET /sitemaps/tags.xml', () => {
describe('GET /sitemaps/index.xml', () => {
it('should return sitemap index xml with all paginated post sitemaps', async () => {
+ const oldDate = new Date(now.getTime() - 91 * ONE_DAY_IN_SECONDS * 1000);
+
+ await con.getRepository(Post).insert([
+ {
+ id: 'evergreen-index-1',
+ shortId: 'ei1',
+ title: 'Evergreen Index 1',
+ sourceId: 'a',
+ createdAt: oldDate,
+ type: PostType.Article,
+ upvotes: 10,
+ },
+ {
+ id: 'evergreen-index-2',
+ shortId: 'ei2',
+ title: 'Evergreen Index 2',
+ sourceId: 'a',
+ createdAt: new Date(oldDate.getTime() - 1000),
+ type: PostType.Article,
+ upvotes: 11,
+ },
+ {
+ id: 'evergreen-index-3',
+ shortId: 'ei3',
+ title: 'Evergreen Index 3',
+ sourceId: 'a',
+ createdAt: new Date(oldDate.getTime() - 2000),
+ type: PostType.Article,
+ upvotes: 12,
+ },
+ ]);
+
const res = await request(app.server)
.get('/sitemaps/index.xml')
.expect(200);
@@ -285,7 +317,10 @@ describe('GET /sitemaps/index.xml', () => {
'http://localhost:5002/api/sitemaps/posts-2.xml',
);
expect(res.text).toContain(
- 'http://localhost:5002/api/sitemaps/tags.xml',
+ 'http://localhost:5002/api/sitemaps/evergreen.xml',
+ );
+ expect(res.text).toContain(
+ 'http://localhost:5002/api/sitemaps/evergreen-2.xml',
);
expect(res.text).toContain(
'http://localhost:5002/api/sitemaps/agents.xml',
@@ -296,6 +331,9 @@ describe('GET /sitemaps/index.xml', () => {
expect(res.text).toContain(
'http://localhost:5002/api/sitemaps/squads.xml',
);
+ expect(res.text).toContain(
+ 'http://localhost:5002/api/sitemaps/tags.xml',
+ );
});
});
@@ -431,6 +469,77 @@ describe('GET /sitemaps/squads.xml', () => {
});
describe('GET /sitemaps/evergreen.xml', () => {
+ it('should include posts with at least 10 upvotes and paginate older posts', async () => {
+ // Older than the 90-day cutoff applied by the evergreen query.
+ const oldDate = new Date(now.getTime() - 91 * ONE_DAY_IN_SECONDS * 1000);
+
+ // createdAt decreases down the list, so in the sitemap's oldest-first order
+ // the qualifying posts come out as: third-page, next-page, min-threshold.
+ await con.getRepository(Post).insert([
+ {
+ id: 'evergreen-min-threshold',
+ shortId: 'emt',
+ title: 'Evergreen Min Threshold',
+ sourceId: 'a',
+ createdAt: oldDate,
+ type: PostType.Article,
+ upvotes: 10,
+ },
+ {
+ id: 'evergreen-next-page',
+ shortId: 'enp',
+ title: 'Evergreen Next Page',
+ sourceId: 'a',
+ createdAt: new Date(oldDate.getTime() - 1000),
+ type: PostType.Article,
+ upvotes: 11,
+ },
+ {
+ id: 'evergreen-third-page',
+ shortId: 'etp',
+ title: 'Evergreen Third Page',
+ sourceId: 'a',
+ createdAt: new Date(oldDate.getTime() - 2000),
+ type: PostType.Article,
+ upvotes: 12,
+ },
+ {
+ id: 'evergreen-below-threshold',
+ shortId: 'ebt',
+ title: 'Evergreen Below Threshold',
+ sourceId: 'a',
+ createdAt: new Date(oldDate.getTime() - 3000),
+ type: PostType.Article,
+ upvotes: 9,
+ },
+ ]);
+
+ const firstPage = await request(app.server)
+ .get('/sitemaps/evergreen.xml')
+ .expect(200);
+ const secondPage = await request(app.server)
+ .get('/sitemaps/evergreen-2.xml')
+ .expect(200);
+
+ // NOTE(review): the expectations below assume a page size of 2 in the test
+ // environment (SITEMAP_LIMIT) — confirm against the test setup.
+ expect(firstPage.header['content-type']).toContain('application/xml');
+ expect(firstPage.text).toContain(
+ '/posts/evergreen-third-page-evergreen-third-page',
+ );
+ expect(firstPage.text).toContain(
+ '/posts/evergreen-next-page-evergreen-next-page',
+ );
+ expect(firstPage.text).not.toContain(
+ '/posts/evergreen-min-threshold-evergreen-min-threshold',
+ );
+ // Slugs in this suite end with the post id (see the positive assertions),
+ // so the exclusion check must match on the id. A shortId-based path can
+ // never appear in the output and would make this assertion vacuous.
+ expect(firstPage.text).not.toContain(
+ '/posts/evergreen-below-threshold-evergreen-below-threshold',
+ );
+
+ expect(secondPage.text).toContain(
+ '/posts/evergreen-min-threshold-evergreen-min-threshold',
+ );
+ expect(secondPage.text).not.toContain(
+ '/posts/evergreen-below-threshold-evergreen-below-threshold',
+ );
+ });
+
it('should exclude posts by low-reputation authors', async () => {
await con.getRepository(User).save({
id: 'low-rep-sitemap',
diff --git a/src/routes/sitemaps.ts b/src/routes/sitemaps.ts
index 409bb0feaa..af9583909c 100644
--- a/src/routes/sitemaps.ts
+++ b/src/routes/sitemaps.ts
@@ -27,7 +27,7 @@ const ARENA_SITEMAP_GROUP_IDS = [
'970ab2c9-f845-4822-82f0-02169713b814',
];
-const getPostsSitemapLimit = (): number => {
+const getPaginatedSitemapLimit = (): number => {
const limit = Number.parseInt(process.env.SITEMAP_LIMIT || '', 10);
return Number.isInteger(limit) && limit > 0 ? limit : DEFAULT_SITEMAP_LIMIT;
@@ -139,32 +139,35 @@ const applyPostsSitemapOrder = (
): SelectQueryBuilder =>
query.orderBy('p."createdAt"', 'ASC').addOrderBy('p.id', 'ASC');
-const buildPostsSitemapQuery = (
- source: DataSource | EntityManager,
+const applyPaginatedSitemapWindow = (
+ query: SelectQueryBuilder,
page: number,
): SelectQueryBuilder =>
- applyPostsSitemapOrder(
- buildPostsSitemapBaseQuery(source)
- .select('p.slug', 'slug')
- .addSelect('p."metadataChangedAt"', 'lastmod')
- .limit(getPostsSitemapLimit())
- .offset((page - 1) * getPostsSitemapLimit()),
- );
+ query
+ .limit(getPaginatedSitemapLimit())
+ .offset((page - 1) * getPaginatedSitemapLimit());
-const buildPostsSitemapTextQuery = (
+const buildPostsSitemapQuery = (
source: DataSource | EntityManager,
+ page: number,
): SelectQueryBuilder =>
- applyPostsSitemapOrder(
- buildPostsSitemapBaseQuery(source).select('p.slug', 'slug'),
+ applyPaginatedSitemapWindow(
+ applyPostsSitemapOrder(
+ buildPostsSitemapBaseQuery(source)
+ .select('p.slug', 'slug')
+ .addSelect('p."metadataChangedAt"', 'lastmod'),
+ ),
+ page,
);
-const buildPostSitemapStream = async (
+const buildPaginatedPostSitemapStream = async (
con: DataSource,
page: number,
+ buildQuery: (source: EntityManager, page: number) => SelectQueryBuilder,
): Promise => {
const prefix = getSitemapUrlPrefix();
const input = await streamReplicaQuery(con, (source) =>
- buildPostsSitemapQuery(source, page),
+ buildQuery(source, page),
);
return toSitemapUrlSetStream(
@@ -174,13 +177,53 @@ const buildPostSitemapStream = async (
);
};
-const buildEvergreenSitemapQuery = (
+/**
+ * Number of sitemap pages needed to hold `totalPosts` rows at the configured
+ * page size. Never returns less than 1.
+ */
+const getSitemapPageCount = (totalPosts: number): number => {
+ const pagesNeeded = Math.ceil(totalPosts / getPaginatedSitemapLimit());
+
+ return Math.max(1, pagesNeeded);
+};
+
+/**
+ * Counts the rows matched by `buildQuery`, executing the count through a
+ * dedicated query runner that is always released afterwards.
+ *
+ * @param con Data source used to create the query runner.
+ * @param buildQuery Builds the query to count from the runner's entity manager.
+ * @returns The value resolved by the query builder's `getCount()`.
+ */
+const getReplicaQueryCount = async (
+ con: DataSource,
+ buildQuery: (source: EntityManager) => SelectQueryBuilder,
+): Promise => {
+ // 'slave' requests a read-replica runner (TypeORM replication mode).
+ const queryRunner = con.createQueryRunner('slave');
+
+ try {
+ return await buildQuery(queryRunner.manager).getCount();
+ } finally {
+ // Release even when the count query throws, so the runner is not leaked.
+ await queryRunner.release();
+ }
+};
+
+/**
+ * Builds the sitemap-index entries for pages 1..`sitemapCount`, joined with
+ * newlines. Each entry contains the XML-escaped `prefix + getPath(page)` URL.
+ *
+ * NOTE(review): in this view the template literal shows no XML element tags
+ * around the URL; they look stripped by extraction — confirm the emitted
+ * markup against the real file.
+ *
+ * @param prefix URL prefix prepended to every path.
+ * @param sitemapCount Number of entries to generate.
+ * @param getPath Maps a 1-based page number to the sitemap path.
+ */
+const buildSitemapIndexEntries = (
+ prefix: string,
+ sitemapCount: number,
+ getPath: (page: number) => string,
+): string =>
+ Array.from({ length: sitemapCount }, (_, index) => {
+ // Array indexes are 0-based; sitemap pages are 1-based.
+ const page = index + 1;
+
+ return `
+ ${escapeXml(`${prefix}${getPath(page)}`)}
+ `;
+ }).join('\n');
+
+/**
+ * Posts sitemap query that selects only the slug, in posts sitemap order.
+ * No limit or offset is applied here.
+ */
+const buildPostsSitemapTextQuery = (
+ source: DataSource | EntityManager,
+): SelectQueryBuilder =>
+ applyPostsSitemapOrder(
+ buildPostsSitemapBaseQuery(source).select('p.slug', 'slug'),
+ );
+
+/**
+ * Builds the sitemap stream for one page of the posts sitemap by delegating
+ * to the shared paginated stream builder with the posts query.
+ */
+const buildPostSitemapStream = async (
+ con: DataSource,
+ page: number,
+): Promise =>
+ buildPaginatedPostSitemapStream(con, page, buildPostsSitemapQuery);
+
+const buildEvergreenSitemapBaseQuery = (
source: DataSource | EntityManager,
): SelectQueryBuilder =>
source
.createQueryBuilder()
- .select('p.slug', 'slug')
- .addSelect('p."metadataChangedAt"', 'lastmod')
.from(Post, 'p')
.leftJoin(User, 'u', 'p."authorId" = u.id')
.where('p.type NOT IN (:...types)', { types: [PostType.Welcome] })
@@ -188,10 +231,21 @@ const buildEvergreenSitemapQuery = (
.andWhere('NOT p.banned')
.andWhere('NOT p.deleted')
.andWhere('p."createdAt" <= current_timestamp - interval \'90 day\'')
- .andWhere('p.upvotes >= :minUpvotes', { minUpvotes: 50 })
- .andWhere('(u.id is null or u.reputation > 10)')
- .orderBy('p.upvotes', 'DESC')
- .limit(DEFAULT_SITEMAP_LIMIT);
+ .andWhere('p.upvotes >= :minUpvotes', { minUpvotes: 10 })
+ .andWhere('(u.id is null or u.reputation > 10)');
+
+/**
+ * Query for one page of the evergreen sitemap: slug plus `metadataChangedAt`
+ * (aliased `lastmod`) for posts matching the evergreen base filter.
+ *
+ * Ordering goes through `applyPostsSitemapOrder` (createdAt ASC, then id ASC),
+ * the same helper the posts sitemap uses, so both paginated sitemaps share one
+ * definition of the order that limit/offset paging relies on.
+ *
+ * @param source Data source or entity manager to build the query on.
+ * @param page 1-based page number.
+ */
+const buildEvergreenSitemapQuery = (
+ source: DataSource | EntityManager,
+ page: number,
+): SelectQueryBuilder =>
+ applyPaginatedSitemapWindow(
+ applyPostsSitemapOrder(
+ buildEvergreenSitemapBaseQuery(source)
+ .select('p.slug', 'slug')
+ .addSelect('p."metadataChangedAt"', 'lastmod'),
+ ),
+ page,
+ );
const buildTagsSitemapQuery = (
source: DataSource | EntityManager,
@@ -250,38 +304,37 @@ const buildSquadsSitemapQuery = (
const getPostsSitemapPath = (page: number): string =>
page === 1 ? '/api/sitemaps/posts-1.xml' : `/api/sitemaps/posts-${page}.xml`;
-const getPostsSitemapPageCount = (totalPosts: number): number =>
- Math.max(1, Math.ceil(totalPosts / getPostsSitemapLimit()));
+/**
+ * Path of the evergreen sitemap for a 1-based page number.
+ * Page 1 uses the un-suffixed `/api/sitemaps/evergreen.xml`; every other page
+ * gets a `-<page>` suffix.
+ */
+const getEvergreenSitemapPath = (page: number): string => {
+ if (page === 1) {
+ return '/api/sitemaps/evergreen.xml';
+ }
+
+ return `/api/sitemaps/evergreen-${page}.xml`;
+};
-const getPostsSitemapCount = async (con: DataSource): Promise => {
- const queryRunner = con.createQueryRunner('slave');
-
- try {
- return await buildPostsSitemapBaseQuery(queryRunner.manager).getCount();
- } finally {
- await queryRunner.release();
- }
-};
+/**
+ * Builds the sitemap stream for one page of the evergreen sitemap by
+ * delegating to the shared paginated stream builder with the evergreen query.
+ */
+const buildEvergreenSitemapStream = async (
+ con: DataSource,
+ page: number,
+): Promise =>
+ buildPaginatedPostSitemapStream(con, page, buildEvergreenSitemapQuery);
-const getSitemapIndexXml = (postsSitemapCount: number): string => {
+const getSitemapIndexXml = (
+ postsSitemapCount: number,
+ evergreenSitemapCount: number,
+): string => {
const prefix = getSitemapUrlPrefix();
- const postsSitemaps = Array.from(
- { length: postsSitemapCount },
- (_, index) => {
- const page = index + 1;
-
- return `
- ${escapeXml(`${prefix}${getPostsSitemapPath(page)}`)}
- `;
- },
- ).join('\n');
+ const postsSitemaps = buildSitemapIndexEntries(
+ prefix,
+ postsSitemapCount,
+ getPostsSitemapPath,
+ );
+ const evergreenSitemaps = buildSitemapIndexEntries(
+ prefix,
+ evergreenSitemapCount,
+ getEvergreenSitemapPath,
+ );
return `
${postsSitemaps}
-
- ${escapeXml(`${prefix}/api/sitemaps/evergreen.xml`)}
-
+${evergreenSitemaps}
${escapeXml(`${prefix}/api/sitemaps/tags.xml`)}
@@ -374,19 +427,28 @@ export default async function (fastify: FastifyInstance): Promise {
fastify.get('/evergreen.xml', async (_, res) => {
const con = await createOrGetConnection();
- const prefix = getSitemapUrlPrefix();
- const input = await streamReplicaQuery(con, buildEvergreenSitemapQuery);
return res
.type('application/xml')
.header('cache-control', SITEMAP_CACHE_CONTROL)
- .send(
- toSitemapUrlSetStream(
- input,
- (row) => getPostSitemapUrl(prefix, row.slug),
- getSitemapRowLastmod,
- ),
- );
+ .send(await buildEvergreenSitemapStream(con, 1));
+ });
+
+ /**
+ * Serves evergreen sitemap page `:page` (e.g. `/evergreen-2.xml`).
+ * Responds 404 unless `:page` is a positive integer written in canonical
+ * form (digits only, no leading zero).
+ */
+ fastify.get<{
+ Params: { page: string };
+ }>('/evergreen-:page.xml', async (req, res) => {
+ const rawPage = req.params.page;
+
+ // Number.parseInt stops at the first non-digit, so on its own it would
+ // accept values such as "2abc" and serve them as page 2. Validate the raw
+ // text so a numbered page cannot be reached through look-alike URLs that
+ // would each be cached as a separate 200 response.
+ if (!/^[1-9]\d*$/.test(rawPage)) {
+ return res.code(404).send();
+ }
+
+ const page = Number.parseInt(rawPage, 10);
+ const con = await createOrGetConnection();
+
+ return res
+ .type('application/xml')
+ .header('cache-control', SITEMAP_CACHE_CONTROL)
+ .send(await buildEvergreenSitemapStream(con, page));
+ });
fastify.get('/tags.txt', async (_, res) => {
@@ -473,13 +535,16 @@ export default async function (fastify: FastifyInstance): Promise {
fastify.get('/index.xml', async (_, res) => {
const con = await createOrGetConnection();
- const postsSitemapCount = getPostsSitemapPageCount(
- await getPostsSitemapCount(con),
+ const postsSitemapCount = getSitemapPageCount(
+ await getReplicaQueryCount(con, buildPostsSitemapBaseQuery),
+ );
+ const evergreenSitemapCount = getSitemapPageCount(
+ await getReplicaQueryCount(con, buildEvergreenSitemapBaseQuery),
);
return res
.type('application/xml')
.header('cache-control', SITEMAP_CACHE_CONTROL)
- .send(getSitemapIndexXml(postsSitemapCount));
+ .send(getSitemapIndexXml(postsSitemapCount, evergreenSitemapCount));
});
}