Skip to content

Commit f632945

Browse files
authored
feat(seo): add sources sitemap (#3762)
1 parent 062e1cf commit f632945

2 files changed

Lines changed: 203 additions & 0 deletions

File tree

__tests__/sitemaps.ts

Lines changed: 149 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@ import {
1414
SentimentEntity,
1515
SentimentGroup,
1616
Source,
17+
SourceType,
1718
User,
1819
} from '../src/entity';
1920
import { getSitemapRowLastmod } from '../src/routes/sitemaps';
@@ -109,6 +110,26 @@ const sentimentEntitiesFixture: DeepPartial<SentimentEntity>[] = [
109110
},
110111
];
111112

113+
/**
 * Builds `count` post fixtures that all belong to `sourceId`.
 *
 * Every fixture defaults to a visible, non-private, non-deleted, non-banned
 * article created at 2023-01-01T00:00:00.000Z. Ids are `${prefix}-${index}`;
 * short ids are the alphanumeric characters of `prefix`, truncated to 10
 * characters, followed by the index.
 *
 * @param sourceId - Value written to every fixture's `sourceId`.
 * @param count - Number of fixtures to generate.
 * @param prefix - Seed used for `id`, `shortId` and `title`.
 * @param overrides - Optional per-index fields; spread last so they win over
 *   the defaults.
 * @returns The generated fixtures, in index order.
 */
const createSourcePostFixtures = (
  sourceId: string,
  count: number,
  prefix: string,
  overrides?: (index: number) => DeepPartial<Post>,
): DeepPartial<Post>[] => {
  // Loop-invariant: only the index suffix differs between fixtures.
  const shortIdPrefix = prefix.replace(/[^a-z0-9]/gi, '').slice(0, 10);

  return Array.from({ length: count }, (_, index) => ({
    id: `${prefix}-${index}`,
    shortId: `${shortIdPrefix}${index}`,
    title: `${prefix} ${index}`,
    sourceId,
    // A fresh Date per fixture so rows never share a mutable instance.
    createdAt: new Date('2023-01-01T00:00:00.000Z'),
    type: PostType.Article,
    visible: true,
    private: false,
    deleted: false,
    banned: false,
    ...overrides?.(index),
  }));
};
132+
112133
beforeAll(async () => {
113134
process.env.SITEMAP_LIMIT = '2';
114135
con = await createOrGetConnection();
@@ -328,6 +349,9 @@ describe('GET /sitemaps/index.xml', () => {
328349
expect(res.text).toContain(
329350
'<loc>http://localhost:5002/api/sitemaps/agents-digest.xml</loc>',
330351
);
352+
expect(res.text).toContain(
353+
'<loc>http://localhost:5002/api/sitemaps/sources.xml</loc>',
354+
);
331355
expect(res.text).toContain(
332356
'<loc>http://localhost:5002/api/sitemaps/squads.xml</loc>',
333357
);
@@ -340,6 +364,131 @@ describe('GET /sitemaps/index.xml', () => {
340364
});
341365
});
342366

367+
describe('GET /sitemaps/sources.xml', () => {
  // Each disqualified source below violates exactly ONE rule, so a regression
  // in any single filter makes the matching `not.toContain` assertion fail.
  it('should include only qualified public machine sources', async () => {
    const sourceCreatedAt = new Date('2023-10-01T10:00:00.000Z');
    const recentActivityDate = new Date();

    await con.getRepository(Source).save([
      {
        id: 'qualified-source',
        name: 'Qualified Source',
        image: 'https://daily.dev/qualified-source.jpg',
        handle: 'qualifiedsource',
        type: SourceType.Machine,
        active: true,
        private: false,
        createdAt: sourceCreatedAt,
      },
      {
        id: 'not-enough-posts-source',
        name: 'Not Enough Posts Source',
        image: 'https://daily.dev/not-enough-posts-source.jpg',
        handle: 'notenoughposts',
        type: SourceType.Machine,
        active: true,
        private: false,
      },
      {
        id: 'stale-source',
        name: 'Stale Source',
        image: 'https://daily.dev/stale-source.jpg',
        handle: 'stalesource',
        type: SourceType.Machine,
        active: true,
        private: false,
      },
      {
        id: 'private-source',
        name: 'Private Source',
        image: 'https://daily.dev/private-source.jpg',
        handle: 'privatesource',
        type: SourceType.Machine,
        active: true,
        private: true,
      },
      {
        id: 'inactive-source',
        name: 'Inactive Source',
        image: 'https://daily.dev/inactive-source.jpg',
        handle: 'inactivesource',
        type: SourceType.Machine,
        active: false,
        private: false,
      },
      {
        id: 'squad-source',
        name: 'Squad Source',
        image: 'https://daily.dev/squad-source.jpg',
        handle: 'squadsource',
        type: SourceType.Squad,
        active: true,
        private: false,
      },
    ]);

    await con.getRepository(Post).insert([
      // Qualified: 10 public posts, one of them recent.
      ...createSourcePostFixtures('qualified-source', 9, 'qualified-old'),
      ...createSourcePostFixtures(
        'qualified-source',
        1,
        'qualified-recent',
        () => ({
          createdAt: recentActivityDate,
        }),
      ),
      // Not enough posts: only 9 public posts; the 10th is private and must
      // not count. All are recent so the recency rule cannot be what excludes
      // this source — only the minimum-post-count rule can.
      ...createSourcePostFixtures(
        'not-enough-posts-source',
        9,
        'notenough',
        () => ({
          createdAt: recentActivityDate,
        }),
      ),
      ...createSourcePostFixtures(
        'not-enough-posts-source',
        1,
        'notenough-private',
        () => ({ private: true, createdAt: recentActivityDate }),
      ),
      // Stale: enough public posts, but none of them recent.
      ...createSourcePostFixtures('stale-source', 10, 'stale'),
      // The remaining sources have enough recent posts and fail only on a
      // source-level attribute (private / inactive / wrong type).
      ...createSourcePostFixtures('private-source', 10, 'private', () => ({
        createdAt: recentActivityDate,
      })),
      ...createSourcePostFixtures('inactive-source', 10, 'inactive', () => ({
        createdAt: recentActivityDate,
      })),
      ...createSourcePostFixtures('squad-source', 10, 'squad', () => ({
        createdAt: recentActivityDate,
      })),
    ]);

    const res = await request(app.server)
      .get('/sitemaps/sources.xml')
      .expect(200);

    expect(res.header['content-type']).toContain('application/xml');
    expect(res.header['cache-control']).toEqual(
      'public, max-age=7200, s-maxage=7200',
    );
    expect(res.text).toContain(
      '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
    );
    expect(res.text).toContain(
      '<loc>http://localhost:5002/sources/qualifiedsource</loc>',
    );
    // lastmod mirrors the source's own createdAt, not its latest post.
    expect(res.text).toContain('<lastmod>2023-10-01T10:00:00.000Z</lastmod>');
    expect(res.text).not.toContain('/sources/notenoughposts');
    expect(res.text).not.toContain('/sources/stalesource');
    expect(res.text).not.toContain('/sources/privatesource');
    expect(res.text).not.toContain('/sources/inactivesource');
    expect(res.text).not.toContain('/sources/squadsource');
  });
});
491+
343492
describe('GET /sitemaps/users.xml', () => {
344493
it('should include only qualified author profiles', async () => {
345494
const updatedAt = new Date('2024-01-01T12:00:00.123Z');

src/routes/sitemaps.ts

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import {
2323

2424
// Cache-Control sent with sitemap responses: publicly cacheable, with both the
// browser (max-age) and shared-cache (s-maxage) lifetimes set to two hours.
const SITEMAP_CACHE_CONTROL = `public, max-age=${2 * ONE_HOUR_IN_SECONDS}, s-maxage=${2 * ONE_HOUR_IN_SECONDS}`;
// Row cap applied to sitemap queries.
// NOTE(review): presumably chosen to match the sitemap protocol's
// 50,000-URLs-per-file limit — confirm.
const DEFAULT_SITEMAP_LIMIT = 50_000;
// Minimum number of public posts a source needs to be listed in sources.xml.
const QUALIFIED_SOURCE_MIN_PUBLIC_POSTS = 10;
2627
const ARENA_SITEMAP_GROUP_IDS = [
2728
'385404b4-f0f4-4e81-a338-bdca851eca31',
2829
'970ab2c9-f845-4822-82f0-02169713b814',
@@ -88,6 +89,9 @@ const getTagSitemapUrl = (prefix: string, value: string): string =>
8889
/**
 * Builds the absolute URL of an agent page for use in a sitemap `<loc>`.
 *
 * @param prefix - Site origin/prefix, without a trailing slash.
 * @param entity - Agent entity identifier; percent-encoded as a path segment.
 * @returns `${prefix}/agents/<encoded entity>`.
 */
const getAgentSitemapUrl = (prefix: string, entity: string): string =>
  `${prefix}/agents/${encodeURIComponent(entity)}`;
9091

92+
/**
 * Builds the absolute URL of a source page for use in a sitemap `<loc>`.
 *
 * @param prefix - Site origin/prefix, without a trailing slash.
 * @param handle - Source handle; percent-encoded as a path segment.
 * @returns `${prefix}/sources/<encoded handle>`.
 */
const getSourceSitemapUrl = (prefix: string, handle: string): string =>
  `${prefix}/sources/${encodeURIComponent(handle)}`;
94+
9195
/**
 * Builds the absolute URL of a squad page for use in a sitemap `<loc>`.
 *
 * @param prefix - Site origin/prefix, without a trailing slash.
 * @param handle - Squad handle; percent-encoded as a path segment.
 * @returns `${prefix}/squads/<encoded handle>`.
 */
const getSquadSitemapUrl = (prefix: string, handle: string): string =>
  `${prefix}/squads/${encodeURIComponent(handle)}`;
9397

@@ -287,6 +291,36 @@ const buildAgentsDigestSitemapQuery = (
287291
.orderBy('p."createdAt"', 'DESC')
288292
.limit(DEFAULT_SITEMAP_LIMIT);
289293

294+
/**
 * Builds the query that selects sources eligible for `sources.xml`.
 *
 * A source qualifies when it is an active, non-private machine source that has
 * at least `QUALIFIED_SOURCE_MIN_PUBLIC_POSTS` public posts (not deleted,
 * visible, not private, not banned), the newest of which was created within
 * the last 12 months.
 *
 * Selected columns: `handle`, and `lastmod` (the source's own `createdAt`).
 * Rows are ordered by source `createdAt` descending, then `handle` ascending,
 * and capped at `DEFAULT_SITEMAP_LIMIT`.
 *
 * @param source - Data source or entity manager used to create the builder.
 * @returns The unexecuted query builder.
 */
const buildSourcesSitemapQuery = (
  source: DataSource | EntityManager,
): SelectQueryBuilder<Source> =>
  source
    .createQueryBuilder()
    .select('s.handle', 'handle')
    .addSelect('s."createdAt"', 'lastmod')
    .from(Source, 's')
    // Only public posts are joined, so COUNT(*) and MAX(createdAt) in the
    // HAVING clauses below consider public posts exclusively.
    .innerJoin(
      Post,
      'p',
      `p."sourceId" = s.id
        AND p.deleted = false
        AND p.visible = true
        AND p.private = false
        AND p.banned = false`,
    )
    .where('s.type = :type', { type: SourceType.Machine })
    .andWhere('s.active = true')
    .andWhere('s.private = false')
    // Every selected / ordered source column must appear in GROUP BY.
    .groupBy('s.id')
    .addGroupBy('s.handle')
    .addGroupBy('s."createdAt"')
    .having('COUNT(*) >= :minPublicPosts')
    .andHaving(`MAX(p."createdAt") >= current_timestamp - interval '12 months'`)
    .orderBy('s."createdAt"', 'DESC')
    // Handle as tie-breaker keeps the output order deterministic.
    .addOrderBy('s.handle', 'ASC')
    .limit(DEFAULT_SITEMAP_LIMIT)
    .setParameter('minPublicPosts', QUALIFIED_SOURCE_MIN_PUBLIC_POSTS);
323+
290324
const buildSquadsSitemapQuery = (
291325
source: DataSource | EntityManager,
292326
): SelectQueryBuilder<Source> =>
@@ -374,6 +408,9 @@ ${evergreenSitemaps}
374408
<sitemap>
375409
<loc>${escapeXml(`${prefix}/api/sitemaps/agents-digest.xml`)}</loc>
376410
</sitemap>
411+
<sitemap>
412+
<loc>${escapeXml(`${prefix}/api/sitemaps/sources.xml`)}</loc>
413+
</sitemap>
377414
<sitemap>
378415
<loc>${escapeXml(`${prefix}/api/sitemaps/squads.xml`)}</loc>
379416
</sitemap>
@@ -549,6 +586,23 @@ export default async function (fastify: FastifyInstance): Promise<void> {
549586
);
550587
});
551588

589+
// GET /sources.xml — streams a <urlset> sitemap of qualified sources.
// Each row from buildSourcesSitemapQuery becomes one URL entry: <loc> is built
// from the row's handle, <lastmod> comes from getSitemapRowLastmod.
fastify.get('/sources.xml', async (_, res) => {
  const con = await createOrGetConnection();
  const prefix = getSitemapUrlPrefix();
  // NOTE(review): presumably runs the query against a read replica and yields
  // rows as a stream — confirm against streamReplicaQuery.
  const input = await streamReplicaQuery(con, buildSourcesSitemapQuery);

  return res
    .type('application/xml')
    .header('cache-control', SITEMAP_CACHE_CONTROL)
    .send(
      toSitemapUrlSetStream(
        input,
        (row) => getSourceSitemapUrl(prefix, row.handle),
        getSitemapRowLastmod,
      ),
    );
});
605+
552606
fastify.get('/squads.xml', async (_, res) => {
553607
const con = await createOrGetConnection();
554608
const prefix = getSitemapUrlPrefix();

0 commit comments

Comments
 (0)