Skip to content

Commit 797e419

Browse files
authored
feat: add archive sitemaps for best-of pages (#3784)
1 parent 4f59bc8 commit 797e419

2 files changed

Lines changed: 553 additions & 7 deletions

File tree

__tests__/sitemaps.ts

Lines changed: 307 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import { DataSource, DeepPartial } from 'typeorm';
77
import createOrGetConnection from '../src/db';
88
import {
99
AGENTS_DIGEST_SOURCE,
10+
Archive,
1011
CollectionPost,
1112
Keyword,
1213
KeywordStatus,
@@ -18,6 +19,12 @@ import {
1819
SourceType,
1920
User,
2021
} from '../src/entity';
22+
import {
23+
ArchivePeriodType,
24+
ArchiveRankingType,
25+
ArchiveScopeType,
26+
ArchiveSubjectType,
27+
} from '../src/common/archive';
2128
import { getSitemapRowLastmod } from '../src/routes/sitemaps';
2229
import { updateFlagsStatement } from '../src/common/utils';
2330
import { sourcesFixture } from './fixture/source';
@@ -1055,6 +1062,306 @@ describe('GET /sitemaps/evergreen.xml', () => {
10551062
});
10561063
});
10571064

1065+
/**
 * Integration tests for `GET /sitemaps/archive-index.xml`.
 *
 * The sitemap is expected to list one "best-of" index URL per tag or source
 * that has at least one archive. Global archives and archives that point at
 * a missing source must not produce an entry.
 */
describe('GET /sitemaps/archive-index.xml', () => {
  // Fields shared by every archive fixture in this suite.
  const archiveBase = {
    subjectType: ArchiveSubjectType.Post,
    rankingType: ArchiveRankingType.Best,
  };

  it('should return index pages for tags and sources with archives', async () => {
    const createdAt = new Date('2025-03-01T10:00:00.000Z');

    // Two archives for the same tag (to exercise deduplication), one source
    // archive, and one global archive that must be ignored.
    await con.getRepository(Archive).save([
      {
        ...archiveBase,
        scopeType: ArchiveScopeType.Tag,
        scopeId: 'rust',
        periodType: ArchivePeriodType.Month,
        periodStart: new Date('2025-01-01T00:00:00.000Z'),
        createdAt,
      },
      {
        ...archiveBase,
        scopeType: ArchiveScopeType.Tag,
        scopeId: 'rust',
        periodType: ArchivePeriodType.Month,
        periodStart: new Date('2025-02-01T00:00:00.000Z'),
        createdAt,
      },
      {
        ...archiveBase,
        scopeType: ArchiveScopeType.Source,
        scopeId: 'a',
        periodType: ArchivePeriodType.Month,
        periodStart: new Date('2025-01-01T00:00:00.000Z'),
        createdAt,
      },
      {
        ...archiveBase,
        scopeType: ArchiveScopeType.Global,
        scopeId: null,
        periodType: ArchivePeriodType.Month,
        periodStart: new Date('2025-01-01T00:00:00.000Z'),
        createdAt,
      },
    ]);

    const res = await request(app.server)
      .get('/sitemaps/archive-index.xml')
      .expect(200);

    expect(res.header['content-type']).toContain('application/xml');
    expect(res.header['cache-control']).toEqual(
      'public, max-age=7200, s-maxage=7200',
    );
    expect(res.text).toContain(
      '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
    );
    // Source 'a' has handle 'a'
    expect(res.text).toContain(
      '<loc>http://localhost:5002/sources/a/best-of</loc>',
    );
    // Tag rust should appear once (deduplicated)
    expect(res.text).toContain(
      '<loc>http://localhost:5002/tags/rust/best-of</loc>',
    );
    // Global archives should not appear: every listed URL must be a tag or
    // source best-of index. (A plain substring check on '/best-of</loc>'
    // cannot tell a global entry apart from the valid entries asserted above,
    // so each <loc> element is validated individually instead.)
    const locEntries = res.text.match(/<loc>[^<]*<\/loc>/g) ?? [];
    locEntries.forEach((entry) => {
      expect(entry).toMatch(
        /^<loc>http:\/\/localhost:5002\/(?:tags|sources)\/[^/]+\/best-of<\/loc>$/,
      );
    });
    // Only one entry for rust (two archives but one index)
    const rustMatches = res.text.match(/\/tags\/rust\/best-of<\/loc>/g);
    expect(rustMatches).toHaveLength(1);
  });

  it('should exclude source archives when the source has been deleted', async () => {
    // The archive references a source id that the test never creates.
    await con.getRepository(Archive).save([
      {
        ...archiveBase,
        scopeType: ArchiveScopeType.Source,
        scopeId: 'nonexistent-source',
        periodType: ArchivePeriodType.Month,
        periodStart: new Date('2025-01-01T00:00:00.000Z'),
        createdAt: new Date(),
      },
    ]);

    const res = await request(app.server)
      .get('/sitemaps/archive-index.xml')
      .expect(200);

    expect(res.text).not.toContain('/sources/nonexistent-source/best-of');
  });
});
1154+
1155+
/**
 * Integration tests for the paginated archive sitemaps served at
 * `GET /sitemaps/archive-pages-:scopeType-:periodType-:page.xml`.
 */
describe('GET /sitemaps/archive-pages-:scopeType-:periodType-:page.xml', () => {
  // Fields shared by every archive fixture in this suite.
  const archiveBase = {
    subjectType: ArchiveSubjectType.Post,
    rankingType: ArchiveRankingType.Best,
  };

  // Opening tag expected in every successful sitemap response.
  const urlsetOpenTag =
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">';

  /** Builds one archive fixture row; `createdAt` defaults to "now". */
  const makeArchive = ({
    scopeType,
    scopeId,
    periodType,
    periodStart,
    createdAt = new Date(),
  }: {
    scopeType: ArchiveScopeType;
    scopeId: string;
    periodType: ArchivePeriodType;
    periodStart: string;
    createdAt?: Date;
  }) => ({
    ...archiveBase,
    scopeType,
    scopeId,
    periodType,
    periodStart: new Date(periodStart),
    createdAt,
  });

  /** Persists the given archive fixtures. */
  const seedArchives = (rows: ReturnType<typeof makeArchive>[]) =>
    con.getRepository(Archive).save(rows);

  /** Requests a sitemap path and expects a successful response. */
  const fetchSitemap = (path: string) =>
    request(app.server).get(path).expect(200);

  /** Requests a sitemap path and expects it to be rejected with 404. */
  const expectNotFound = (path: string) =>
    request(app.server).get(path).expect(404);

  it('should return tag monthly archive pages with correct URL format', async () => {
    const createdAt = new Date('2025-04-01T10:00:00.000Z');

    // One monthly and one yearly archive for the same tag; only the monthly
    // one belongs in the "tag-month" sitemap.
    await seedArchives([
      makeArchive({
        scopeType: ArchiveScopeType.Tag,
        scopeId: 'golang',
        periodType: ArchivePeriodType.Month,
        periodStart: '2025-01-01T00:00:00.000Z',
        createdAt,
      }),
      makeArchive({
        scopeType: ArchiveScopeType.Tag,
        scopeId: 'golang',
        periodType: ArchivePeriodType.Year,
        periodStart: '2024-01-01T00:00:00.000Z',
        createdAt,
      }),
    ]);

    const { header, text } = await fetchSitemap(
      '/sitemaps/archive-pages-tag-month-0.xml',
    );

    expect(header['content-type']).toContain('application/xml');
    expect(header['cache-control']).toEqual(
      'public, max-age=7200, s-maxage=7200',
    );
    expect(text).toContain(urlsetOpenTag);
    // Monthly tag archive with zero-padded month
    expect(text).toContain(
      '<loc>http://localhost:5002/tags/golang/best-of/2025/01</loc>',
    );
    // Should not include yearly archives
    expect(text).not.toContain(
      '<loc>http://localhost:5002/tags/golang/best-of/2024</loc>',
    );
    // Lastmod should be present
    expect(text).toContain('<lastmod>');
  });

  it('should return tag yearly archive pages', async () => {
    await seedArchives([
      makeArchive({
        scopeType: ArchiveScopeType.Tag,
        scopeId: 'golang',
        periodType: ArchivePeriodType.Year,
        periodStart: '2024-01-01T00:00:00.000Z',
        createdAt: new Date('2025-04-01T10:00:00.000Z'),
      }),
    ]);

    const { text } = await fetchSitemap(
      '/sitemaps/archive-pages-tag-year-0.xml',
    );

    expect(text).toContain(
      '<loc>http://localhost:5002/tags/golang/best-of/2024</loc>',
    );
  });

  it('should return source monthly archive pages using handle', async () => {
    await seedArchives([
      makeArchive({
        scopeType: ArchiveScopeType.Source,
        scopeId: 'b',
        periodType: ArchivePeriodType.Month,
        periodStart: '2025-09-01T00:00:00.000Z',
        createdAt: new Date('2025-04-01T10:00:00.000Z'),
      }),
    ]);

    const { text } = await fetchSitemap(
      '/sitemaps/archive-pages-source-month-0.xml',
    );

    // Source archive uses handle (source 'b' has handle 'b')
    expect(text).toContain(
      '<loc>http://localhost:5002/sources/b/best-of/2025/09</loc>',
    );
  });

  it('should return 404 for invalid scopeType', async () => {
    await expectNotFound('/sitemaps/archive-pages-invalid-month-0.xml');
  });

  it('should return 404 for invalid periodType', async () => {
    await expectNotFound('/sitemaps/archive-pages-tag-invalid-0.xml');
  });

  it('should return 404 for negative page', async () => {
    await expectNotFound('/sitemaps/archive-pages-tag-month--1.xml');
  });

  it('should return 404 for non-integer page', async () => {
    await expectNotFound('/sitemaps/archive-pages-tag-month-abc.xml');
  });

  it('should return empty urlset for page beyond data', async () => {
    const { text } = await fetchSitemap(
      '/sitemaps/archive-pages-tag-month-999.xml',
    );

    expect(text).toContain(urlsetOpenTag);
    expect(text).not.toContain('<loc>');
  });

  it('should exclude source archives when the source has been deleted', async () => {
    // The archive references a source id that the test never creates.
    await seedArchives([
      makeArchive({
        scopeType: ArchiveScopeType.Source,
        scopeId: 'nonexistent-source',
        periodType: ArchivePeriodType.Month,
        periodStart: '2025-01-01T00:00:00.000Z',
      }),
    ]);

    const { text } = await fetchSitemap(
      '/sitemaps/archive-pages-source-month-0.xml',
    );

    expect(text).not.toContain('/sources/nonexistent-source/best-of');
  });
});
1307+
1308+
/**
 * Integration tests for the archive-related entries of
 * `GET /sitemaps/index.xml`.
 */
describe('GET /sitemaps/index.xml (archive entries)', () => {
  // Fields shared by every archive fixture in this suite.
  const archiveBase = {
    subjectType: ArchiveSubjectType.Post,
    rankingType: ArchiveRankingType.Best,
  };

  /** Fetches the sitemap index and resolves with its XML body. */
  const fetchIndexXml = async (): Promise<string> => {
    const response = await request(app.server)
      .get('/sitemaps/index.xml')
      .expect(200);
    return response.text;
  };

  it('should include archive-index and paginated archive-pages sitemaps', async () => {
    // One tag/month archive and one source/year archive, so two distinct
    // scope/period combinations are expected in the index.
    const tagMonthArchive = {
      ...archiveBase,
      scopeType: ArchiveScopeType.Tag,
      scopeId: 'golang',
      periodType: ArchivePeriodType.Month,
      periodStart: new Date('2025-01-01T00:00:00.000Z'),
      createdAt: new Date(),
    };
    const sourceYearArchive = {
      ...archiveBase,
      scopeType: ArchiveScopeType.Source,
      scopeId: 'a',
      periodType: ArchivePeriodType.Year,
      periodStart: new Date('2024-01-01T00:00:00.000Z'),
      createdAt: new Date(),
    };
    await con.getRepository(Archive).save([tagMonthArchive, sourceYearArchive]);

    const xml = await fetchIndexXml();

    const expectedEntries = [
      '<loc>http://localhost:5002/api/sitemaps/archive-index.xml</loc>',
      '<loc>http://localhost:5002/api/sitemaps/archive-pages-tag-month-0.xml</loc>',
      '<loc>http://localhost:5002/api/sitemaps/archive-pages-source-year-0.xml</loc>',
    ];
    for (const entry of expectedEntries) {
      expect(xml).toContain(entry);
    }
    // Should not contain old non-paginated archive-pages.xml
    expect(xml).not.toContain(
      '<loc>http://localhost:5002/api/sitemaps/archive-pages.xml</loc>',
    );
  });

  it('should not include archive-pages entries when no archives exist', async () => {
    const xml = await fetchIndexXml();

    // The archive index sitemap is still expected without any archives;
    // only the paginated archive-pages entries must be absent.
    expect(xml).toContain(
      '<loc>http://localhost:5002/api/sitemaps/archive-index.xml</loc>',
    );
    expect(xml).not.toContain('archive-pages-');
  });
});
1364+
10581365
describe('getSitemapRowLastmod', () => {
10591366
it('should normalize pg timestamp format to ISO-8601', () => {
10601367
const normalizedLastmod = getSitemapRowLastmod({

0 commit comments

Comments
 (0)