@@ -7,6 +7,7 @@ import { DataSource, DeepPartial } from 'typeorm';
77import createOrGetConnection from '../src/db' ;
88import {
99 AGENTS_DIGEST_SOURCE ,
10+ Archive ,
1011 CollectionPost ,
1112 Keyword ,
1213 KeywordStatus ,
@@ -18,6 +19,12 @@ import {
1819 SourceType ,
1920 User ,
2021} from '../src/entity' ;
22+ import {
23+ ArchivePeriodType ,
24+ ArchiveRankingType ,
25+ ArchiveScopeType ,
26+ ArchiveSubjectType ,
27+ } from '../src/common/archive' ;
2128import { getSitemapRowLastmod } from '../src/routes/sitemaps' ;
2229import { updateFlagsStatement } from '../src/common/utils' ;
2330import { sourcesFixture } from './fixture/source' ;
@@ -1055,6 +1062,306 @@ describe('GET /sitemaps/evergreen.xml', () => {
10551062 } ) ;
10561063} ) ;
10571064
// Tests for the archive index sitemap: it must list one "best-of" index URL per
// tag or source that has archives, and nothing for global archives or for
// sources that no longer exist.
describe('GET /sitemaps/archive-index.xml', () => {
  // Fields shared by every archive row seeded in this suite.
  const archiveBase = {
    subjectType: ArchiveSubjectType.Post,
    rankingType: ArchiveRankingType.Best,
  };

  it('should return index pages for tags and sources with archives', async () => {
    const createdAt = new Date('2025-03-01T10:00:00.000Z');

    // Two archives for the same tag (to exercise deduplication), one source
    // archive, and one global archive that must be left out of the sitemap.
    await con.getRepository(Archive).save([
      {
        ...archiveBase,
        scopeType: ArchiveScopeType.Tag,
        scopeId: 'rust',
        periodType: ArchivePeriodType.Month,
        periodStart: new Date('2025-01-01T00:00:00.000Z'),
        createdAt,
      },
      {
        ...archiveBase,
        scopeType: ArchiveScopeType.Tag,
        scopeId: 'rust',
        periodType: ArchivePeriodType.Month,
        periodStart: new Date('2025-02-01T00:00:00.000Z'),
        createdAt,
      },
      {
        ...archiveBase,
        scopeType: ArchiveScopeType.Source,
        scopeId: 'a',
        periodType: ArchivePeriodType.Month,
        periodStart: new Date('2025-01-01T00:00:00.000Z'),
        createdAt,
      },
      {
        ...archiveBase,
        scopeType: ArchiveScopeType.Global,
        scopeId: null,
        periodType: ArchivePeriodType.Month,
        periodStart: new Date('2025-01-01T00:00:00.000Z'),
        createdAt,
      },
    ]);

    const res = await request(app.server)
      .get('/sitemaps/archive-index.xml')
      .expect(200);

    expect(res.header['content-type']).toContain('application/xml');
    expect(res.header['cache-control']).toEqual(
      'public, max-age=7200, s-maxage=7200',
    );
    expect(res.text).toContain(
      '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
    );
    // Source 'a' has handle 'a'
    expect(res.text).toContain(
      '<loc>http://localhost:5002/sources/a/best-of</loc>',
    );
    // Tag rust should appear once (deduplicated)
    expect(res.text).toContain(
      '<loc>http://localhost:5002/tags/rust/best-of</loc>',
    );
    // Global archives should not appear: every <loc> must be a tag or source
    // index page, so a global entry of any URL shape fails this check.
    // NOTE(review): assumes this sitemap emits only tag/source index URLs.
    const locEntries = res.text.match(/<loc>[^<]*<\/loc>/g) || [];
    const unexpectedEntries = locEntries.filter(
      (entry) => !/\/(tags|sources)\/[^/]+\/best-of<\/loc>$/.test(entry),
    );
    expect(unexpectedEntries).toEqual([]);
    // Only one entry for rust (two archives but one index)
    const rustMatches = res.text.match(/\/tags\/rust\/best-of<\/loc>/g);
    expect(rustMatches).toHaveLength(1);
  });

  it('should exclude source archives when the source has been deleted', async () => {
    // The scopeId below does not correspond to any source row.
    await con.getRepository(Archive).save([
      {
        ...archiveBase,
        scopeType: ArchiveScopeType.Source,
        scopeId: 'nonexistent-source',
        periodType: ArchivePeriodType.Month,
        periodStart: new Date('2025-01-01T00:00:00.000Z'),
        createdAt: new Date(),
      },
    ]);

    const res = await request(app.server)
      .get('/sitemaps/archive-index.xml')
      .expect(200);

    expect(res.text).not.toContain('/sources/nonexistent-source/best-of');
  });
});
1154+
// Tests for the paginated archive-pages sitemaps, addressed as
// /sitemaps/archive-pages-<scopeType>-<periodType>-<page>.xml.
describe('GET /sitemaps/archive-pages-:scopeType-:periodType-:page.xml', () => {
  // Fields shared by every archive row seeded in this suite.
  const archiveBase = {
    subjectType: ArchiveSubjectType.Post,
    rankingType: ArchiveRankingType.Best,
  };
  // Fixed creation timestamp used by the tests that seed deterministic rows.
  const seededAtIso = '2025-04-01T10:00:00.000Z';
  const urlsetOpenTag =
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">';

  it('should return tag monthly archive pages with correct URL format', async () => {
    const createdAt = new Date(seededAtIso);
    const golangMonthly = {
      ...archiveBase,
      scopeType: ArchiveScopeType.Tag,
      scopeId: 'golang',
      periodType: ArchivePeriodType.Month,
      periodStart: new Date('2025-01-01T00:00:00.000Z'),
      createdAt,
    };
    const golangYearly = {
      ...archiveBase,
      scopeType: ArchiveScopeType.Tag,
      scopeId: 'golang',
      periodType: ArchivePeriodType.Year,
      periodStart: new Date('2024-01-01T00:00:00.000Z'),
      createdAt,
    };
    await con.getRepository(Archive).save([golangMonthly, golangYearly]);

    const { header, text } = await request(app.server)
      .get('/sitemaps/archive-pages-tag-month-0.xml')
      .expect(200);

    expect(header['content-type']).toContain('application/xml');
    expect(header['cache-control']).toEqual(
      'public, max-age=7200, s-maxage=7200',
    );
    expect(text).toContain(urlsetOpenTag);
    // The month segment of a monthly archive URL is zero-padded.
    expect(text).toContain(
      '<loc>http://localhost:5002/tags/golang/best-of/2025/01</loc>',
    );
    // Yearly archives belong to a different sitemap and must be absent here.
    expect(text).not.toContain(
      '<loc>http://localhost:5002/tags/golang/best-of/2024</loc>',
    );
    // Each entry is expected to carry a lastmod element.
    expect(text).toContain('<lastmod>');
  });

  it('should return tag yearly archive pages', async () => {
    const golangYearly = {
      ...archiveBase,
      scopeType: ArchiveScopeType.Tag,
      scopeId: 'golang',
      periodType: ArchivePeriodType.Year,
      periodStart: new Date('2024-01-01T00:00:00.000Z'),
      createdAt: new Date(seededAtIso),
    };
    await con.getRepository(Archive).save([golangYearly]);

    const { text } = await request(app.server)
      .get('/sitemaps/archive-pages-tag-year-0.xml')
      .expect(200);

    expect(text).toContain(
      '<loc>http://localhost:5002/tags/golang/best-of/2024</loc>',
    );
  });

  it('should return source monthly archive pages using handle', async () => {
    const sourceMonthly = {
      ...archiveBase,
      scopeType: ArchiveScopeType.Source,
      scopeId: 'b',
      periodType: ArchivePeriodType.Month,
      periodStart: new Date('2025-09-01T00:00:00.000Z'),
      createdAt: new Date(seededAtIso),
    };
    await con.getRepository(Archive).save([sourceMonthly]);

    const { text } = await request(app.server)
      .get('/sitemaps/archive-pages-source-month-0.xml')
      .expect(200);

    // The URL is built from the source handle (source 'b' has handle 'b').
    expect(text).toContain(
      '<loc>http://localhost:5002/sources/b/best-of/2025/09</loc>',
    );
  });

  // Malformed route parameters must all be rejected with a 404.
  const rejectedRequests = [
    {
      label: 'invalid scopeType',
      path: '/sitemaps/archive-pages-invalid-month-0.xml',
    },
    {
      label: 'invalid periodType',
      path: '/sitemaps/archive-pages-tag-invalid-0.xml',
    },
    {
      label: 'negative page',
      path: '/sitemaps/archive-pages-tag-month--1.xml',
    },
    {
      label: 'non-integer page',
      path: '/sitemaps/archive-pages-tag-month-abc.xml',
    },
  ];
  for (const { label, path } of rejectedRequests) {
    it(`should return 404 for ${label}`, async () => {
      await request(app.server).get(path).expect(404);
    });
  }

  it('should return empty urlset for page beyond data', async () => {
    const { text } = await request(app.server)
      .get('/sitemaps/archive-pages-tag-month-999.xml')
      .expect(200);

    expect(text).toContain(urlsetOpenTag);
    expect(text).not.toContain('<loc>');
  });

  it('should exclude source archives when the source has been deleted', async () => {
    // The scopeId below does not correspond to any source row.
    const orphanedArchive = {
      ...archiveBase,
      scopeType: ArchiveScopeType.Source,
      scopeId: 'nonexistent-source',
      periodType: ArchivePeriodType.Month,
      periodStart: new Date('2025-01-01T00:00:00.000Z'),
      createdAt: new Date(),
    };
    await con.getRepository(Archive).save([orphanedArchive]);

    const { text } = await request(app.server)
      .get('/sitemaps/archive-pages-source-month-0.xml')
      .expect(200);

    expect(text).not.toContain('/sources/nonexistent-source/best-of');
  });
});
1307+
// Tests for how the top-level sitemap index advertises the archive sitemaps.
describe('GET /sitemaps/index.xml (archive entries)', () => {
  // Fields shared by every archive row seeded in this suite.
  const archiveBase = {
    subjectType: ArchiveSubjectType.Post,
    rankingType: ArchiveRankingType.Best,
  };
  // Common prefix of the sitemap URLs asserted below.
  const sitemapsUrl = 'http://localhost:5002/api/sitemaps';

  it('should include archive-index and paginated archive-pages sitemaps', async () => {
    // One tag/month archive and one source/year archive, so two distinct
    // scope/period combinations are expected to be listed.
    const tagMonthArchive = {
      ...archiveBase,
      scopeType: ArchiveScopeType.Tag,
      scopeId: 'golang',
      periodType: ArchivePeriodType.Month,
      periodStart: new Date('2025-01-01T00:00:00.000Z'),
      createdAt: new Date(),
    };
    const sourceYearArchive = {
      ...archiveBase,
      scopeType: ArchiveScopeType.Source,
      scopeId: 'a',
      periodType: ArchivePeriodType.Year,
      periodStart: new Date('2024-01-01T00:00:00.000Z'),
      createdAt: new Date(),
    };
    await con.getRepository(Archive).save([tagMonthArchive, sourceYearArchive]);

    const { text } = await request(app.server)
      .get('/sitemaps/index.xml')
      .expect(200);

    expect(text).toContain(`<loc>${sitemapsUrl}/archive-index.xml</loc>`);
    expect(text).toContain(
      `<loc>${sitemapsUrl}/archive-pages-tag-month-0.xml</loc>`,
    );
    expect(text).toContain(
      `<loc>${sitemapsUrl}/archive-pages-source-year-0.xml</loc>`,
    );
    // The old non-paginated archive-pages.xml must no longer be listed.
    expect(text).not.toContain(`<loc>${sitemapsUrl}/archive-pages.xml</loc>`);
  });

  it('should not include archive-pages entries when no archives exist', async () => {
    const { text } = await request(app.server)
      .get('/sitemaps/index.xml')
      .expect(200);

    // The archive index is still listed, but no paginated pages are.
    expect(text).toContain(`<loc>${sitemapsUrl}/archive-index.xml</loc>`);
    expect(text).not.toContain('archive-pages-');
  });
});
1364+
10581365describe ( 'getSitemapRowLastmod' , ( ) => {
10591366 it ( 'should normalize pg timestamp format to ISO-8601' , ( ) => {
10601367 const normalizedLastmod = getSitemapRowLastmod ( {
0 commit comments