Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
307 changes: 307 additions & 0 deletions __tests__/sitemaps.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { DataSource, DeepPartial } from 'typeorm';
import createOrGetConnection from '../src/db';
import {
AGENTS_DIGEST_SOURCE,
Archive,
CollectionPost,
Keyword,
KeywordStatus,
Expand All @@ -18,6 +19,12 @@ import {
SourceType,
User,
} from '../src/entity';
import {
ArchivePeriodType,
ArchiveRankingType,
ArchiveScopeType,
ArchiveSubjectType,
} from '../src/common/archive';
import { getSitemapRowLastmod } from '../src/routes/sitemaps';
import { updateFlagsStatement } from '../src/common/utils';
import { sourcesFixture } from './fixture/source';
Expand Down Expand Up @@ -1055,6 +1062,306 @@ describe('GET /sitemaps/evergreen.xml', () => {
});
});

describe('GET /sitemaps/archive-index.xml', () => {
const archiveBase = {
subjectType: ArchiveSubjectType.Post,
rankingType: ArchiveRankingType.Best,
};

it('should return index pages for tags and sources with archives', async () => {
const createdAt = new Date('2025-03-01T10:00:00.000Z');

await con.getRepository(Archive).save([
{
...archiveBase,
scopeType: ArchiveScopeType.Tag,
scopeId: 'rust',
periodType: ArchivePeriodType.Month,
periodStart: new Date('2025-01-01T00:00:00.000Z'),
createdAt,
},
{
...archiveBase,
scopeType: ArchiveScopeType.Tag,
scopeId: 'rust',
periodType: ArchivePeriodType.Month,
periodStart: new Date('2025-02-01T00:00:00.000Z'),
createdAt,
},
{
...archiveBase,
scopeType: ArchiveScopeType.Source,
scopeId: 'a',
periodType: ArchivePeriodType.Month,
periodStart: new Date('2025-01-01T00:00:00.000Z'),
createdAt,
},
{
...archiveBase,
scopeType: ArchiveScopeType.Global,
scopeId: null,
periodType: ArchivePeriodType.Month,
periodStart: new Date('2025-01-01T00:00:00.000Z'),
createdAt,
},
]);

const res = await request(app.server)
.get('/sitemaps/archive-index.xml')
.expect(200);

expect(res.header['content-type']).toContain('application/xml');
expect(res.header['cache-control']).toEqual(
'public, max-age=7200, s-maxage=7200',
);
expect(res.text).toContain(
'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
);
// Source 'a' has handle 'a'
expect(res.text).toContain(
'<loc>http://localhost:5002/sources/a/best-of</loc>',
);
// Tag rust should appear once (deduplicated)
expect(res.text).toContain(
'<loc>http://localhost:5002/tags/rust/best-of</loc>',
);
// Global archives should not appear
expect(res.text).not.toContain('/best-of</loc>\n');
// Only one entry for rust (two archives but one index)
const rustMatches = res.text.match(/\/tags\/rust\/best-of<\/loc>/g);
expect(rustMatches).toHaveLength(1);
});

it('should exclude source archives when the source has been deleted', async () => {
await con.getRepository(Archive).save([
{
...archiveBase,
scopeType: ArchiveScopeType.Source,
scopeId: 'nonexistent-source',
periodType: ArchivePeriodType.Month,
periodStart: new Date('2025-01-01T00:00:00.000Z'),
createdAt: new Date(),
},
]);

const res = await request(app.server)
.get('/sitemaps/archive-index.xml')
.expect(200);

expect(res.text).not.toContain('/sources/nonexistent-source/best-of');
});
});

describe('GET /sitemaps/archive-pages-:scopeType-:periodType-:page.xml', () => {
const archiveBase = {
subjectType: ArchiveSubjectType.Post,
rankingType: ArchiveRankingType.Best,
};

it('should return tag monthly archive pages with correct URL format', async () => {
const createdAt = new Date('2025-04-01T10:00:00.000Z');

await con.getRepository(Archive).save([
{
...archiveBase,
scopeType: ArchiveScopeType.Tag,
scopeId: 'golang',
periodType: ArchivePeriodType.Month,
periodStart: new Date('2025-01-01T00:00:00.000Z'),
createdAt,
},
{
...archiveBase,
scopeType: ArchiveScopeType.Tag,
scopeId: 'golang',
periodType: ArchivePeriodType.Year,
periodStart: new Date('2024-01-01T00:00:00.000Z'),
createdAt,
},
]);

const res = await request(app.server)
.get('/sitemaps/archive-pages-tag-month-0.xml')
.expect(200);

expect(res.header['content-type']).toContain('application/xml');
expect(res.header['cache-control']).toEqual(
'public, max-age=7200, s-maxage=7200',
);
expect(res.text).toContain(
'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
);
// Monthly tag archive with zero-padded month
expect(res.text).toContain(
'<loc>http://localhost:5002/tags/golang/best-of/2025/01</loc>',
);
// Should not include yearly archives
expect(res.text).not.toContain(
'<loc>http://localhost:5002/tags/golang/best-of/2024</loc>',
);
// Lastmod should be present
expect(res.text).toContain('<lastmod>');
});

it('should return tag yearly archive pages', async () => {
const createdAt = new Date('2025-04-01T10:00:00.000Z');

await con.getRepository(Archive).save([
{
...archiveBase,
scopeType: ArchiveScopeType.Tag,
scopeId: 'golang',
periodType: ArchivePeriodType.Year,
periodStart: new Date('2024-01-01T00:00:00.000Z'),
createdAt,
},
]);

const res = await request(app.server)
.get('/sitemaps/archive-pages-tag-year-0.xml')
.expect(200);

expect(res.text).toContain(
'<loc>http://localhost:5002/tags/golang/best-of/2024</loc>',
);
});

it('should return source monthly archive pages using handle', async () => {
const createdAt = new Date('2025-04-01T10:00:00.000Z');

await con.getRepository(Archive).save([
{
...archiveBase,
scopeType: ArchiveScopeType.Source,
scopeId: 'b',
periodType: ArchivePeriodType.Month,
periodStart: new Date('2025-09-01T00:00:00.000Z'),
createdAt,
},
]);

const res = await request(app.server)
.get('/sitemaps/archive-pages-source-month-0.xml')
.expect(200);

// Source archive uses handle (source 'b' has handle 'b')
expect(res.text).toContain(
'<loc>http://localhost:5002/sources/b/best-of/2025/09</loc>',
);
});

it('should return 404 for invalid scopeType', async () => {
await request(app.server)
.get('/sitemaps/archive-pages-invalid-month-0.xml')
.expect(404);
});

it('should return 404 for invalid periodType', async () => {
await request(app.server)
.get('/sitemaps/archive-pages-tag-invalid-0.xml')
.expect(404);
});

it('should return 404 for negative page', async () => {
await request(app.server)
.get('/sitemaps/archive-pages-tag-month--1.xml')
.expect(404);
});

it('should return 404 for non-integer page', async () => {
await request(app.server)
.get('/sitemaps/archive-pages-tag-month-abc.xml')
.expect(404);
});

it('should return empty urlset for page beyond data', async () => {
const res = await request(app.server)
.get('/sitemaps/archive-pages-tag-month-999.xml')
.expect(200);

expect(res.text).toContain(
'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">',
);
expect(res.text).not.toContain('<loc>');
});

it('should exclude source archives when the source has been deleted', async () => {
await con.getRepository(Archive).save([
{
...archiveBase,
scopeType: ArchiveScopeType.Source,
scopeId: 'nonexistent-source',
periodType: ArchivePeriodType.Month,
periodStart: new Date('2025-01-01T00:00:00.000Z'),
createdAt: new Date(),
},
]);

const res = await request(app.server)
.get('/sitemaps/archive-pages-source-month-0.xml')
.expect(200);

expect(res.text).not.toContain('/sources/nonexistent-source/best-of');
});
});

describe('GET /sitemaps/index.xml (archive entries)', () => {
const archiveBase = {
subjectType: ArchiveSubjectType.Post,
rankingType: ArchiveRankingType.Best,
};

it('should include archive-index and paginated archive-pages sitemaps', async () => {
await con.getRepository(Archive).save([
{
...archiveBase,
scopeType: ArchiveScopeType.Tag,
scopeId: 'golang',
periodType: ArchivePeriodType.Month,
periodStart: new Date('2025-01-01T00:00:00.000Z'),
createdAt: new Date(),
},
{
...archiveBase,
scopeType: ArchiveScopeType.Source,
scopeId: 'a',
periodType: ArchivePeriodType.Year,
periodStart: new Date('2024-01-01T00:00:00.000Z'),
createdAt: new Date(),
},
]);

const res = await request(app.server)
.get('/sitemaps/index.xml')
.expect(200);

expect(res.text).toContain(
'<loc>http://localhost:5002/api/sitemaps/archive-index.xml</loc>',
);
expect(res.text).toContain(
'<loc>http://localhost:5002/api/sitemaps/archive-pages-tag-month-0.xml</loc>',
);
expect(res.text).toContain(
'<loc>http://localhost:5002/api/sitemaps/archive-pages-source-year-0.xml</loc>',
);
// Should not contain old non-paginated archive-pages.xml
expect(res.text).not.toContain(
'<loc>http://localhost:5002/api/sitemaps/archive-pages.xml</loc>',
);
});

it('should not include archive-pages entries when no archives exist', async () => {
const res = await request(app.server)
.get('/sitemaps/index.xml')
.expect(200);

expect(res.text).toContain(
'<loc>http://localhost:5002/api/sitemaps/archive-index.xml</loc>',
);
expect(res.text).not.toContain('archive-pages-');
});
});

describe('getSitemapRowLastmod', () => {
it('should normalize pg timestamp format to ISO-8601', () => {
const normalizedLastmod = getSitemapRowLastmod({
Expand Down
Loading
Loading