From 3e3b0bfb501d2bfcceaae73a47b35d094f61dd1e Mon Sep 17 00:00:00 2001
From: Nicolas Dorseuil
Date: Thu, 30 Apr 2026 08:56:37 +0200
Subject: [PATCH] Add tests for markdown serving based on user agents

---
Notes: the added tests are written table-driven (one entry per user agent);
test titles, requests and assertions are the same as the one-test-per-agent
form. The post-image blob id on the index line predates this restyle.

 packages/gitbook/tests/robots.test.ts | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/packages/gitbook/tests/robots.test.ts b/packages/gitbook/tests/robots.test.ts
index 0b88a52e3f..84e55bf55f 100644
--- a/packages/gitbook/tests/robots.test.ts
+++ b/packages/gitbook/tests/robots.test.ts
@@ -1,6 +1,47 @@
 import { describe, expect, it } from 'bun:test';
 import { getContentTestURL } from './utils';
 
+const TEST_PAGE_URL = 'https://gitbook.gitbook.io/test-gitbook-open/text-page';
+
+describe('markdown serving based on user agent', () => {
+    // One entry per test: the page is fetched with `userAgent` and the response must be
+    // a 200 whose content-type header contains `contentType`.
+    const cases = [
+        {
+            title: 'should serve markdown to GPTBot (ua-match AI agent)',
+            userAgent: 'GPTBot/1.2',
+            contentType: 'text/markdown',
+        },
+        {
+            title: 'should serve markdown to ClaudeBot (ua-match AI agent)',
+            userAgent: 'ClaudeBot/1.0',
+            contentType: 'text/markdown',
+        },
+        {
+            title: 'should NOT serve markdown to Slackbot (heuristic detection only)',
+            userAgent: 'Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)',
+            contentType: 'text/html',
+        },
+        {
+            title: 'should NOT serve markdown to Googlebot (traditional bot, not an AI agent)',
+            userAgent: 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
+            contentType: 'text/html',
+        },
+    ];
+
+    for (const { title, userAgent, contentType } of cases) {
+        it(title, async () => {
+            const requestHeaders = { 'User-Agent': userAgent };
+            const response = await fetch(getContentTestURL(TEST_PAGE_URL), {
+                headers: requestHeaders,
+            });
+
+            expect(response.status).toBe(200);
+            expect(response.headers.get('content-type')).toContain(contentType);
+        });
+    }
+});
+
 describe('robots.txt', () => {
     it('declares allow content signals for public sites', async () => {
         const response = await fetch(