From 9f1645d086c363652cf1ff1a0315a4035f7df37a Mon Sep 17 00:00:00 2001 From: catchingknives <35201177+catchingknives@users.noreply.github.com> Date: Mon, 18 May 2026 07:19:57 +0200 Subject: [PATCH] feat(pulse/telegram): render markdown in messages via HTML parse mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Telegram module sends ctx.reply(text) with no parse_mode, so the DA's **bold**, *italic*, `code`, code fences, and links arrive as literal characters on the phone. The existing TELEGRAM MODE OVERRIDE prompt worked around this by steering the DA toward plain prose — a render-side problem masquerading as a content-side restriction. Changes: - New `mdToHtml` helper in modules/markdown-html.ts converts a useful subset of markdown to Telegram HTML: bold, italic, strikethrough, inline code, fenced code, links, ATX headers (→ bold lines), bullets (→ '•'). - Telegram module sends with parse_mode: 'HTML' by default; plain-text fallback on any send error via .catch(). - New `markdown_mode: "html" | "plain"` field in TelegramConfig, defaults to 'html' — opt-out preserved for users who want raw text. - TELEGRAM MODE OVERRIDE prompt flipped from 'no formatting' to an inline cheatsheet of what renders, so the DA writes naturally-formatted prose. Robustness: - Code blocks are stash-and-restored before other transforms run — bold/italic cannot leak into them. - Italic boundaries admit punctuation, whitespace, and HTML tag chars so '**a _b_**' nests correctly while 'snake_case' and 'Array' survive untouched. - Unbalanced markers degrade to literal characters, so chunk-splitting at the 4096-char boundary cannot produce broken HTML. Tests: $ bun test ./Releases/v5.0.0/.claude/PAI/PULSE/modules/telegram.test.ts 21 pass, 0 fail End-to-end validation: showcase message exercising every transformation sent through @nairyo_bot; renders correctly on iOS. 
--- .../PAI/PULSE/modules/markdown-html.ts | 60 ++++++++++ .../PAI/PULSE/modules/telegram.test.ts | 104 ++++++++++++++++++ .../.claude/PAI/PULSE/modules/telegram.ts | 59 +++++++--- 3 files changed, 206 insertions(+), 17 deletions(-) create mode 100644 Releases/v5.0.0/.claude/PAI/PULSE/modules/markdown-html.ts create mode 100644 Releases/v5.0.0/.claude/PAI/PULSE/modules/telegram.test.ts diff --git a/Releases/v5.0.0/.claude/PAI/PULSE/modules/markdown-html.ts b/Releases/v5.0.0/.claude/PAI/PULSE/modules/markdown-html.ts new file mode 100644 index 000000000..77177be1e --- /dev/null +++ b/Releases/v5.0.0/.claude/PAI/PULSE/modules/markdown-html.ts @@ -0,0 +1,60 @@ +/** + * Markdown → Telegram HTML converter. + * + * Telegram's parse_mode "HTML" supports a small tag set: b, i, u, s, code, + * pre, a, tg-spoiler. It does not render headers, lists, or tables. This + * helper converts a useful subset of common markdown emitted by the DA into + * that tag set so prose renders cleanly on phones. + * + * HTML special chars are escaped first so model-emitted '<', '>', '&' survive + * verbatim inside the message body. + * + * Design choices: + * - Code blocks and inline code are stashed to placeholders BEFORE other + * transforms run, so bold/italic/etc. cannot leak into code content. + * - Italic markers (* and _) require punctuation/whitespace/tag-boundary on + * both sides so snake_case identifiers and arithmetic stars are not + * mangled, while still allowing italic to nest inside bold tags. + * - Unbalanced markers render as literal characters — no broken HTML can be + * emitted, so chunk-splitting before conversion is safe. + * - Headers and bullets collapse to bold lines and "• " glyphs respectively, + * since Telegram does not natively render either. + */ +export function mdToHtml(text: string): string { + let out = text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;") + + // Stash code blocks first so subsequent bold/italic/etc. cannot mutate + // their contents. 
Placeholder sentinels cannot appear in normal model + // output and no other regex below matches against them. + const stash: string[] = [] + const place = (html: string): string => { + const i = stash.length + stash.push(html) + return ` CODE${i} ` + } + + out = out.replace( + /```[a-zA-Z0-9_+-]*\n([\s\S]*?)```/g, + (_, code) => place(`<pre>${code.replace(/\n$/, "")}</pre>`), + ) + out = out.replace(/`([^`\n]+)`/g, (_, code) => place(`<code>${code}</code>`)) + + // Links [text](url) + out = out.replace(/\[([^\]]+)\]\(([^)\s]+)\)/g, '<a href="$2">$1</a>') + // Bold **x** + out = out.replace(/\*\*([^*\n]+)\*\*/g, "<b>$1</b>") + // Italic *x* / _x_ — boundary includes HTML tag chars (<, >) so italic can + // nest inside bold (e.g. **a _b_**) while still rejecting snake_case. + out = out.replace(/(^|[\s(>])\*([^*\n]+)\*(?=[\s).,!?:;<]|$)/g, "$1<i>$2</i>") + out = out.replace(/(^|[\s(>])_([^_\n]+)_(?=[\s).,!?:;<]|$)/g, "$1<i>$2</i>") + // Strikethrough ~~x~~ + out = out.replace(/~~([^~\n]+)~~/g, "<s>$1</s>") + // ATX headers + out = out.replace(/^#{1,6}\s+(.+)$/gm, "<b>$1</b>") + // Bullet markers + out = out.replace(/^\s*[-*+]\s+/gm, "• ") + + // Restore code placeholders. + out = out.replace(/ CODE(\d+) /g, (_, i) => stash[Number(i)]!) + return out +} diff --git a/Releases/v5.0.0/.claude/PAI/PULSE/modules/telegram.test.ts b/Releases/v5.0.0/.claude/PAI/PULSE/modules/telegram.test.ts new file mode 100644 index 000000000..b37c30f26 --- /dev/null +++ b/Releases/v5.0.0/.claude/PAI/PULSE/modules/telegram.test.ts @@ -0,0 +1,104 @@ +/** + * Tests for the mdToHtml helper used by the Telegram module. 
+ * + * Run with: bun test Releases/v5.0.0/.claude/PAI/PULSE/modules/telegram.test.ts + */ +import { describe, expect, test } from "bun:test" +import { mdToHtml } from "./markdown-html" + +describe("mdToHtml", () => { + test("HTML special chars are escaped first", () => { + expect(mdToHtml("a < b & c > d")).toBe("a &lt; b &amp; c &gt; d") + }) + + test("bold **x** → <b>x</b>", () => { + expect(mdToHtml("this is **bold** here")).toBe("this is <b>bold</b> here") + }) + + test("italic *x* with boundary → <i>x</i>", () => { + expect(mdToHtml("an *emphasised* word")).toBe("an <i>emphasised</i> word") + }) + + test("italic _x_ with boundary → <i>x</i>", () => { + expect(mdToHtml("an _emphasised_ word")).toBe("an <i>emphasised</i> word") + }) + + test("snake_case underscores survive intact", () => { + expect(mdToHtml("call connection_reference_name twice")).toBe( + "call connection_reference_name twice", + ) + }) + + test("inline `code` → <code>code</code>", () => { + expect(mdToHtml("run `bun test` now")).toBe("run <code>bun test</code> now") + }) + + test("brackets inside inline code are escaped", () => { + expect(mdToHtml("use `arr<T>` here")).toBe("use <code>arr&lt;T&gt;</code> here") + }) + + test("fenced code blocks → <pre>", () => {
+    const input = "```ts\nlet x = 1\nlet y = 2\n```"
+    expect(mdToHtml(input)).toBe("<pre>let x = 1\nlet y = 2</pre>") + }) + + test("fenced code with no language tag", () => { + expect(mdToHtml("```\nhello\n```")).toBe("<pre>hello</pre>") + }) + + test("links [text](url) → <a>", () => { + expect(mdToHtml("see [docs](https://example.com) for more")).toBe( + 'see <a href="https://example.com">docs</a> for more', + ) + }) + + test("nested bold + italic", () => { + expect(mdToHtml("**bold _and italic_**")).toBe("<b>bold <i>and italic</i></b>") + }) + + test("strikethrough ~~x~~ → <s>", () => { + expect(mdToHtml("this is ~~gone~~ now")).toBe("this is <s>gone</s> now") + }) + + test("ATX headers become bold lines", () => { + expect(mdToHtml("# Heading\nbody")).toBe("<b>Heading</b>\nbody") + expect(mdToHtml("### Sub")).toBe("<b>Sub</b>") + }) + + test("bullet markers become bullet glyphs", () => { + expect(mdToHtml("- one\n- two\n* three\n+ four")).toBe( + "• one\n• two\n• three\n• four", + ) + }) + + test("unbalanced bold marker renders as literal asterisks (no broken HTML)", () => { + expect(mdToHtml("starting **but no close")).toBe("starting **but no close") + }) + + test("unbalanced italic underscore renders as literal", () => { + expect(mdToHtml("snake_case at end_")).toBe("snake_case at end_") + }) + + test("code block content is not re-processed for inline markdown", () => { + const input = "```\n**not bold**\n```" + expect(mdToHtml(input)).toBe("<pre>**not bold**</pre>") + }) + + test("URL with underscores is not italicised", () => { + expect(mdToHtml("[link](https://x.com/my_path_here)")).toBe( + '<a href="https://x.com/my_path_here">link</a>', + ) + }) + + test("multiple bold spans in one line", () => { + expect(mdToHtml("**a** and **b**")).toBe("<b>a</b> and <b>b</b>") + }) + + test("empty input returns empty string", () => { + expect(mdToHtml("")).toBe("") + }) + + test("plain prose with no markdown is unchanged", () => { + expect(mdToHtml("just a normal sentence.")).toBe("just a normal sentence.") + }) +}) diff --git a/Releases/v5.0.0/.claude/PAI/PULSE/modules/telegram.ts b/Releases/v5.0.0/.claude/PAI/PULSE/modules/telegram.ts index 22b0fea2e..a05b3b46e 100644 --- a/Releases/v5.0.0/.claude/PAI/PULSE/modules/telegram.ts +++ b/Releases/v5.0.0/.claude/PAI/PULSE/modules/telegram.ts @@ -12,6 +12,7 @@ import { Bot } from "grammy" import { query } from "@anthropic-ai/claude-agent-sdk" import { ConversationStore } from "../lib/conversation" import { sanitize, analyzeForInjection } from "../lib/sanitize" +import { mdToHtml } from "./markdown-html" import { join } from "path" import { appendFile, mkdir } from "fs/promises" @@ -31,6 +32,10 @@ export interface TelegramConfig { max_turns?: number sdk_timeout_ms?: number edit_interval_ms?: number + // Rendering mode for outgoing messages. "html" converts a useful subset of + // markdown to Telegram HTML (bold, italic, code, pre, links, headers, + // bullets, strikethrough); "plain" sends raw text. Defaults to "html". + markdown_mode?: "html" | "plain" } // ── Constants ── @@ -114,6 +119,7 @@ export async function startTelegram(config: TelegramConfig): Promise { const maxTurns = config.max_turns ?? 25 const sdkTimeoutMs = config.sdk_timeout_ms ?? 120_000 const editIntervalMs = config.edit_interval_ms ?? 800 + const markdownMode = config.markdown_mode ?? "html" // Ensure directories await mkdir(STATE_DIR, { recursive: true }) @@ -204,14 +210,22 @@ You are {{DA_NAME}}, responding via Telegram. 
{{PRINCIPAL_NAME}} is messaging yo CRITICAL RULES FOR TELEGRAM MODE: - IGNORE all ALGORITHM/NATIVE/MINIMAL format templates from CLAUDE.md. Those are for terminal sessions only. -- NO format headers (no ════, no 🗒️, no ━━━, no ISC criteria, no phase markers) -- NO emoji prefixes, NO bullet formatting -- Speak as {{DA_NAME}} — first person, natural, conversational, like talking to a friend -- Keep responses under 200 words -- No code blocks unless {{PRINCIPAL_NAME}} specifically asks for code -- NEVER use voice notification curls (no http://localhost:31337/notify calls) -- You have ALL PAI capabilities — skills, email, calendar, lights, everything -- When doing tasks, do them and confirm briefly what you did`, +- NO PAI box headers (no ════, no 🗒️, no ━━━, no ISC criteria, no phase markers) — those don't render in Telegram and look like noise. +- USE MARKDOWN FREELY — it renders. Telegram converts your markdown to native formatting before display: + · **bold** → bold + · *italic* or _italic_ → italic + · \`inline code\` → monospace + · triple-backtick fenced blocks → preformatted code blocks + · [link text](https://url) → tappable link + · - bullet or * bullet → bullet list + · # / ## / ### headings → bold heading lines + · ~~strike~~ → strikethrough +- Speak as {{DA_NAME}} — first person, natural, conversational, like talking to a friend. +- Keep responses under 200 words unless {{PRINCIPAL_NAME}} explicitly asks for more. +- Use code blocks when sharing commands, paths, JSON, or anything copy-paste; use bold for emphasis; use bullet lists when listing 3+ items. +- NEVER use voice notification curls (no http://localhost:31337/notify calls). +- You have ALL PAI capabilities — skills, email, calendar, lights, everything. 
+- When doing tasks, do them and confirm briefly what you did.`, }, } @@ -295,29 +309,40 @@ CRITICAL RULES FOR TELEGRAM MODE: log("error", "Empty response from SDK") } - // Final clean message + // Final clean message — render markdown if configured, with plain-text + // fallback for any edge case where Telegram rejects the HTML. + const useHtml = markdownMode === "html" + const render = (s: string) => (useHtml ? mdToHtml(s) : s) + const sendOpts = useHtml ? { parse_mode: "HTML" as const } : undefined + if (fullText.length <= MAX_TELEGRAM_LENGTH) { + const rendered = render(fullText) if (messageId) { - await ctx.api.editMessageText(chatId, messageId, fullText).catch(() => {}) + await ctx.api.editMessageText(chatId, messageId, rendered, sendOpts) + .catch(() => ctx.api.editMessageText(chatId, messageId, fullText).catch(() => {})) } else { - await ctx.reply(fullText) + await ctx.reply(rendered, sendOpts).catch(() => ctx.reply(fullText)) } } else { - // Split long messages + // Split long messages on 4096-char boundary. Splitting raw text before + // markdown conversion is safe: mdToHtml requires balanced markers, so + // a marker split across two chunks just renders as literal characters. 
const chunks: string[] = [] let remaining = fullText while (remaining.length > 0) { chunks.push(remaining.slice(0, MAX_TELEGRAM_LENGTH)) remaining = remaining.slice(MAX_TELEGRAM_LENGTH) } + const rendered = chunks.map(render) if (messageId) { - await ctx.api.editMessageText(chatId, messageId, chunks[0]!).catch(() => {}) - for (const chunk of chunks.slice(1)) { - await ctx.reply(chunk) + await ctx.api.editMessageText(chatId, messageId, rendered[0]!, sendOpts) + .catch(() => ctx.api.editMessageText(chatId, messageId, chunks[0]!).catch(() => {})) + for (let i = 1; i < chunks.length; i++) { + await ctx.reply(rendered[i]!, sendOpts).catch(() => ctx.reply(chunks[i]!)) } } else { - for (const chunk of chunks) { - await ctx.reply(chunk) + for (let i = 0; i < chunks.length; i++) { + await ctx.reply(rendered[i]!, sendOpts).catch(() => ctx.reply(chunks[i]!)) } } }