Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
d79d30b
test(message-parser): add benchmark suite to baseline Peggy performance
Harshit2405-2004 Mar 3, 2026
ffd0278
feat(message-parser): implement BlockSplitter PoC (Layer 1)
Harshit2405-2004 Mar 3, 2026
0c7daa2
test(message-parser): add skip-flag regression tests to document comp…
Harshit2405-2004 Mar 3, 2026
8ae0719
test(message-parser): expand benchmark suite with mixed and pathologi…
Harshit2405-2004 Mar 3, 2026
04c8fcd
refactor(message-parser): fix list segmentation and preserve syntax i…
Harshit2405-2004 Mar 3, 2026
25a81ec
refactor(message-parser): address refined review comments for BlockSp…
Harshit2405-2004 Mar 3, 2026
796b681
Merge branch 'develop' into refactor/message-parser-block-splitter
Harshit2405-2004 Mar 3, 2026
08d1dc4
refactor(message-parser): final verified fixes for BlockSplitter revi…
Harshit2405-2004 Mar 3, 2026
7874fff
refactor(message-parser): address all review comments for BlockSplitt…
Harshit2405-2004 Mar 3, 2026
2f80b11
Update packages/message-parser/benchmarks/parser.bench.ts
Harshit2405-2004 Mar 3, 2026
4bee6db
Merge branch 'develop' into test/message-parser-benchmark-suite
Harshit2405-2004 Mar 3, 2026
1bccb50
refactor(message-parser): handle mixed list ordering and restore regr…
Harshit2405-2004 Mar 5, 2026
3b3722b
chore: consolidate parser PoC
Harshit2405-2004 Mar 5, 2026
a76d0f5
Fix: Address cubic and coderabbit parser observations
Harshit2405-2004 Mar 5, 2026
1ae425c
Update packages/message-parser/tests/skip-flags-regression.spec.ts
Harshit2405-2004 Mar 5, 2026
1923a6a
Update packages/message-parser/tests/skip-flags-regression.spec.ts
Harshit2405-2004 Mar 5, 2026
81745c2
Merge branch 'develop' into feat/message-parser-poc
Harshit2405-2004 Mar 5, 2026
e753deb
Merge branch 'develop' into feat/message-parser-poc
Harshit2405-2004 Mar 6, 2026
bfaa13e
fix(message-parser): replace regex with charCode dispatch in BlockSpl…
Harshit2405-2004 Apr 7, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/block-splitter-layer1.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@rocket.chat/message-parser": patch
---

feat(message-parser): implement BlockSplitter PoC (Layer 1)
13 changes: 13 additions & 0 deletions packages/message-parser/benchmarks/parser.bench.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ const categories: BenchCategory[] = [
{ name: 'strike', input: '~~Hello world~~' },
{ name: 'nested', input: '**bold _italic_ and ~~strike~~**' },
{ name: 'deep nesting', input: '**bold _italic ~~strike _deep italic_~~_**' },
{ name: 'bold + italic mixed', input: '*Bold text* and _italic text_ in a message' },
{ name: 'deeply nested', input: '*bold _italic ~~strike~~ italic_ bold*' },
{ name: 'multiple', input: '**bold** normal _italic_ normal ~~strike~~ **more bold** _more italic_' },
],
},
Expand All @@ -83,6 +85,7 @@ const categories: BenchCategory[] = [
{ name: 'triple unicode (BigEmoji)', input: '😀🚀🌈', options: fullOptions },
{ name: 'in text', input: 'Hello :smile: world :heart: test :rocket: done', options: fullOptions },
{ name: 'mixed', input: 'Great job :thumbsup: 🎉 keep going :rocket:', options: fullOptions },
{ name: 'emoji heavy', input: ':smile: :wave: :rocket: :fire: :heart: :100:', options: fullOptions },
],
},
{
Expand All @@ -92,6 +95,7 @@ const categories: BenchCategory[] = [
{ name: 'multiple users', input: '@admin @user1 @moderator' },
{ name: 'channel', input: '#general' },
{ name: 'mixed', input: 'Hey @admin check #general and @user1' },
{ name: 'mentions (suggested)', input: 'Hey @john and @jane, check #general' },
],
},
{
Expand Down Expand Up @@ -141,6 +145,10 @@ const categories: BenchCategory[] = [
name: 'long with formatting',
input: '**bold** _italic_ ~~strike~~ `code` @user #channel :smile: https://example.com '.repeat(10).trim(),
},
{
name: 'unmatched markers (pathological)',
input: '*_~*_~*_~*_~*_~ hello world absolutely no closing markers anywhere at all',
},
],
},
{
Expand All @@ -158,6 +166,11 @@ const categories: BenchCategory[] = [
'**Release Notes v7.0**\n- [x] Fix #12345\n- [ ] Update docs\n\n> Important: check https://docs.rocket.chat\n\ncc @admin @devlead #releases :rocket:',
options: fullOptions,
},
{
name: 'realistic chat message',
input: 'Hello @team, please review the *important* update:\n\n1. Run `yarn build`\n2. Check #deployments\n\n*Thanks!* :rocket:',
options: fullOptions,
},
],
},
{
Expand Down
212 changes: 212 additions & 0 deletions packages/message-parser/src/BlockSplitter.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
/**
 * Kinds of top-level block that `BlockSplitter.split` produces.
 * A string enum, so every member's runtime value equals its own name.
 */
export enum BlockType {
/** Consecutive lines that match none of the other block kinds. */
PARAGRAPH = 'PARAGRAPH',
/** A single line that opens with a run of `#` markers followed by heading text. */
HEADING = 'HEADING',
/** The lines enclosed by a pair of ``` fence lines (or up to end of input if never closed). */
CODE = 'CODE',
/** Consecutive list-item lines (`-`, `*`, `+` or `<digits>.` markers) plus their indented continuation lines. */
LIST = 'LIST',
/** Consecutive lines that start with `>`. */
QUOTE = 'QUOTE',
}

/**
 * One top-level block emitted by `BlockSplitter.split`.
 * Only `type` and `content` are always present; the remaining fields are set
 * only for the block kinds noted on each of them.
 */
export type Block = {
/** Which kind of block this is. */
type: BlockType;
/**
 * Text of the block. Headings carry the text after the `#` markers, code blocks
 * carry the lines between the fences joined with `\n`, and lists, quotes and
 * paragraphs carry their raw source lines (markers included) joined with `\n`.
 */
content: string;
/** Headings only: number of leading `#` markers. */
level?: number;
/** Code blocks only: trimmed text following the opening ``` (empty string when none is given). */
language?: string;
/**
 * Lists only: `true` for ordered items, `false` for unordered ones; reset to
 * `undefined` once a single list mixes both kinds of marker.
 */
ordered?: boolean;
/** Code blocks only: `true` when the input ended before a closing ``` fence was found. */
incomplete?: boolean;
};

/**
 * Layer-1 block splitter: cuts a raw message into coarse, line-based blocks
 * (headings, fenced code, lists, quotes and paragraphs) without interpreting
 * any inline markup.
 *
 * Line classification is done with `charCodeAt` comparisons rather than regular
 * expressions. Both the space and the tab character count as separator /
 * indentation whitespace.
 */
export class BlockSplitter {
	/**
	 * Splits `input` into an ordered list of blocks.
	 *
	 * Lines are separated on `\n` or `\r\n`. Each line is classified in this
	 * order: heading, code fence, blank line, list item, indented list
	 * continuation, quote, paragraph. Consecutive lines of the same kind are
	 * merged into one block whose `content` joins them with `\n`.
	 *
	 * @param input Raw message text.
	 * @returns The blocks in source order; an empty array when `input` holds only blank lines.
	 */
	public static split(input: string): Block[] {
		const lines = input.split(/\r?\n/);
		const blocks: Block[] = [];
		let currentBlock: Block | null = null;

		for (let i = 0; i < lines.length; i++) {
			const line = lines[i];

			// Heading: one to six '#' markers, separator whitespace, then text.
			// A heading always occupies exactly one line, so it is emitted immediately.
			const heading = this.parseHeading(line);
			if (heading) {
				this.flush(blocks, currentBlock);
				currentBlock = null;
				blocks.push({
					type: BlockType.HEADING,
					content: heading.content,
					level: heading.level,
				});
				continue;
			}

			// Fenced code: consume every line up to (and including) the closing fence.
			if (line.startsWith('```')) {
				this.flush(blocks, currentBlock);
				currentBlock = null;

				const language = line.slice(3).trim();
				const codeLines: string[] = [];
				i++;
				while (i < lines.length && !lines[i].startsWith('```')) {
					codeLines.push(lines[i]);
					i++;
				}

				blocks.push({
					type: BlockType.CODE,
					content: codeLines.join('\n'),
					language,
					// Running off the end of the input means the fence was never closed.
					incomplete: i >= lines.length,
				});
				continue;
			}

			// Blank line: ends the current block, except that a whitespace-only line
			// starting with a space or tab does not terminate an open list.
			if (line.trim() === '') {
				const startsIndented = this.isInlineWhitespace(line.charCodeAt(0));
				if (!(startsIndented && currentBlock?.type === BlockType.LIST)) {
					this.flush(blocks, currentBlock);
					currentBlock = null;
				}
				continue;
			}

			const listItem = this.parseListItem(line);
			if (listItem) {
				if (currentBlock?.type !== BlockType.LIST) {
					this.flush(blocks, currentBlock);
					currentBlock = {
						type: BlockType.LIST,
						content: line,
						ordered: listItem.isOrdered,
					};
				} else {
					// A list mixing ordered and unordered markers has no single ordering.
					if (currentBlock.ordered !== undefined && currentBlock.ordered !== listItem.isOrdered) {
						currentBlock.ordered = undefined;
					}
					currentBlock.content += `\n${line}`;
				}
				continue;
			}

			// An indented non-item line directly after list content continues that list.
			const isIndented = this.isInlineWhitespace(line.charCodeAt(0));
			if (isIndented && currentBlock?.type === BlockType.LIST) {
				currentBlock.content += `\n${line}`;
				continue;
			}

			if (line.startsWith('>')) {
				if (currentBlock?.type !== BlockType.QUOTE) {
					this.flush(blocks, currentBlock);
					currentBlock = {
						type: BlockType.QUOTE,
						content: line,
					};
				} else {
					currentBlock.content += `\n${line}`;
				}
				continue;
			}

			// Anything else is paragraph text.
			if (currentBlock?.type !== BlockType.PARAGRAPH) {
				this.flush(blocks, currentBlock);
				currentBlock = {
					type: BlockType.PARAGRAPH,
					content: line,
				};
			} else {
				currentBlock.content += `\n${line}`;
			}
		}

		this.flush(blocks, currentBlock);
		return blocks;
	}

	/**
	 * Tells whether a UTF-16 code unit is a space or a tab.
	 * `NaN` (what `charCodeAt` yields for an out-of-range index) is never whitespace,
	 * so callers may pass `line.charCodeAt(pos)` without a separate bounds check.
	 */
	private static isInlineWhitespace(code: number): boolean {
		return code === 32 /* ' ' */ || code === 9 /* '\t' */;
	}

	/**
	 * Recognises a heading line: one to six leading `#` markers, at least one
	 * space or tab, then non-empty text.
	 *
	 * @param line A single source line (no line terminator).
	 * @returns The heading level and its text with the separator whitespace
	 * removed, or `null` when the line is not a heading.
	 */
	private static parseHeading(line: string): { level: number; content: string } | null {
		let pos = 0;

		// Count leading '#' markers, stopping after six.
		while (pos < 6 && line.charCodeAt(pos) === 35 /* '#' */) {
			pos++;
		}

		const level = pos;
		if (level === 0) {
			return null;
		}

		// The markers must be followed by separator whitespace. A seventh '#'
		// lands here as a non-whitespace character and rejects the line.
		if (!this.isInlineWhitespace(line.charCodeAt(pos))) {
			return null;
		}

		// Drop the whole run of separator whitespace so the content starts at the text.
		while (this.isInlineWhitespace(line.charCodeAt(pos))) {
			pos++;
		}

		const content = line.slice(pos);

		// Markers followed by nothing but whitespace do not make a heading.
		if (content.length === 0) {
			return null;
		}

		return { level, content };
	}

	/**
	 * Recognises a list-item line: optional space/tab indentation, then either
	 * `<digits>.` (ordered) or one of `-`, `*`, `+` (unordered), then one space
	 * or tab, then at least one more character.
	 *
	 * @param line A single source line (no line terminator).
	 * @returns Whether the item is ordered, or `null` when the line is not a list item.
	 */
	private static parseListItem(line: string): { isOrdered: boolean } | null {
		let pos = 0;

		// Skip leading indentation.
		while (this.isInlineWhitespace(line.charCodeAt(pos))) {
			pos++;
		}

		const markerStart = pos;
		const isDigit = (code: number): boolean => code >= 48 /* '0' */ && code <= 57 /* '9' */;

		// Ordered marker: digits followed by '.'.
		if (isDigit(line.charCodeAt(pos))) {
			while (isDigit(line.charCodeAt(pos))) {
				pos++;
			}
			if (line.charCodeAt(pos) === 46 /* '.' */ && this.hasItemBody(line, pos + 1)) {
				return { isOrdered: true };
			}
			// Not an ordered item; fall through to the unordered check from the marker position.
			pos = markerStart;
		}

		// Unordered marker: '-', '*' or '+'.
		const marker = line.charCodeAt(pos);
		if (marker === 45 /* '-' */ || marker === 42 /* '*' */ || marker === 43 /* '+' */) {
			if (this.hasItemBody(line, pos + 1)) {
				return { isOrdered: false };
			}
		}

		return null;
	}

	/**
	 * Checks that `line` has a space or tab at `pos` followed by at least one more character,
	 * i.e. that a list marker ending just before `pos` is followed by an item body.
	 */
	private static hasItemBody(line: string, pos: number): boolean {
		return this.isInlineWhitespace(line.charCodeAt(pos)) && pos + 1 < line.length;
	}

	/** Appends `block` to `blocks` unless it is `null`. */
	private static flush(blocks: Block[], block: Block | null): void {
		if (block) {
			blocks.push(block);
		}
	}
}
Loading