Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,4 @@ report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json
.idea

# Finder (MacOS) folder config
.DS_Store
.DS_Store
297 changes: 294 additions & 3 deletions bun.lock

Large diffs are not rendered by default.

10 changes: 8 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
{
"name": "fuel-mcp-server",
"version": "0.0.1",
"scripts": {
"build": "tsc && node postbuild.cjs && mkdir -p bin/napi-v3/darwin/arm64 && cp node_modules/onnxruntime-node/bin/napi-v3/darwin/arm64/* bin/napi-v3/darwin/arm64/ && mkdir -p build/Release && cp node_modules/sharp/build/Release/sharp-darwin-arm64v8.node build/Release/ && cp -r node_modules/sharp/vendor ./vendor",
"build:post": "node postbuild.cjs",
"start": "bun run src/index.ts",
"mcp-server": "bun run src/mcp-server.ts",
"index": "bun run src/indexer.ts",
"index": "bun run src/docs/indexer.ts",
"query": "bun run src/docs/query.ts --run",
"test": "bun test && bun run vitest run"
},
"devDependencies": {
"@types/bun": "latest",
"@types/node": "^20.11.20",
"esbuild": "^0.25.2",
"typescript": "^5",
"vitest": "^3.1.1"
Expand All @@ -21,8 +25,10 @@
"@qdrant/js-client-rest": "^1.13.0",
"@types/markdown-it": "^14.1.2",
"@xenova/transformers": "^2.17.2",
"dotenv": "^16.4.5",
"fuels": "^0.100.3",
"markdown-it": "^14.1.0",
"onnxruntime-node": "^1.18.0",
"zod": "^3.23.8"
}
}
}
120 changes: 120 additions & 0 deletions src/common/chunker.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import { describe, it, expect } from "bun:test";
import { chunkMarkdown } from "./chunker";

/** Test-only token estimator: treats every character of the string as one token. */
function estimateTokens(text: string): number {
  return text.length;
}

// Behavioural tests for chunkMarkdown. Every case uses the character-count
// estimator defined above, so "token" sizes below are plain string lengths.
describe("chunkMarkdown", () => {
// Prose only: expects three chunks (title + first paragraph, second paragraph,
// subtitle + last paragraph), each no larger than 1.5x the target size.
it("should chunk basic markdown text based on target size", () => {
const markdown = `
# Title

This is the first paragraph. It has some text.

This is the second paragraph. It also has text, maybe a bit more.

## Subtitle

Another paragraph here.
`.trim();
const targetSize = 50;
const chunks = chunkMarkdown(markdown, targetSize, estimateTokens);

expect(chunks).toHaveLength(3);
expect(chunks[0]!.content).toContain("# Title");
expect(chunks[0]!.content).toContain("This is the first paragraph.");
expect(estimateTokens(chunks[0]!.content)).toBeLessThanOrEqual(targetSize * 1.5);
expect(chunks[1]!.content).toContain("This is the second paragraph.");
expect(estimateTokens(chunks[1]!.content)).toBeLessThanOrEqual(targetSize * 1.5);
expect(chunks[2]!.content).toContain("## Subtitle");
expect(chunks[2]!.content).toContain("Another paragraph here.");
expect(estimateTokens(chunks[2]!.content)).toBeLessThanOrEqual(targetSize * 1.5);
});

// Prose interleaved with two fenced blocks: each fence must come back as its
// own 'code' chunk, with the surrounding prose trimmed into separate chunks.
it("should not split code blocks", () => {
const markdown = `
Intro text.

\`\`\`javascript
// This is a code block
function hello() {
console.log("Hello");
}
\`\`\`

More text.

\`\`\`python
# Another code block
print("World")
\`\`\`
`.trim();
const targetSize = 50;
const chunks = chunkMarkdown(markdown, targetSize, estimateTokens);

expect(chunks).toHaveLength(4);
expect(chunks[0]!.content).toBe("Intro text.");
expect(chunks[1]!.content).toContain("function hello()");
expect(chunks[1]!.type).toBe("code");
expect(chunks[2]!.content).toBe("More text.");
expect(chunks[3]!.content).toContain('print("World")');
expect(chunks[3]!.type).toBe("code");
});

// A fenced block larger than the target must stay in one piece: the code chunk
// is expected to exceed targetSize rather than be split.
it("should create a large chunk if a single code block exceeds target size", () => {
const longCode = Array(10).fill("console.log('line');").join("\n");
const markdown = `
Some intro text.

\`\`\`javascript
${longCode}
\`\`\`

Some outro text.
`.trim();
const targetSize = 50;
const chunks = chunkMarkdown(markdown, targetSize, estimateTokens);

expect(chunks).toHaveLength(3);
expect(chunks[0]!.content).toBe("Some intro text.");
expect(chunks[1]!.content).toContain(longCode);
expect(chunks[1]!.type).toBe("code");
expect(estimateTokens(chunks[1]!.content)).toBeGreaterThan(targetSize);
expect(chunks[2]!.content).toBe("Some outro text.");
});

// No prose at all: the blank line between the two fences must not produce an
// empty text chunk, so exactly two 'code' chunks are expected.
it("should handle markdown with only code blocks", () => {
const markdown = `
\`\`\`javascript
console.log("first");
\`\`\`

\`\`\`python
print("second")
\`\`\`
`.trim();
const targetSize = 30;
const chunks = chunkMarkdown(markdown, targetSize, estimateTokens);

expect(chunks).toHaveLength(2);
expect(chunks[0]!.content).toContain('console.log("first");');
expect(chunks[0]!.type).toBe("code");
expect(chunks[1]!.content).toContain('print("second")');
expect(chunks[1]!.type).toBe("code");
});

// Empty input yields no chunks.
it("should handle empty markdown", () => {
const markdown = "";
const targetSize = 100;
const chunks = chunkMarkdown(markdown, targetSize, estimateTokens);
expect(chunks).toHaveLength(0);
});

// Input well under the target is returned verbatim as a single 'text' chunk.
it("should handle markdown smaller than target size", () => {
const markdown = "# Title\n\nJust a little bit of text.";
const targetSize = 1000;
const chunks = chunkMarkdown(markdown, targetSize, estimateTokens);
expect(chunks).toHaveLength(1);
expect(chunks[0]!.content).toBe(markdown);
expect(chunks[0]!.type).toBe("text");
});
});
93 changes: 93 additions & 0 deletions src/common/chunker.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import MarkdownIt from 'markdown-it';

/**
 * One piece of a Markdown document, as returned by `chunkMarkdown`.
 */
export interface MarkdownChunk {
/** The chunk's text, trimmed of surrounding whitespace. For code chunks this includes the ``` fences. */
content: string;
/** `'code'` for a fenced code block kept whole; `'text'` for everything else. */
type: 'text' | 'code';
}

// Module-level markdown-it parser instance.
// NOTE(review): `md` is not referenced by any function in this file (chunking is
// done with a regex and string splitting) — confirm whether it is still needed.
const md = new MarkdownIt();

/**
 * Splits a Markdown document into chunks, keeping fenced code blocks intact.
 *
 * Every ``` ... ``` fenced block becomes exactly one `'code'` chunk, regardless
 * of its size. The prose between fences is handed to `splitTextChunk`, which
 * breaks it into `'text'` chunks of roughly `targetTokenSize` tokens.
 *
 * @param markdown - The Markdown source. Empty or whitespace-only input yields `[]`.
 * @param targetTokenSize - Desired size of each text chunk, in the units returned by `estimateTokens`.
 * @param estimateTokens - Callback that estimates the token count of a string.
 * @returns The chunks in document order.
 */
export function chunkMarkdown(
  markdown: string,
  targetTokenSize: number,
  estimateTokens: (text: string) => number
): MarkdownChunk[] {
  if (!markdown?.trim()) {
    return [];
  }

  // Splitting on a capturing group keeps the matched fences in the result and
  // guarantees the layout [prose, code, prose, code, ..., prose]: odd indices
  // are always fenced blocks, even indices are always (possibly empty) prose.
  // Classifying by position instead of by inspecting the text means a prose
  // fragment that merely starts and ends with ``` (e.g. a stray, unterminated
  // fence) is no longer mistaken for a code block.
  const parts = markdown.split(/(```[\s\S]*?```)/g);

  const chunks: MarkdownChunk[] = [];
  parts.forEach((part, index) => {
    const isCodeBlock = index % 2 === 1;
    if (isCodeBlock) {
      chunks.push({ content: part.trim(), type: 'code' });
    } else if (part.trim()) {
      // Whitespace-only gaps between fences produce no text chunk.
      chunks.push(...splitTextChunk(part, targetTokenSize, estimateTokens));
    }
  });

  return chunks;
}

/**
 * Breaks a run of prose into `'text'` chunks of roughly `targetTokenSize` tokens.
 *
 * Works in two passes:
 *  1. Any segment larger than 120% of the target is split on blank lines and,
 *     if pieces are still too large, on single newlines. Pieces are trimmed and
 *     empty pieces are dropped.
 *  2. The resulting segments are greedily merged back together (joined with a
 *     blank line) for as long as the merged text stays within 120% of the target.
 *
 * A segment that cannot be split any further is emitted as-is, even if it is
 * larger than the limit. Note that merged segments are always joined with a
 * blank line, including segments that were originally separated by a single
 * newline.
 *
 * @param text - The prose to split; surrounding whitespace is trimmed.
 * @param targetTokenSize - Desired chunk size, in the units returned by `estimateTokens`.
 * @param estimateTokens - Callback that estimates the token count of a string.
 * @returns The text chunks in their original order.
 */
function splitTextChunk(
  text: string,
  targetTokenSize: number,
  estimateTokens: (text: string) => number
): MarkdownChunk[] {
  // Chunks may overshoot the target by up to 20% before a split is forced.
  const maxChunkSize = targetTokenSize * 1.2;

  // Pass 1: progressively split oversized segments on coarser, then finer, separators.
  let segments: string[] = [text.trim()];
  for (const separator of ['\n\n', '\n']) {
    segments = segments.flatMap((segment) =>
      estimateTokens(segment) > maxChunkSize && segment.includes(separator)
        ? segment.split(separator).map((piece) => piece.trim()).filter(Boolean)
        : [segment]
    );
  }

  // Pass 2: greedily pack consecutive segments into chunks.
  const textChunks: MarkdownChunk[] = [];
  let pending = '';
  for (const segment of segments) {
    if (!pending) {
      if (estimateTokens(segment) > maxChunkSize) {
        // Unsplittable and oversized: emit on its own rather than buffering it.
        textChunks.push({ content: segment, type: 'text' });
      } else {
        pending = segment;
      }
      continue;
    }

    const combined = `${pending}\n\n${segment}`;
    if (estimateTokens(combined) > maxChunkSize) {
      // Adding this segment would overflow: flush the buffer and start a new one.
      textChunks.push({ content: pending, type: 'text' });
      pending = segment;
    } else {
      pending = combined;
    }
  }

  if (pending) {
    textChunks.push({ content: pending, type: 'text' });
  }

  return textChunks;
}
36 changes: 36 additions & 0 deletions src/common/utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/**
* Common utility functions
*/

/**
 * Debug logger: forwards its arguments to `console.log`, but only when the
 * `LOG_LEVEL` environment variable is exactly "debug". Otherwise it does nothing.
 *
 * @param messages - Values to print, passed through to `console.log` unchanged.
 */
export function log(...messages: any[]) {
  const debugEnabled = process.env.LOG_LEVEL === "debug";
  if (!debugEnabled) {
    return;
  }
  console.log(...messages);
}

/**
 * Recursively converts a value into a plain structure of strings, primitives,
 * arrays and objects.
 *
 * Rules, applied in order:
 *  - `null` / `undefined` are returned unchanged.
 *  - `bigint` values are converted to their decimal string.
 *  - Arrays are mapped element by element.
 *  - Objects that define their own `toString` (i.e. not the default
 *    `Object.prototype.toString`) and have no `toJSON` are replaced by the
 *    result of `toString()`.
 *  - Any other object is copied key by key, skipping function-valued
 *    properties, keys starting with `_`, and the keys `parent` and
 *    `functionInvocationScopes`.
 *  - Functions and symbols are converted with `toString()`.
 *  - Remaining primitives (string, number, boolean) are returned unchanged.
 *
 * The previous implementation stringified anything that had a `toString`
 * method and no `toJSON`, which matched arrays, plain objects, numbers and
 * booleans too, so the array and object branches were effectively unreachable
 * and plain objects collapsed to "[object Object]".
 *
 * NOTE(review): the custom-`toString` rule is presumably meant for big-number
 * style wrapper objects — confirm against callers.
 *
 * @param value - Any value.
 * @returns The converted value.
 */
export function processValue(value: unknown): unknown {
  if (value === null || value === undefined) {
    return value;
  }

  if (typeof value === 'bigint') {
    return value.toString();
  }

  if (Array.isArray(value)) {
    return value.map((item) => processValue(item));
  }

  if (typeof value === 'object') {
    const candidate = value as { toString?: unknown; toJSON?: unknown };
    const hasCustomToString =
      typeof candidate.toString === 'function' &&
      candidate.toString !== Object.prototype.toString;
    if (hasCustomToString && typeof candidate.toJSON === 'undefined') {
      return value.toString();
    }

    const source = value as { [key: string]: unknown };
    const result: { [key: string]: unknown } = {};
    for (const key in source) {
      const entry = source[key];
      const isSkipped =
        typeof entry === 'function' ||
        key.startsWith('_') ||
        key === 'parent' ||
        key === 'functionInvocationScopes';
      if (!isSkipped) {
        result[key] = processValue(entry);
      }
    }
    return result;
  }

  if (typeof value === 'function' || typeof value === 'symbol') {
    // Not representable as plain data; keep the original stringification.
    return value.toString();
  }

  return value;
}
Loading