Skip to content

Commit 2bbbec8

Browse files
committed
add Row Text Splitter node for line-based chunking
1 parent 56f272a commit 2bbbec8

File tree

2 files changed

+132
-0
lines changed

2 files changed

+132
-0
lines changed
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
import { INode, INodeData, INodeParams } from '../../../src/Interface'
2+
import { getBaseClasses } from '../../../src/utils'
3+
import { TextSplitter, TextSplitterParams } from '@langchain/textsplitters'
4+
5+
/**
 * Options accepted by RowTextSplitter, in addition to the base TextSplitterParams.
 */
interface RowTextSplitterParams extends TextSplitterParams {
    /** When true, rows that are empty are still emitted as chunks. */
    includeEmptyLines: boolean
    /** When true, whitespace is trimmed from both ends of every row. */
    trimWhitespace: boolean
    /** String on which the input text is cut into rows. */
    lineSeparator: string
}
10+
11+
/**
 * Text splitter that emits one chunk per row (line) of the input text.
 *
 * The text is cut on `lineSeparator`; every row is optionally trimmed and empty rows are
 * optionally dropped. `chunkSize` and `chunkOverlap` are fixed by the constructor
 * (`Number.MAX_SAFE_INTEGER` and `0`), overriding any values supplied in `fields`.
 */
class RowTextSplitter extends TextSplitter implements RowTextSplitterParams {
    static lc_name() {
        return 'RowTextSplitter'
    }

    /** String on which the text is cut into rows. */
    lineSeparator: string
    /** Whether whitespace is trimmed from both ends of each row. */
    trimWhitespace: boolean
    /** Whether rows that end up empty are kept in the output. */
    includeEmptyLines: boolean

    /**
     * @param fields Optional settings. Missing values default to a `'\n'` separator,
     * trimming enabled and empty rows excluded.
     */
    constructor(fields?: Partial<RowTextSplitterParams>) {
        super({
            ...fields,
            chunkSize: Number.MAX_SAFE_INTEGER,
            chunkOverlap: 0
        })
        // `||` rather than `??`: an empty separator would make String.prototype.split
        // break the text into single characters instead of rows.
        this.lineSeparator = fields?.lineSeparator || '\n'
        this.trimWhitespace = fields?.trimWhitespace ?? true
        this.includeEmptyLines = fields?.includeEmptyLines ?? false
    }

    /**
     * Splits `text` into rows.
     *
     * @param text Text to split.
     * @returns One string per row, in input order; an empty array when `text` is empty.
     */
    async splitText(text: string): Promise<string[]> {
        if (!text) return []

        // The field is public and mutable, so guard against an empty separator here as well.
        const separator = this.lineSeparator || '\n'
        const segments = text.split(separator)

        // Text that ends with the separator leaves one empty trailing segment. It marks the
        // end of the last row rather than an extra row, so it must not show up as an empty
        // line when includeEmptyLines is enabled.
        if (segments.length > 1 && segments[segments.length - 1] === '') {
            segments.pop()
        }

        // With a bare "\n" separator, CRLF input leaves a "\r" at the end of every row.
        const stripCarriageReturn = separator === '\n'
        const rows: string[] = []

        for (const segment of segments) {
            const withoutCr = stripCarriageReturn ? segment.replace(/\r$/, '') : segment
            const row = this.trimWhitespace ? withoutCr.trim() : withoutCr

            if (row.length > 0 || this.includeEmptyLines) {
                rows.push(row)
            }
        }

        return rows
    }
}
54+
55+
/**
 * Node definition for the "Row Text Splitter": declares the node's metadata and its three
 * inputs, and builds a configured RowTextSplitter from the values supplied at runtime.
 */
class RowTextSplitter_TextSplitters implements INode {
    label: string
    name: string
    version: number
    description: string
    type: string
    icon: string
    category: string
    baseClasses: string[]
    inputs: INodeParams[]

    constructor() {
        // Input descriptors, built up front and assigned to `inputs` below.
        const separatorParam: INodeParams = {
            label: 'Line Separator',
            name: 'lineSeparator',
            type: 'string',
            description: 'Character or string that separates rows. Defaults to newline (\\n).',
            placeholder: '\\n',
            optional: true
        }
        const trimParam: INodeParams = {
            label: 'Trim Whitespace',
            name: 'trimWhitespace',
            type: 'boolean',
            description: 'Trim whitespace from the start and end of each row.',
            default: true,
            optional: true,
            additionalParams: true
        }
        const emptyLinesParam: INodeParams = {
            label: 'Include Empty Lines',
            name: 'includeEmptyLines',
            type: 'boolean',
            description: 'Whether to include empty lines as separate rows.',
            default: false,
            optional: true,
            additionalParams: true
        }

        this.label = 'Row Text Splitter'
        this.name = 'rowTextSplitter'
        this.version = 1.0
        this.type = 'RowTextSplitter'
        this.icon = 'rowTextSplitter.svg'
        this.category = 'Text Splitters'
        this.description = `Splits text into individual rows/lines. Ideal for database table rows, CSV data, or line-based logs.`
        // Reads this.type, so it has to come after the assignment above.
        this.baseClasses = [this.type, ...getBaseClasses(RowTextSplitter)]
        this.inputs = [separatorParam, trimParam, emptyLinesParam]
    }

    /**
     * Creates the splitter from the node's input values.
     *
     * @param nodeData Node data whose `inputs` may carry `lineSeparator`, `trimWhitespace`
     * and `includeEmptyLines`; missing values fall back to newline, `true` and `false`.
     * @returns The configured RowTextSplitter instance.
     */
    async init(nodeData: INodeData): Promise<any> {
        const inputs = nodeData.inputs
        const rawSeparator = (inputs?.lineSeparator as string) || ''

        return new RowTextSplitter({
            lineSeparator: this.normalizeSeparator(rawSeparator),
            trimWhitespace: (inputs?.trimWhitespace as boolean) ?? true,
            includeEmptyLines: (inputs?.includeEmptyLines as boolean) ?? false
        })
    }

    /**
     * Converts the two-character sequences `\r`, `\n` and `\t` typed by the user
     * (backslash plus letter) into the control characters they denote.
     *
     * @param separator Separator as entered; may be empty.
     * @returns The separator with those sequences replaced, or a newline when it is empty.
     */
    private normalizeSeparator(separator: string): string {
        if (separator) {
            const controlChars = { r: '\r', n: '\n', t: '\t' } as const
            // One pass over the string: each backslash + r/n/t pair maps to its control character.
            return separator.replace(/\\([rnt])/g, (_match, code: keyof typeof controlChars) => controlChars[code])
        }

        return '\n'
    }
}
125+
126+
// CommonJS export exposing the node class under the `nodeClass` key.
// NOTE(review): presumably this is the shape the host's node loader looks for — confirm.
module.exports = { nodeClass: RowTextSplitter_TextSplitters }
Lines changed: 6 additions & 0 deletions
Loading

0 commit comments

Comments
 (0)