|
| 1 | +import { INode, INodeData, INodeParams } from '../../../src/Interface' |
| 2 | +import { getBaseClasses } from '../../../src/utils' |
| 3 | +import { TextSplitter, TextSplitterParams } from '@langchain/textsplitters' |
| 4 | + |
/**
 * Options accepted by RowTextSplitter in addition to the base text-splitter options.
 */
interface RowTextSplitterParams extends TextSplitterParams {
    /** String that separates one row from the next. */
    lineSeparator: string
    /** When true, leading and trailing whitespace is removed from every row. */
    trimWhitespace: boolean
    /** When true, rows that end up empty are kept instead of being dropped. */
    includeEmptyLines: boolean
}
| 10 | + |
/**
 * Text splitter that produces one chunk per row of the input text.
 *
 * The input is split on `lineSeparator`; each resulting row is optionally
 * trimmed, and empty rows are dropped unless `includeEmptyLines` is set.
 */
class RowTextSplitter extends TextSplitter implements RowTextSplitterParams {
    static lc_name() {
        return 'RowTextSplitter'
    }

    lineSeparator: string
    trimWhitespace: boolean
    includeEmptyLines: boolean

    /**
     * @param fields Optional settings. `lineSeparator` defaults to a newline,
     * `trimWhitespace` to true and `includeEmptyLines` to false. Any
     * `chunkSize` / `chunkOverlap` supplied by the caller is overridden.
     */
    constructor(fields?: Partial<RowTextSplitterParams>) {
        super({
            ...fields,
            // Always forced, regardless of what the caller passed: rows are
            // emitted whole, so no size limit or overlap is configured.
            chunkSize: Number.MAX_SAFE_INTEGER,
            chunkOverlap: 0
        })
        // `||` instead of `??`: an empty separator would make String.prototype.split
        // break the text into individual characters rather than rows.
        this.lineSeparator = fields?.lineSeparator || '\n'
        this.trimWhitespace = fields?.trimWhitespace ?? true
        this.includeEmptyLines = fields?.includeEmptyLines ?? false
    }

    /**
     * Splits `text` into rows.
     *
     * @param text Text to split.
     * @returns The rows in their original order; an empty array when `text` is empty.
     */
    async splitText(text: string): Promise<string[]> {
        if (!text) return []

        // The field is public and mutable, so guard against it having been
        // reset to an empty string after construction.
        const separator = this.lineSeparator || '\n'
        // With a plain newline separator, CRLF input leaves a trailing '\r' on
        // every row; strip it even when trimming is disabled.
        const stripCarriageReturn = separator === '\n'

        const rows: string[] = []
        for (const segment of text.split(separator)) {
            const content = stripCarriageReturn && segment.endsWith('\r') ? segment.slice(0, -1) : segment
            const row = this.trimWhitespace ? content.trim() : content

            if (row.length > 0 || this.includeEmptyLines) {
                rows.push(row)
            }
        }

        return rows
    }
}
| 54 | + |
/**
 * Node definition that exposes RowTextSplitter, with its three settings
 * (line separator, whitespace trimming, empty-line handling) as node inputs.
 */
class RowTextSplitter_TextSplitters implements INode {
    label: string
    name: string
    version: number
    description: string
    type: string
    icon: string
    category: string
    baseClasses: string[]
    inputs: INodeParams[]

    constructor() {
        const separatorInput: INodeParams = {
            label: 'Line Separator',
            name: 'lineSeparator',
            type: 'string',
            description: 'Character or string that separates rows. Defaults to newline (\\n).',
            placeholder: '\\n',
            optional: true
        }
        const trimInput: INodeParams = {
            label: 'Trim Whitespace',
            name: 'trimWhitespace',
            type: 'boolean',
            description: 'Trim whitespace from the start and end of each row.',
            default: true,
            optional: true,
            additionalParams: true
        }
        const emptyLinesInput: INodeParams = {
            label: 'Include Empty Lines',
            name: 'includeEmptyLines',
            type: 'boolean',
            description: 'Whether to include empty lines as separate rows.',
            default: false,
            optional: true,
            additionalParams: true
        }

        this.label = 'Row Text Splitter'
        this.name = 'rowTextSplitter'
        this.version = 1.0
        this.type = 'RowTextSplitter'
        this.icon = 'rowTextSplitter.svg'
        this.category = 'Text Splitters'
        this.description = `Splits text into individual rows/lines. Ideal for database table rows, CSV data, or line-based logs.`
        // `type` must already be assigned here: it is the first base class entry.
        this.baseClasses = [this.type, ...getBaseClasses(RowTextSplitter)]
        this.inputs = [separatorInput, trimInput, emptyLinesInput]
    }

    /**
     * Builds a RowTextSplitter from the node's input values.
     *
     * @param nodeData Node data whose `inputs` may carry `lineSeparator`,
     * `trimWhitespace` and `includeEmptyLines`.
     * @returns The configured splitter instance.
     */
    async init(nodeData: INodeData): Promise<any> {
        const rawSeparator = (nodeData.inputs?.lineSeparator as string) || ''
        const shouldTrim = (nodeData.inputs?.trimWhitespace as boolean) ?? true
        const keepEmpty = (nodeData.inputs?.includeEmptyLines as boolean) ?? false

        return new RowTextSplitter({
            lineSeparator: this.normalizeSeparator(rawSeparator),
            trimWhitespace: shouldTrim,
            includeEmptyLines: keepEmpty
        })
    }

    /**
     * Turns the typed-out escape sequences `\r`, `\n` and `\t` (a backslash
     * followed by the letter) into the control characters they stand for.
     *
     * @param separator Separator text as entered in the node input.
     * @returns The separator with escapes resolved, or a newline when the input is empty.
     */
    private normalizeSeparator(separator: string): string {
        if (separator) {
            // Applied in this fixed order: \r, then \n, then \t.
            const escapes: Array<[RegExp, string]> = [
                [/\\r/g, '\r'],
                [/\\n/g, '\n'],
                [/\\t/g, '\t']
            ]
            return escapes.reduce((resolved, [pattern, replacement]) => resolved.replace(pattern, replacement), separator)
        }

        return '\n'
    }
}
| 125 | + |
// Export the node definition under the `nodeClass` key.
module.exports = { nodeClass: RowTextSplitter_TextSplitters }
0 commit comments