From ae3fb6b4b53f23492bd05e5703d0496f313bb71f Mon Sep 17 00:00:00 2001
From: pierreeurope
Date: Thu, 12 Feb 2026 08:25:27 +0100
Subject: [PATCH] feat(processors): auto-detect delimiter for CSV/TSV/DSV files

Add automatic delimiter detection to processCsvData so that files using
tabs, semicolons, or pipe characters as delimiters are parsed correctly
without requiring any user configuration.

The detectDelimiter function checks the first line of the input against
supported delimiters (comma, tab, semicolon, pipe) and picks the one
that produces the most columns, using d3-dsv for proper handling of
quoted fields.

Also adds .tsv and .dsv file extensions to the accepted file formats so
users can drag-and-drop these files directly.

Fixes #202
Related: #168

Signed-off-by: pierreeurope
---
 src/processors/src/data-processor.ts    | 45 +++++++++++++++++++++++++++++++++--
 src/reducers/src/vis-state-selectors.ts |  4 +--
 2 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/src/processors/src/data-processor.ts b/src/processors/src/data-processor.ts
index d18a61bf7f..a9c58188b1 100644
--- a/src/processors/src/data-processor.ts
+++ b/src/processors/src/data-processor.ts
@@ -2,7 +2,7 @@
 // Copyright contributors to the kepler.gl project
 
 import * as arrow from 'apache-arrow';
-import {csvParseRows} from 'd3-dsv';
+import {csvParseRows, tsvParseRows, dsvFormat} from 'd3-dsv';
 import {DATA_TYPES as AnalyzerDATA_TYPES} from 'type-analyzer';
 import normalize from '@mapbox/geojson-normalize';
 import {parseSync} from '@loaders.gl/core';
@@ -40,6 +40,46 @@ import {Feature} from '@nebula.gl/edit-modes';
 // matches empty string
 export const CSV_NULLS = /^(null|NULL|Null|NaN|\/N||)$/;
 
+// Supported delimiters for auto-detection, ordered by priority
+const DELIMITERS = [',', '\t', ';', '|'] as const;
+
+/**
+ * Pick the d3-dsv row parser matching a delimiter.
+ * Comma and tab use the prebuilt csv/tsv parsers; others are built with dsvFormat.
+ */
+function getRowParser(delimiter: string): (text: string) => string[][] {
+  if (delimiter === ',') return csvParseRows;
+  if (delimiter === '\t') return tsvParseRows;
+  return dsvFormat(delimiter).parseRows;
+}
+
+/**
+ * Detect the delimiter used in a DSV string by checking the first line.
+ * Returns the delimiter that produces the most columns (minimum 2).
+ * Falls back to comma if no delimiter produces multiple columns.
+ * On a tie the delimiter listed first in DELIMITERS wins.
+ */
+export function detectDelimiter(rawData: string): string {
+  // No line break: use the whole input (slice(0, -1) would drop the last character)
+  const lineEnd = rawData.indexOf('\n');
+  const firstLine = lineEnd === -1 ? rawData : rawData.slice(0, lineEnd);
+  if (!firstLine) return ',';
+
+  let bestDelimiter = ',';
+  let bestCount = 1;
+
+  for (const delimiter of DELIMITERS) {
+    // Use d3-dsv to properly parse the first line (handles quoted fields)
+    const count = getRowParser(delimiter)(firstLine)[0]?.length ?? 0;
+    if (count > bestCount) {
+      bestCount = count;
+      bestDelimiter = delimiter;
+    }
+  }
+
+  return bestDelimiter;
+}
+
 function tryParseJsonString(str) {
   try {
     return JSON.parse(str);
@@ -116,7 +156,8 @@ export function processCsvData(rawData: unknown[][] | string, header?: string[])
   let headerRow: string[] | undefined;
 
   if (typeof rawData === 'string') {
-    const parsedRows: string[][] = csvParseRows(rawData);
+    const parseRows = getRowParser(detectDelimiter(rawData));
+    const parsedRows: string[][] = parseRows(rawData);
 
     if (!Array.isArray(parsedRows) || parsedRows.length < 2) {
       // looks like an empty file, throw error to be catch
diff --git a/src/reducers/src/vis-state-selectors.ts b/src/reducers/src/vis-state-selectors.ts
index 25efef1dd9..d8793fdc41 100644
--- a/src/reducers/src/vis-state-selectors.ts
+++ b/src/reducers/src/vis-state-selectors.ts
@@ -4,8 +4,8 @@
 import {createSelector} from 'reselect';
 
 // NOTE: default formats must match file-handler-test.js
-const DEFAULT_FILE_EXTENSIONS = ['csv', 'json', 'geojson', 'arrow', 'parquet'];
-const DEFAULT_FILE_FORMATS = ['CSV', 'Json', 'GeoJSON', 'Arrow', 'Parquet'];
+const DEFAULT_FILE_EXTENSIONS = ['csv', 'tsv', 'dsv', 'json', 'geojson', 'arrow', 'parquet'];
+const DEFAULT_FILE_FORMATS = ['CSV', 'TSV', 'DSV', 'Json', 'GeoJSON', 'Arrow', 'Parquet'];
 
 export const getFileFormatNames = createSelector(
   state => state.loaders,