keplergl · igorDykhta · May 5, 2026 · May 5, 2026 · May 5, 2026
diff --git a/src/processors/src/data-processor.ts b/src/processors/src/data-processor.ts
@@ -2,7 +2,7 @@
 // Copyright contributors to the kepler.gl project
 
 import * as arrow from 'apache-arrow';
-import {csvParseRows} from 'd3-dsv';
+import {csvParseRows, tsvParseRows, dsvFormat} from 'd3-dsv';
 import {DATA_TYPES as AnalyzerDATA_TYPES} from 'type-analyzer';
 import normalize from '@mapbox/geojson-normalize';
 import {parseSync} from '@loaders.gl/core';
@@ -40,6 +40,39 @@
 // matches empty string
 export const CSV_NULLS = /^(null|NULL|Null|NaN|\/N||)$/;
 
+const SUPPORTED_DELIMITERS = [',', '\t', ';', '|'] as const;
+
+function getRowParser(delimiter: string): (raw: string) => string[][] {
+  if (delimiter === ',') return csvParseRows;
+  if (delimiter === '\t') return tsvParseRows;
+  return dsvFormat(delimiter).parseRows;
+}
+
+/**
+ * Detect the delimiter used in a DSV string by checking the first line.
+ * Returns the delimiter that produces the most columns (minimum 2).
+ * Falls back to comma if no delimiter produces multiple columns.
+ */
+export function detectDelimiter(rawData: string): string {
+  const newlineIdx = rawData.indexOf('\n');
+  const firstLine = newlineIdx === -1 ? rawData : rawData.slice(0, newlineIdx);
+  if (!firstLine) return ',';
+
+  let bestDelimiter = ',';
+  let bestCount = 1;
+
+  for (const delimiter of SUPPORTED_DELIMITERS) {
+    const parsed = getRowParser(delimiter)(firstLine);
+    const count = parsed[0]?.length || 0;
+    if (count > bestCount) {
+      bestCount = count;
+      bestDelimiter = delimiter;
+    }
+  }
+
+  return bestDelimiter;
+}
+
 function tryParseJsonString(str) {
   try {
     return JSON.parse(str);
@@ -119,11 +152,11 @@
   let headerRow: string[] | undefined;
 
   if (typeof rawData === 'string') {
-    const parsedRows: string[][] = csvParseRows(rawData);
+    const delimiter = detectDelimiter(rawData);
+    const parsedRows: string[][] = getRowParser(delimiter)(rawData);
 
     if (!Array.isArray(parsedRows) || parsedRows.length < 2) {
-      // looks like an empty file, throw error to be catch
-      throw new Error('process Csv Data Failed: CSV is empty');
+      throw new Error('processCsvData Failed: delimited text is empty or has no data rows');
     }
     headerRow = parsedRows[0];
     rows = parsedRows.slice(1);
@@ -550,7 +583,7 @@
  const newColumns: Record<string, arrow.Vector> = {};
  for (let i = 0; i < arrowTable.numCols; i++) {
    const field = arrowTable.schema.fields[i];
    const col = arrowTable.getChildAt(i)!;
    if (arrow.DataType.isInt(field.type) && field.type.bitWidth === 64) {
      const float64Array = new Float64Array(col.length);
      for (let j = 0; j < col.length; j++) {

diff --git a/src/reducers/src/vis-state-selectors.ts b/src/reducers/src/vis-state-selectors.ts
@@ -4,8 +4,8 @@
 import {createSelector} from 'reselect';
 
 // NOTE: default formats must match file-handler-test.js
-const DEFAULT_FILE_EXTENSIONS = ['csv', 'json', 'geojson', 'arrow', 'parquet'];
-const DEFAULT_FILE_FORMATS = ['CSV', 'Json', 'GeoJSON', 'Arrow', 'Parquet'];
+const DEFAULT_FILE_EXTENSIONS = ['csv', 'tsv', 'dsv', 'json', 'geojson', 'arrow', 'parquet'];
+const DEFAULT_FILE_FORMATS = ['CSV', 'TSV', 'DSV', 'Json', 'GeoJSON', 'Arrow', 'Parquet'];
 
 interface LoaderInfo {
   name: string;

diff --git a/test/node/utils/data-processor-test.js b/test/node/utils/data-processor-test.js
@@ -30,7 +30,8 @@ import {
   parseCsvRowsByFieldType,
   processCsvData,
   processGeojson,
-  processRowObject
+  processRowObject,
+  detectDelimiter
 } from '@kepler.gl/processors';
 
 import {validateInputData, createDataContainer} from '@kepler.gl/utils';
@@ -152,6 +153,260 @@ test('Processor -> processCsvData', t => {
   t.end();
 });
 
+test('Processor -> detectDelimiter', t => {
+  t.equal(detectDelimiter('a,b,c\n1,2,3'), ',', 'should detect comma delimiter');
+  t.equal(detectDelimiter('a\tb\tc\n1\t2\t3'), '\t', 'should detect tab delimiter');
+  t.equal(detectDelimiter('a;b;c\n1;2;3'), ';', 'should detect semicolon delimiter');
+  t.equal(detectDelimiter('a|b|c\n1|2|3'), '|', 'should detect pipe delimiter');
+
+  t.equal(
+    detectDelimiter('single_column\nvalue'),
+    ',',
+    'should fall back to comma when no delimiter produces multiple columns'
+  );
+  t.equal(detectDelimiter(''), ',', 'should return comma for empty string');
+  t.equal(
+    detectDelimiter('a,b,c'),
+    ',',
+    'should handle input without newline (single line)'
+  );
+
+  t.equal(
+    detectDelimiter('"a\tb"\tc\td\n1\t2\t3'),
+    '\t',
+    'should handle quoted fields containing other delimiters'
+  );
+  t.equal(
+    detectDelimiter('"city, state"\tpopulation\tarea\n"New York, NY"\t8000000\t302'),
+    '\t',
+    'should detect tab even when commas appear inside quoted fields'
+  );
+
+  t.equal(
+    detectDelimiter('name;age;city\nAlice;30;"Berlin, Germany"'),
+    ';',
+    'should detect semicolon with quoted fields containing commas'
+  );
+
+  t.equal(
+    detectDelimiter('a\tb\tc\td\te\n1\t2\t3\t4\t5'),
+    '\t',
+    'should prefer delimiter that produces more columns'
+  );
+
+  t.end();
+});
+
+test('Processor -> processCsvData -> tab-separated', t => {
+  const tsvData = 'name\tage\tcity\nAlice\t30\tBerlin\nBob\t25\tParis';
+  const result = processCsvData(tsvData);
+
+  t.equal(result.fields.length, 3, 'should parse 3 fields from TSV');
+  t.equal(result.fields[0].name, 'name', 'first field should be name');
+  t.equal(result.fields[1].name, 'age', 'second field should be age');
+  t.equal(result.fields[2].name, 'city', 'third field should be city');
+  t.equal(result.rows.length, 2, 'should have 2 data rows');
+  t.deepEqual(result.rows[0], ['Alice', 30, 'Berlin'], 'should parse first row correctly');
+  t.deepEqual(result.rows[1], ['Bob', 25, 'Paris'], 'should parse second row correctly');
+
+  t.end();
+});
+
+test('Processor -> processCsvData -> semicolon-separated', t => {
+  const ssvData = 'name;value;active\nfoo;100;true\nbar;200;false';
+  const result = processCsvData(ssvData);
+
+  t.equal(result.fields.length, 3, 'should parse 3 fields from semicolon-separated data');
+  t.equal(result.fields[0].name, 'name', 'first field should be name');
+  t.equal(result.fields[1].name, 'value', 'second field should be value');
+  t.equal(result.fields[2].name, 'active', 'third field should be active');
+  t.equal(result.rows.length, 2, 'should have 2 data rows');
+  t.deepEqual(result.rows[0], ['foo', 100, true], 'should parse first row correctly');
+  t.deepEqual(result.rows[1], ['bar', 200, false], 'should parse second row correctly');
+
+  t.end();
+});
+
+test('Processor -> processCsvData -> pipe-separated', t => {
+  const psvData = 'id|name|score\n1|Alice|95.5\n2|Bob|87.3';
+  const result = processCsvData(psvData);
+
+  t.equal(result.fields.length, 3, 'should parse 3 fields from pipe-separated data');
+  t.equal(result.fields[0].name, 'id', 'first field should be id');
+  t.equal(result.fields[1].name, 'name', 'second field should be name');
+  t.equal(result.fields[2].name, 'score', 'third field should be score');
+  t.equal(result.rows.length, 2, 'should have 2 data rows');
+
+  t.end();
+});
+
+test('Processor -> processCsvData -> semicolon with quoted commas', t => {
+  const data = '"City, Country";Population;Area\n"Berlin, Germany";3600000;891\n"Paris, France";2100000;105';
+  const result = processCsvData(data);
+
+  t.equal(result.fields.length, 3, 'should parse 3 fields');
+  t.equal(result.fields[0].name, 'City, Country', 'should preserve comma inside quotes');
+  t.equal(result.rows.length, 2, 'should have 2 data rows');
+  t.equal(result.rows[0][0], 'Berlin, Germany', 'should preserve quoted value with comma');
+
+  t.end();
+});
+
+test('Processor -> detectDelimiter -> Windows line endings (CRLF)', t => {
+  t.equal(
+    detectDelimiter('a\tb\tc\r\n1\t2\t3\r\n'),
+    '\t',
+    'should detect tab delimiter with CRLF line endings'
+  );
+  t.equal(
+    detectDelimiter('a;b;c\r\n1;2;3\r\n'),
+    ';',
+    'should detect semicolon delimiter with CRLF line endings'
+  );
+
+  t.end();
+});
+
+test('Processor -> detectDelimiter -> ambiguous cases', t => {
+  t.equal(
+    detectDelimiter('a,b\tc,d\n1,2\t3,4'),
+    ',',
+    'should prefer comma when comma produces more columns than tab'
+  );
+  t.equal(
+    detectDelimiter('a\tb\tc\td,e\n1\t2\t3\t4,5'),
+    '\t',
+    'should prefer tab when tab produces more columns than comma'
+  );
+  t.equal(
+    detectDelimiter('a;b;c;d|e\n1;2;3;4|5'),
+    ';',
+    'should prefer semicolon when it produces more columns than pipe'
+  );
+
+  t.end();
+});
+
+test('Processor -> detectDelimiter -> trailing and leading whitespace', t => {
+  t.equal(
+    detectDelimiter('  a\tb\tc  \n  1\t2\t3  '),
+    '\t',
+    'should detect tab even with surrounding whitespace'
+  );
+  t.equal(
+    detectDelimiter(' a ; b ; c \n 1 ; 2 ; 3 '),
+    ';',
+    'should detect semicolon even with spaces around values'
+  );
+
+  t.end();
+});
+
+test('Processor -> processCsvData -> tab-separated with empty fields', t => {
+  const tsvData = 'name\tage\tcity\nAlice\t\tBerlin\n\t25\t';
+  const result = processCsvData(tsvData);
+
+  t.equal(result.fields.length, 3, 'should parse 3 fields');
+  t.equal(result.rows.length, 2, 'should have 2 data rows');
+  t.equal(result.rows[0][0], 'Alice', 'first row first value should be Alice');
+  t.equal(result.rows[0][1], null, 'first row second value should be null (empty)');
+  t.equal(result.rows[0][2], 'Berlin', 'first row third value should be Berlin');
+  t.equal(result.rows[1][0], null, 'second row first value should be null (empty)');
+
+  t.end();
+});
+
+test('Processor -> processCsvData -> tab-separated with many columns', t => {
+  const headers = Array.from({length: 20}, (_, i) => `col${i}`).join('\t');
+  const row1 = Array.from({length: 20}, (_, i) => `val${i}`).join('\t');
+  const row2 = Array.from({length: 20}, (_, i) => `row2_${i}`).join('\t');
+  const tsvData = `${headers}\n${row1}\n${row2}`;
+  const result = processCsvData(tsvData);
+
+  t.equal(result.fields.length, 20, 'should parse 20 fields from wide TSV');
+  t.equal(result.fields[0].name, 'col0', 'first field should be col0');
+  t.equal(result.fields[19].name, 'col19', 'last field should be col19');
+  t.equal(result.rows.length, 2, 'should have 2 data rows');
+  t.equal(result.rows[0][0], 'val0', 'first cell should be val0');
+  t.equal(result.rows[0][19], 'val19', 'last cell should be val19');
+
+  t.end();
+});
+
+test('Processor -> processCsvData -> semicolon-separated with numeric data', t => {
+  const data = 'lat;lng;value\n52.52;13.405;1000.5\n48.8566;2.3522;2000.7\n40.4168;-3.7038;1500.3';
+  const result = processCsvData(data);
+
+  t.equal(result.fields.length, 3, 'should parse 3 fields');
+  t.equal(result.rows.length, 3, 'should have 3 data rows');
+  t.equal(result.rows[0][0], 52.52, 'should parse lat as number');
+  t.equal(result.rows[0][1], 13.405, 'should parse lng as number');
+  t.equal(result.rows[0][2], 1000.5, 'should parse value as number');
+
+  t.end();
+});
+
+test('Processor -> processCsvData -> pipe-separated with special characters in values', t => {
+  const data = 'id|description|url\n1|"hello, world"|http://example.com\n2|"foo; bar"|http://test.org';
+  const result = processCsvData(data);
+
+  t.equal(result.fields.length, 3, 'should parse 3 fields');
+  t.equal(result.rows[0][1], 'hello, world', 'should handle commas inside quoted pipe-separated fields');
+  t.equal(result.rows[1][1], 'foo; bar', 'should handle semicolons inside quoted pipe-separated fields');
+
+  t.end();
+});
+
+test('Processor -> processCsvData -> tab-separated preserves original comma data', t => {
+  const csvData = 'a,b,c\n1,2,3\n4,5,6';
+  const result = processCsvData(csvData);
+
+  t.equal(result.fields.length, 3, 'regular CSV should still parse correctly');
+  t.equal(result.fields[0].name, 'a', 'field name should be a');
+  t.deepEqual(result.rows[0], [1, 2, 3], 'first row should be [1,2,3]');
+  t.deepEqual(result.rows[1], [4, 5, 6], 'second row should be [4,5,6]');
+
+  t.end();
+});
+
+test('Processor -> processCsvData -> trailing newline does not break parsing', t => {
+  const tsvData = 'name\tage\nAlice\t30\nBob\t25\n';
+  const result = processCsvData(tsvData);
+
+  t.equal(result.fields.length, 2, 'should parse 2 fields');
+  t.equal(result.rows.length, 2, 'should have 2 data rows (trailing newline ignored)');
+
+  const ssvData = 'x;y;z\n1;2;3\n4;5;6\n';
+  const result2 = processCsvData(ssvData);
+
+  t.equal(result2.fields.length, 3, 'semicolon: should parse 3 fields');
+  t.equal(result2.rows.length, 2, 'semicolon: trailing newline should not add empty row');
+
+  t.end();
+});
+
+test('Processor -> processCsvData -> single data row (header + 1 row)', t => {
+  const tsvData = 'x\ty\tz\n10\t20\t30';
+  const result = processCsvData(tsvData);
+
+  t.equal(result.fields.length, 3, 'should parse 3 fields');
+  t.equal(result.rows.length, 1, 'should have 1 data row');
+  t.deepEqual(result.rows[0], [10, 20, 30], 'should parse the single row correctly');
+
+  t.end();
+});
+
+test('Processor -> processCsvData -> timestamps in TSV', t => {
+  const tsvData = 'timestamp\tvalue\n2023-01-15 10:30:00\t100\n2023-02-20 14:45:30\t200';
+  const result = processCsvData(tsvData);
+
+  t.equal(result.fields.length, 2, 'should parse 2 fields');
+  t.equal(result.fields[0].type, 'timestamp', 'should detect timestamp type');
+  t.equal(result.rows.length, 2, 'should have 2 rows');
+
+  t.end();
+});
+
 test('Processor -> processCsvData -> duplicated field name', t => {
   const testData1 = `column1,column1,column1,column2\na,b,c,d\nc,d,e,f`;