Skip to content

Commit c81445a

Browse files
committed
Parse tables from CSV, TSV, and fixed space delimited formats
1 parent 8becd75 commit c81445a

1 file changed

Lines changed: 75 additions & 3 deletions

File tree

src/parse-table.js

Lines changed: 75 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -205,11 +205,83 @@ function parseTable(inputString) {
205205
/**
 * Parses plain-text table data into a rectangular matrix of cell strings.
 *
 * Every candidate delimiter is tried against all rows and scored with
 * getNumSplitRows(); the candidate producing the most consistent split wins
 * (see sortByBestRowSplit() for the ranking rules).
 *
 * @param {string} textString - Raw table text, one row per line.
 * @returns {string[][]} One array of cells per input line. Short rows are
 *   padded with empty strings so every row has the same length.
 */
function parseTextTable(textString) {
  // Split the input by line breaks for rows
  const rows = textString.trim().split(/\r?\n/);
  const rowCount = rows.length;
  // getNumSplitRows() starts its "equal width" counter at -1 and does not
  // count the first split row, so a fully uniform table scores rowCount - 2.
  const uniformScore = rowCount - 2;

  // Delimiters in priority order: TAB (spreadsheets, IDEs), comma (CSV),
  // semicolon (CSV), pipe, or runs of whitespace (fixed width).
  const delimiters = [/\t|\s\t/, /,/, /;/, /\|/, /\s+/];
  const candidates = [];
  const scores = [];

  // Find the best delimiter to split the table
  for (let i = 0; i < delimiters.length; i++) {
    const matrix = rows.map((row) => row.split(delimiters[i]));
    const score = getNumSplitRows(matrix, i);
    const [equalRows, splitRows] = score;
    // Early exit: every row was split and all rows have the same width.
    // The `equalRows > 1` guard means this is only taken for tables of four
    // or more rows; smaller tables fall through to the ranking below.
    if (equalRows > 1 && equalRows >= uniformScore && splitRows >= rowCount) {
      return matrix;
    }
    candidates[i] = matrix;
    scores[i] = score;
  }

  // Choose the delimiter that best splits the table; each score carries its
  // delimiter index in slot 2, so sorting the scores in place is safe.
  scores.sort(sortByBestRowSplit);
  const bestIndex = scores[0][2];
  const table = candidates[bestIndex];

  // Append empty cells to make the table rectangular and avoid errors while converting
  const maxCols = getMaxCols(table);
  return table.map((row) =>
    row.length < maxCols
      ? row.concat(new Array(maxCols - row.length).fill(""))
      : row
  );
}
211244

212-
return matrix;
245+
/**
 * Comparator ranking delimiter scores from best to worst.
 *
 * Each score is a triple `[numColsEqual, numRowSplit, patternIndex]` as
 * returned by getNumSplitRows().
 *
 * @param {number[]} a - First score triple.
 * @param {number[]} b - Second score triple.
 * @returns {number} Negative when `a` ranks first, positive when `b` ranks
 *   first, 0 when the triples are equivalent.
 */
function sortByBestRowSplit(a, b) {
  // More rows with the same number of columns (DESC)
  const byEqualRows = b[0] - a[0];
  if (byEqualRows !== 0) {
    return byEqualRows;
  }
  // More rows actually split by the pattern (DESC)
  const bySplitRows = b[1] - a[1];
  if (bySplitRows !== 0) {
    return bySplitRows;
  }
  // Pattern order (ASC): earlier patterns win ties, so TAB has priority
  return a[2] - b[2];
}
255+
256+
/**
 * Returns the length of the longest row in a matrix.
 *
 * @param {Array<Array>} matrix - Rows of cells.
 * @returns {number} The largest row length, or 0 for an empty matrix.
 */
function getMaxCols(matrix) {
  return matrix.reduce(
    (widest, row) => (row.length > widest ? row.length : widest),
    0
  );
}
267+
268+
/**
 * Scores how well a delimiter split a table.
 *
 * @param {Array<Array>} matrix - Rows of cells produced by one delimiter.
 * @param {number} index - Index of that delimiter; passed through unchanged
 *   so the score can be mapped back to its delimiter after sorting.
 * @returns {number[]} `[numColsEqual, numRowSplit, index]` where
 *   `numRowSplit` is the number of rows with more than one cell and
 *   `numColsEqual` is -1 plus the number of split rows, after the first one,
 *   whose width equals the first split row's width. When every row is split
 *   to the same width this is `matrix.length - 2`.
 */
function getNumSplitRows(matrix, index) {
  let numRowSplit = 0;
  // Starts at -1 (not 0); callers compare against `rows - 2` accordingly.
  let numColsEqual = -1;
  // Width of the first split row; -1 until one is seen.
  let numCols = -1;
  for (const row of matrix) {
    const cols = row.length;
    if (cols <= 1) {
      // The delimiter did not occur in this row.
      continue;
    }
    numRowSplit += 1;
    if (numCols <= 0) {
      numCols = cols;
    } else if (cols === numCols) {
      numColsEqual += 1;
    }
  }
  return [numColsEqual, numRowSplit, index];
}
214286

215287
module.exports = {

0 commit comments

Comments
 (0)