Skip to content

Commit e79c806

Browse files
authored
Merge pull request #38 from juarezr/feat/csv
Parse tables from CSV, TSV, and fixed space delimited formats
2 parents 6a8cccb + c32dc6c commit e79c806

1 file changed

Lines changed: 79 additions & 6 deletions

File tree

src/parse-table.js

Lines changed: 79 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ function parseTable(inputString) {
141141
// each column
142142
data.forEach((row) => {
143143
row.forEach((value, colIndex) => {
144-
if (value === "") {
144+
if (value === null || value === undefined || value === "") {
145145
// Ignore empty values
146146
return;
147147
}
@@ -170,7 +170,7 @@ function parseTable(inputString) {
170170
}
171171
}
172172
});
173-
173+
174174
// Check if all values in a string column are boolean
175175
columnTypes.forEach((type, colIndex) => {
176176
if (type === "string") {
@@ -186,7 +186,7 @@ function parseTable(inputString) {
186186
const convertedData = data.map((row) =>
187187
row.map((value, colIndex) =>
188188
columnTypes[colIndex] !== "string" &&
189-
columnTypes[colIndex] !== "boolean"
189+
columnTypes[colIndex] !== "boolean"
190190
? utils.convertValue(value)
191191
: value
192192
)
@@ -205,11 +205,84 @@ function parseTable(inputString) {
205205
/**
 * Splits raw table text into a matrix of string cells.
 *
 * The text is trimmed, split into rows on line breaks, and every row is split
 * with each candidate delimiter in turn. The first delimiter that splits every
 * row into the same number (> 1) of cells wins immediately. Otherwise the
 * candidate ranked best by `sortByBestRowSplit` is used and short rows are
 * padded with empty strings so the result is rectangular.
 *
 * @param {string} textString - Raw table text (rows separated by line breaks).
 * @returns {string[][]} One array of cell strings per input row.
 */
function parseTextTable(textString) {
  // Split the input by line breaks for rows
  const rows = textString.trim().split(/\r?\n/);
  const rowCount = rows.length;

  // Candidate delimiters, in priority order: TAB (spreadsheets, IDEs),
  // comma (CSV), semicolon (CSV), pipe, and runs of whitespace (fixed width).
  // No flags are needed because the patterns are only ever used with split().
  const delimiters = [/\t|\s\t/, /,/, /;/, /\|/, /\s+/];

  const candidates = [];
  for (let i = 0; i < delimiters.length; i++) {
    const matrix = rows.map((row) => row.split(delimiters[i]));
    const score = getNumSplitRows(matrix, i);
    // Perfect match: every row was split and all rows agree on the column count
    if (score[0] >= rowCount && score[1] >= rowCount) {
      return matrix;
    }
    candidates.push({ matrix, score });
  }

  // No perfect delimiter: choose the one that best splits the table.
  // The comparator never returns 0 for two different pattern indexes,
  // so the winner is deterministic.
  candidates.sort((a, b) => sortByBestRowSplit(a.score, b.score));
  const best = candidates[0].matrix;

  // Append empty cells to make the table rectangular and avoid errors
  // while converting
  const maxCols = getMaxCols(best);
  return best.map((row) => {
    if (row.length >= maxCols) {
      return row;
    }
    const padding = Array.from({ length: maxCols - row.length }, () => "");
    return row.concat(padding);
  });
}
211244

212-
return matrix;
245+
/**
 * Comparator that ranks delimiter scores from best to worst.
 *
 * Each score is a tuple `[numColsEqual, numRowSplit, patternIndex]`.
 *
 * @param {number[]} a - Score of the first candidate.
 * @param {number[]} b - Score of the second candidate.
 * @returns {number} Negative when `a` ranks first, positive when `b` does.
 */
function sortByBestRowSplit(a, b) {
  // More rows with the same number of columns first (DESC)
  const byEqualCols = b[0] - a[0];
  if (byEqualCols !== 0) {
    return byEqualCols;
  }
  // Then more rows actually split by the pattern (DESC)
  const bySplitRows = b[1] - a[1];
  if (bySplitRows !== 0) {
    return bySplitRows;
  }
  // Finally the lower pattern index wins (ASC), favouring earlier patterns
  return a[2] - b[2];
}
255+
256+
function getMaxCols(matrix) {
257+
let maxCols = 0;
258+
let numRows = matrix.length;
259+
for (let i = 0; i < numRows; i++) {
260+
let cols = matrix[i].length;
261+
if (cols > maxCols) {
262+
maxCols = cols;
263+
}
264+
}
265+
return maxCols;
266+
}
267+
268+
function getNumSplitRows(matrix, index) {
269+
let numRowSplit = 0;
270+
let numColsEqual = 0;
271+
let numCols = -1;
272+
let numRows = matrix.length;
273+
for (let i = 0; i < numRows; i++) {
274+
let cols = matrix[i].length;
275+
if (cols > 1) {
276+
numRowSplit += 1;
277+
if (numCols <= 0) {
278+
numCols = cols;
279+
numColsEqual = 1; // First row establishes the column count
280+
} else if (cols == numCols) {
281+
numColsEqual += 1;
282+
}
283+
}
284+
}
285+
return [numColsEqual, numRowSplit, index];
213286
}
214287

215288
module.exports = {

0 commit comments

Comments
 (0)