Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 79 additions & 6 deletions src/parse-table.js
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ function parseTable(inputString) {
// each column
data.forEach((row) => {
row.forEach((value, colIndex) => {
if (value === "") {
if (value === null || value === undefined || value === "") {
// Ignore empty values
return;
}
Expand Down Expand Up @@ -170,7 +170,7 @@ function parseTable(inputString) {
}
}
});

// Check if all values in a string column are boolean
columnTypes.forEach((type, colIndex) => {
if (type === "string") {
Expand All @@ -186,7 +186,7 @@ function parseTable(inputString) {
const convertedData = data.map((row) =>
row.map((value, colIndex) =>
columnTypes[colIndex] !== "string" &&
columnTypes[colIndex] !== "boolean"
columnTypes[colIndex] !== "boolean"
? utils.convertValue(value)
: value
)
Expand All @@ -205,11 +205,84 @@ function parseTable(inputString) {
/**
 * Splits raw tabular text into a matrix of cell strings, auto-detecting the
 * column delimiter.
 *
 * The input is trimmed and split into rows on line breaks. Each candidate
 * delimiter is then tried in priority order: TAB (optionally preceded by one
 * whitespace character), comma, semicolon, pipe, and runs of whitespace.
 * The first delimiter that splits every row into the same number of columns
 * is used immediately. If none does, the best-scoring delimiter is chosen and
 * shorter rows are padded with empty strings so the result is rectangular.
 *
 * @param {string} textString - Raw text of the table.
 * @returns {string[][]} One array of cell strings per input row.
 */
function parseTextTable(textString) {
  // Split the input by line breaks for rows
  const rows = textString.trim().split(/\r?\n/);
  const rowCount = rows.length;

  // Candidate delimiters, highest priority first: TAB (spreadsheets, IDEs),
  // comma (CSV), semicolon (CSV), pipe, and runs of whitespace (fixed width).
  const patterns = ['\\t|\\s\\t', ',', ';', '\\|', '\\s+'];
  const candidates = [];
  const scores = [];

  for (let i = 0; i < patterns.length; i++) {
    const regex = new RegExp(patterns[i]);
    const matrix = rows.map((row) => row.split(regex));
    const score = getNumSplitRows(matrix, i);
    // Perfect split: every row was split and every row has the same number
    // of columns, so no lower-priority delimiter needs to be considered.
    if (score[0] >= rowCount && score[1] >= rowCount) {
      return matrix;
    }
    candidates[i] = matrix;
    scores[i] = score;
  }

  // No perfect split: pick the delimiter with the best score. Each score
  // carries its own pattern index, so sorting the scores in place is safe.
  const best = scores.sort(sortByBestRowSplit)[0][2];
  const chosen = candidates[best];

  // Append empty cells to make the table rectangular and avoid errors while
  // converting values later on.
  const maxCols = getMaxCols(chosen);
  return chosen.map((row) =>
    row.length < maxCols
      ? row.concat(new Array(maxCols - row.length).fill(""))
      : row
  );
}
/**
 * Comparator that ranks delimiter scores so the best split sorts first.
 *
 * Each argument is a triple `[numColsEqual, numRowSplit, patternIndex]`.
 * Ordering: more rows sharing the same column count first, then more rows
 * actually split by the pattern, then the lower pattern index (so earlier
 * patterns such as TAB win ties).
 *
 * @param {number[]} a - First score triple.
 * @param {number[]} b - Second score triple.
 * @returns {number} Negative if `a` ranks first, positive if `b` does, 0 on a tie.
 */
function sortByBestRowSplit(a, b) {
  // More rows with the same number of columns (DESC)
  const byEqualCols = b[0] - a[0];
  if (byEqualCols !== 0) {
    return byEqualCols;
  }
  // More rows split by the pattern (DESC)
  const bySplitRows = b[1] - a[1];
  if (bySplitRows !== 0) {
    return bySplitRows;
  }
  // Pattern order (ASC)
  return a[2] - b[2];
}

/**
 * Returns the length of the longest row in a matrix.
 *
 * @param {Array<Array<*>>} matrix - Rows to inspect.
 * @returns {number} The largest row length, or 0 when the matrix has no rows.
 */
function getMaxCols(matrix) {
  let widest = 0;
  for (const row of matrix) {
    if (row.length > widest) {
      widest = row.length;
    }
  }
  return widest;
}

/**
 * Scores how well a delimiter pattern split a table into columns.
 *
 * Rows with a single cell are treated as not split and are ignored. The
 * first split row establishes the reference column count; every split row
 * (including that first one) whose length matches it is counted as "equal".
 *
 * @param {Array<Array<*>>} matrix - Rows produced by splitting with one pattern.
 * @param {number} index - Identifier of the pattern; returned unchanged.
 * @returns {number[]} `[numColsEqual, numRowSplit, index]`: the number of split
 *   rows matching the reference column count, the number of rows that were
 *   split at all, and the pattern index passed in.
 */
function getNumSplitRows(matrix, index) {
  let numRowSplit = 0;
  let numColsEqual = 0;
  let numCols = -1; // Reference column count; -1 until a split row is seen
  for (const row of matrix) {
    const cols = row.length;
    if (cols <= 1) {
      continue; // The pattern did not split this row
    }
    numRowSplit += 1;
    if (numCols <= 0) {
      numCols = cols;
      numColsEqual = 1; // First split row establishes the column count
    } else if (cols === numCols) {
      numColsEqual += 1;
    }
  }
  return [numColsEqual, numRowSplit, index];
}

module.exports = {
Expand Down