@@ -141,7 +141,7 @@ function parseTable(inputString) {
141141 // each column
142142 data . forEach ( ( row ) => {
143143 row . forEach ( ( value , colIndex ) => {
144- if ( value === "" ) {
144+ if ( value === null || value === undefined || value === "" ) {
145145 // Ignore empty values
146146 return ;
147147 }
@@ -170,7 +170,7 @@ function parseTable(inputString) {
170170 }
171171 }
172172 } ) ;
173-
173+
174174 // Check if all values in a string column are boolean
175175 columnTypes . forEach ( ( type , colIndex ) => {
176176 if ( type === "string" ) {
@@ -186,7 +186,7 @@ function parseTable(inputString) {
186186 const convertedData = data . map ( ( row ) =>
187187 row . map ( ( value , colIndex ) =>
188188 columnTypes [ colIndex ] !== "string" &&
189- columnTypes [ colIndex ] !== "boolean"
189+ columnTypes [ colIndex ] !== "boolean"
190190 ? utils . convertValue ( value )
191191 : value
192192 )
@@ -205,11 +205,84 @@ function parseTable(inputString) {
/**
 * Splits a block of delimited text into a rectangular matrix of string cells.
 *
 * The text is trimmed and split into rows on line breaks. Each candidate
 * delimiter is then tried in priority order:
 *   1. TAB, optionally preceded by one whitespace character (spreadsheets, IDEs)
 *   2. comma (CSV)
 *   3. semicolon (CSV)
 *   4. pipe
 *   5. runs of whitespace (fixed-width text)
 *
 * The first delimiter that splits every row into the same number of columns
 * wins immediately. Otherwise the candidate ranked best by
 * `sortByBestRowSplit` is used and short rows are padded with empty strings
 * so that every row has the same length.
 *
 * @param {string} textString - Raw table text.
 * @returns {string[][]} One array of cell strings per input row.
 */
function parseTextTable(textString) {
  // Split the input by line breaks for rows
  const rows = textString.trim().split(/\r?\n/);
  const rowCount = rows.length;

  // Candidate delimiters, in priority order (ties are resolved towards TAB).
  const delimiters = [/\t|\s\t/, /,/, /;/, /\|/, /\s+/];

  const candidates = [];
  for (const [index, delimiter] of delimiters.entries()) {
    const matrix = rows.map((row) => row.split(delimiter));
    const score = getNumSplitRows(matrix, index);
    // Perfect split: every row was split and all rows have the same width.
    if (score[0] >= rowCount && score[1] >= rowCount) {
      return matrix;
    }
    candidates.push({ matrix, score });
  }

  // No perfect split: choose the delimiter that best splits the table.
  candidates.sort((a, b) => sortByBestRowSplit(a.score, b.score));
  const best = candidates[0].matrix;

  // Pad short rows with empty cells so the table is rectangular and later
  // per-column conversion does not hit missing cells.
  const maxCols = getMaxCols(best);
  return best.map((row) => {
    if (row.length >= maxCols) {
      return row;
    }
    const padding = Array.from({ length: maxCols - row.length }, () => "");
    return [...row, ...padding];
  });
}
211244
212- return matrix ;
/**
 * Comparator that ranks delimiter scores from best to worst.
 *
 * Each score is a triple `[equalRows, splitRows, patternIndex]`:
 *   - more rows sharing the same column count sorts first (descending),
 *   - then more rows actually split by the pattern (descending),
 *   - then the lower pattern index (ascending), which favours TAB.
 *
 * @param {number[]} a - First score triple.
 * @param {number[]} b - Second score triple.
 * @returns {number} Negative if `a` ranks before `b`, positive if after, 0 if equal.
 */
function sortByBestRowSplit(a, b) {
  const byEqualRows = b[0] - a[0];
  if (byEqualRows !== 0) {
    return byEqualRows;
  }
  const bySplitRows = b[1] - a[1];
  if (bySplitRows !== 0) {
    return bySplitRows;
  }
  return a[2] - b[2];
}
255+
/**
 * Returns the length of the longest row in a matrix.
 *
 * @param {Array<Array>} matrix - Array of rows.
 * @returns {number} The largest row length, or 0 when the matrix has no rows.
 */
function getMaxCols(matrix) {
  let widest = 0;
  for (const row of matrix) {
    if (row.length > widest) {
      widest = row.length;
    }
  }
  return widest;
}
267+
/**
 * Scores how well a delimiter split a table.
 *
 * A row counts as "split" when it has more than one column. The first split
 * row establishes the reference column count; every later split row with
 * that same count adds to the "equal" tally. Rows that were not split are
 * ignored entirely.
 *
 * @param {Array<Array>} matrix - Rows produced by splitting with one pattern.
 * @param {number} index - Index of the pattern that produced `matrix`;
 *   returned unchanged so the score can be traced back after sorting.
 * @returns {number[]} `[numColsEqual, numRowSplit, index]`.
 */
function getNumSplitRows(matrix, index) {
  const NOT_SET = -1;
  let referenceCols = NOT_SET;
  let numRowSplit = 0;
  let numColsEqual = 0;

  for (const row of matrix) {
    const cols = row.length;
    if (cols <= 1) {
      // The pattern did not split this row.
      continue;
    }
    numRowSplit += 1;
    if (referenceCols === NOT_SET) {
      // First row establishes the column count
      referenceCols = cols;
      numColsEqual = 1;
    } else if (cols === referenceCols) {
      numColsEqual += 1;
    }
  }
  return [numColsEqual, numRowSplit, index];
}
214287
215288module . exports = {
0 commit comments