@@ -10,21 +10,25 @@ import { readHtml } from "../../src/index.ts";
1010
1111function simpleTable ( headers : string [ ] , rows : string [ ] [ ] ) : string {
1212 const thRow = headers . map ( ( h ) => `<th>${ h } </th>` ) . join ( "" ) ;
13- const trRows = rows
14- . map ( ( r ) => `<tr>${ r . map ( ( c ) => `<td>${ c } </td>` ) . join ( "" ) } </tr>` )
15- . join ( "\n" ) ;
13+ const trRows = rows . map ( ( r ) => `<tr>${ r . map ( ( c ) => `<td>${ c } </td>` ) . join ( "" ) } </tr>` ) . join ( "\n" ) ;
1614 return `<table><thead><tr>${ thRow } </tr></thead><tbody>${ trRows } </tbody></table>` ;
1715}
1816
1917// ─── basic parsing ────────────────────────────────────────────────────────────
2018
2119describe ( "readHtml – basic" , ( ) => {
2220 test ( "parses single table" , ( ) => {
23- const html = simpleTable ( [ "a" , "b" ] , [ [ "1" , "2" ] , [ "3" , "4" ] ] ) ;
21+ const html = simpleTable (
22+ [ "a" , "b" ] ,
23+ [
24+ [ "1" , "2" ] ,
25+ [ "3" , "4" ] ,
26+ ] ,
27+ ) ;
2428 const dfs = readHtml ( html ) ;
2529 expect ( dfs . length ) . toBe ( 1 ) ;
2630 const df = dfs [ 0 ] ! ;
27- expect ( df . columns ) . toEqual ( [ "a" , "b" ] ) ;
31+ expect ( df . columns . toArray ( ) ) . toEqual ( [ "a" , "b" ] ) ;
2832 expect ( df . shape ) . toEqual ( [ 2 , 2 ] ) ;
2933 } ) ;
3034
@@ -33,8 +37,8 @@ describe("readHtml – basic", () => {
3337 const t2 = simpleTable ( [ "y" ] , [ [ "20" ] ] ) ;
3438 const dfs = readHtml ( t1 + t2 ) ;
3539 expect ( dfs . length ) . toBe ( 2 ) ;
36- expect ( dfs [ 0 ] ! . columns ) . toEqual ( [ "x" ] ) ;
37- expect ( dfs [ 1 ] ! . columns ) . toEqual ( [ "y" ] ) ;
40+ expect ( dfs [ 0 ] ! . columns . toArray ( ) ) . toEqual ( [ "x" ] ) ;
41+ expect ( dfs [ 1 ] ! . columns . toArray ( ) ) . toEqual ( [ "y" ] ) ;
3842 } ) ;
3943
4044 test ( "returns empty array when no tables found" , ( ) => {
@@ -57,9 +61,9 @@ describe("readHtml – basic", () => {
5761 } ) ;
5862
5963 test ( "header=null uses integer column names" , ( ) => {
60- const html = ` <table><tr><td>a</td><td>b</td></tr><tr><td>1</td><td>2</td></tr></table>` ;
64+ const html = " <table><tr><td>a</td><td>b</td></tr><tr><td>1</td><td>2</td></tr></table>" ;
6165 const [ df ] = readHtml ( html , { header : null } ) ;
62- expect ( df ! . columns ) . toEqual ( [ "0" , "1" ] ) ;
66+ expect ( df ! . columns . toArray ( ) ) . toEqual ( [ "0" , "1" ] ) ;
6367 expect ( df ! . shape [ 0 ] ) . toBe ( 2 ) ;
6468 } ) ;
6569} ) ;
@@ -73,7 +77,7 @@ describe("readHtml – header", () => {
7377 <tr><td>Alice</td><td>30</td></tr>
7478 </table>` ;
7579 const [ df ] = readHtml ( html , { header : 0 } ) ;
76- expect ( df ! . columns ) . toEqual ( [ "Name" , "Age" ] ) ;
80+ expect ( df ! . columns . toArray ( ) ) . toEqual ( [ "Name" , "Age" ] ) ;
7781 expect ( df ! . shape [ 0 ] ) . toBe ( 1 ) ;
7882 } ) ;
7983
@@ -83,9 +87,10 @@ describe("readHtml – header", () => {
8387 <tr><td>1</td><td>2</td><td>3</td></tr>
8488 </table>` ;
8589 const [ df ] = readHtml ( html ) ;
86- expect ( df ! . columns [ 0 ] ) . toBe ( "x" ) ;
87- expect ( df ! . columns [ 1 ] ) . toBe ( "x.1" ) ;
88- expect ( df ! . columns [ 2 ] ) . toBe ( "y" ) ;
90+ const cols = df ! . columns . toArray ( ) ;
91+ expect ( cols [ 0 ] ) . toBe ( "x" ) ;
92+ expect ( cols [ 1 ] ) . toBe ( "x.1" ) ;
93+ expect ( cols [ 2 ] ) . toBe ( "y" ) ;
8994 } ) ;
9095} ) ;
9196
@@ -94,7 +99,7 @@ describe("readHtml – header", () => {
9499describe ( "readHtml – NA values" , ( ) => {
95100 test ( "empty string becomes null" , ( ) => {
96101 const html = simpleTable ( [ "v" ] , [ [ "" ] , [ "1" ] ] ) ;
97- const [ df ] = readHtml ( html ) ;
102+ const [ df ] = readHtml ( html , { skipBlankLines : false } ) ;
98103 expect ( df ! . col ( "v" ) . toArray ( ) [ 0 ] ) . toBeNull ( ) ;
99104 expect ( df ! . col ( "v" ) . toArray ( ) [ 1 ] ) . toBe ( 1 ) ;
100105 } ) ;
@@ -133,7 +138,7 @@ describe("readHtml – converters", () => {
133138 test ( "decimal separator" , ( ) => {
134139 const html = simpleTable ( [ "n" ] , [ [ "3,14" ] ] ) ;
135140 const [ df ] = readHtml ( html , { decimal : "," } ) ;
136- expect ( ( df ! . col ( "n" ) . toArray ( ) [ 0 ] as number ) ) . toBeCloseTo ( 3.14 ) ;
141+ expect ( df ! . col ( "n" ) . toArray ( ) [ 0 ] as number ) . toBeCloseTo ( 3.14 ) ;
137142 } ) ;
138143} ) ;
139144
@@ -146,7 +151,7 @@ describe("readHtml – filtering", () => {
146151 const t2 = simpleTable ( [ "c" ] , [ [ "3" ] ] ) ;
147152 const dfs = readHtml ( t0 + t1 + t2 , { match : [ 1 ] } ) ;
148153 expect ( dfs . length ) . toBe ( 1 ) ;
149- expect ( dfs [ 0 ] ! . columns ) . toEqual ( [ "b" ] ) ;
154+ expect ( dfs [ 0 ] ! . columns . toArray ( ) ) . toEqual ( [ "b" ] ) ;
150155 } ) ;
151156
152157 test ( "skipRows" , ( ) => {
@@ -181,18 +186,30 @@ describe("readHtml – filtering", () => {
181186
182187describe ( "readHtml – indexCol" , ( ) => {
183188 test ( "sets named column as index" , ( ) => {
184- const html = simpleTable ( [ "id" , "val" ] , [ [ "a" , "1" ] , [ "b" , "2" ] ] ) ;
189+ const html = simpleTable (
190+ [ "id" , "val" ] ,
191+ [
192+ [ "a" , "1" ] ,
193+ [ "b" , "2" ] ,
194+ ] ,
195+ ) ;
185196 const [ df ] = readHtml ( html , { indexCol : "id" } ) ;
186197 // "id" column removed from columns
187- expect ( df ! . columns ) . toEqual ( [ "val" ] ) ;
198+ expect ( df ! . columns . toArray ( ) ) . toEqual ( [ "val" ] ) ;
188199 // index contains "a", "b"
189200 expect ( df ! . index . toArray ( ) ) . toEqual ( [ "a" , "b" ] ) ;
190201 } ) ;
191202
192203 test ( "sets column by integer position as index" , ( ) => {
193- const html = simpleTable ( [ "id" , "val" ] , [ [ "x" , "10" ] , [ "y" , "20" ] ] ) ;
204+ const html = simpleTable (
205+ [ "id" , "val" ] ,
206+ [
207+ [ "x" , "10" ] ,
208+ [ "y" , "20" ] ,
209+ ] ,
210+ ) ;
194211 const [ df ] = readHtml ( html , { indexCol : 0 } ) ;
195- expect ( df ! . columns ) . toEqual ( [ "val" ] ) ;
212+ expect ( df ! . columns . toArray ( ) ) . toEqual ( [ "val" ] ) ;
196213 } ) ;
197214} ) ;
198215
@@ -235,7 +252,7 @@ describe("readHtml – structure variants", () => {
235252 <tr><td>1</td><td>2</td></tr>
236253 </table>` ;
237254 const [ df ] = readHtml ( html ) ;
238- expect ( df ! . columns ) . toEqual ( [ "x" , "y" ] ) ;
255+ expect ( df ! . columns . toArray ( ) ) . toEqual ( [ "x" , "y" ] ) ;
239256 expect ( df ! . shape [ 0 ] ) . toBe ( 1 ) ;
240257 } ) ;
241258
@@ -257,7 +274,7 @@ describe("readHtml – structure variants", () => {
257274 <tr><td id="c1">Alice</td></tr>
258275 </table>` ;
259276 const [ df ] = readHtml ( html , { converters : false } ) ;
260- expect ( df ! . columns ) . toEqual ( [ "Name" ] ) ;
277+ expect ( df ! . columns . toArray ( ) ) . toEqual ( [ "Name" ] ) ;
261278 expect ( df ! . col ( "Name" ) . toArray ( ) [ 0 ] ) . toBe ( "Alice" ) ;
262279 } ) ;
263280
@@ -276,19 +293,24 @@ describe("readHtml – property tests", () => {
276293 test ( "roundtrip: all numeric values survive parse" , ( ) => {
277294 fc . assert (
278295 fc . property (
279- fc . array ( fc . array ( fc . integer ( { min : - 1000 , max : 1000 } ) , { minLength : 1 , maxLength : 5 } ) , {
280- minLength : 1 ,
281- maxLength : 10 ,
282- } ) ,
296+ fc . integer ( { min : 1 , max : 5 } ) . chain ( ( ncols ) =>
297+ fc . array (
298+ fc . array ( fc . integer ( { min : - 1000 , max : 1000 } ) , {
299+ minLength : ncols ,
300+ maxLength : ncols ,
301+ } ) ,
302+ { minLength : 1 , maxLength : 10 } ,
303+ ) ,
304+ ) ,
283305 ( rows ) => {
284306 const ncols = rows [ 0 ] ! . length ;
285307 const headers = Array . from ( { length : ncols } , ( _ , i ) => `col${ i } ` ) ;
286308 const strRows = rows . map ( ( r ) => r . map ( String ) ) ;
287309 const html = simpleTable ( headers , strRows ) ;
288310 const [ df ] = readHtml ( html ) ;
289311 const flatIn = rows . flat ( ) ;
290- const flatOut = headers . flatMap ( ( h ) =>
291- ( df ?. col ( h ) . toArray ( ) ?? [ ] ) . map ( Number ) ,
312+ const flatOut = ( df ?. toRecords ( ) ?? [ ] ) . flatMap ( ( record ) =>
313+ rows [ 0 ] ! . map ( ( _ , ci ) => Number ( record [ headers [ ci ] ! ] ) ) ,
292314 ) ;
293315 // same length
294316 if ( flatIn . length !== flatOut . length ) return false ;
@@ -302,9 +324,9 @@ describe("readHtml – property tests", () => {
302324 test ( "number of returned DataFrames equals number of tables in HTML" , ( ) => {
303325 fc . assert (
304326 fc . property ( fc . integer ( { min : 0 , max : 6 } ) , ( n ) => {
305- const tables = Array . from ( { length : n } , ( _ , i ) =>
306- simpleTable ( [ `c ${ i } ` ] , [ [ "1" ] ] ) ,
307- ) . join ( " " ) ;
327+ const tables = Array . from ( { length : n } , ( _ , i ) => simpleTable ( [ `c ${ i } ` ] , [ [ "1" ] ] ) ) . join (
328+ " " ,
329+ ) ;
308330 const dfs = readHtml ( tables ) ;
309331 return dfs . length === n ;
310332 } ) ,
@@ -332,7 +354,7 @@ describe("readHtml – realistic HTML", () => {
332354
333355 test ( "parses Wikipedia-style table from full HTML doc" , ( ) => {
334356 const [ df ] = readHtml ( wikipedia ) ;
335- expect ( df ! . columns ) . toEqual ( [ "Country" , "Population (M)" , "GDP (B USD)" ] ) ;
357+ expect ( df ! . columns . toArray ( ) ) . toEqual ( [ "Country" , "Population (M)" , "GDP (B USD)" ] ) ;
336358 expect ( df ! . shape ) . toEqual ( [ 3 , 3 ] ) ;
337359 } ) ;
338360
0 commit comments