@@ -7,7 +7,7 @@ describe('Metafetch: Final Optimized Tests', () => {
// Handles for the local fixture HTTP servers used by this suite.
// Every one of them is closed again in the `after` hook.
let serverInvalidAssets: Server;
let serverUaEcho: Server;
let serverEmptyBody: Server;
let serverPrimaryMeta: Server;
let serverBaseTag: Server;
let serverCharset: Server;
let serverFallbackMeta: Server;
let serverAssetFallback: Server;
let serverBaseNoHref: Server;
let serverMalformedAssets: Server;
let serverAmp: Server;
let serverHttp: Server;
let serverJsonLd: Server;
1111
1212 before ( ( done ) => {
1313 serverInvalidAssets = http . createServer ( ( req , res ) => {
@@ -48,14 +48,39 @@ describe('Metafetch: Final Optimized Tests', () => {
4848 else if ( req . url ?. startsWith ( '/page' ) ) res . setHeader ( 'Content-Type' , 'text/html' ) . end ( '<html><title>T</title></html>' ) ;
4949 else res . setHeader ( 'Content-Type' , 'application/pdf' ) . end ( '%PDF-1.4' ) ;
5050 } ) . listen ( 2511 , '127.0.0.1' ) ;
51- serverHttp . on ( 'listening' , done ) ;
// Fixture pages for the JSON-LD tests, keyed by request path. Each page carries
// one particular shape of <script type="application/ld+json"> payload.
const jsonLdPages = new Map([
    ['/basic', `<html><head><script type="application/ld+json">{"@context":"https://schema.org","@type":"NewsArticle","headline":"Article Headline"}</script></head></html>`],
    ['/nested', `<html><head><script type="application/ld+json">{"@context":"https://schema.org","author":{"@type":"Person","name":"Jane Doe"}, "unsupported": ["item1", "item2"]}</script></head></html>`],
    ['/malformed', `<html><head><meta name="description" content="Good"><script type="application/ld+json">{ "key": "value", </script></head></html>`],
    ['/multiple', `<html><head><meta name="description" content="A page with two scripts."><script type="application/ld+json">{"@type":"Organization","name":"My Company"}</script><script type="application/ld+json">{"@type":"WebSite","url":"https://example.com"}</script></head></html>`],
    ['/empty', `<html><head><script type="application/ld+json"></script></head></html>`],
    ['/non_object', `<html><head><script type="application/ld+json">"this is a string, not an object"</script></head></html>`],
]);
serverJsonLd = http.createServer((req, res) => {
    res.setHeader('Content-Type', 'text/html');
    // Paths without a fixture page are answered with an empty body.
    res.end(jsonLdPages.get(req.url ?? '') ?? '');
}).listen(2512, '127.0.0.1');
// Signal Mocha that setup is finished once this server is accepting connections.
serverJsonLd.on('listening', done);
5277 } ) ;
5378
// Shut down every fixture server once the suite has finished.
after(() => {
    const fixtureServers = [
        serverInvalidAssets, serverUaEcho, serverEmptyBody,
        serverPrimaryMeta, serverBaseTag, serverCharset,
        serverFallbackMeta, serverAssetFallback, serverBaseNoHref,
        serverMalformedAssets, serverAmp, serverHttp, serverJsonLd,
    ];
    for (const server of fixtureServers) {
        server.close();
    }
});
6085
6186 // --- Test Suites ---
@@ -65,7 +90,7 @@ describe('Metafetch: Final Optimized Tests', () => {
it(`${String(++counter).padStart(2, '0')}. should return a Promise`, () => {
    // The fetch is started but deliberately not awaited: only the type of the
    // returned value is under test here.
    const pending = new Metafetch().fetch('http://127.0.0.1:2511/page');
    expect(pending).to.be.an.instanceOf(Promise);
    // No-op handler so a rejection of the un-awaited promise does not raise an
    // unhandled-rejection warning.
    pending.catch(() => {});
});
7095
7196 it ( ( ++ counter ) . toString ( ) . padStart ( 2 , '0' ) + '. should reject with an error for an empty URL' , async ( ) => {
@@ -103,7 +128,7 @@ describe('Metafetch: Final Optimized Tests', () => {
103128 }
104129 } ) ;
105130 } ) ;
106-
131+
107132 describe ( '3. User-Agent Management' , ( ) => {
108133 let counter = 0 ;
109134 it ( ( ++ counter ) . toString ( ) . padStart ( 2 , '0' ) + '. should manage the instance user agent correctly' , ( ) => {
@@ -206,4 +231,66 @@ describe('Metafetch: Final Optimized Tests', () => {
206231 expect ( res . images ) . to . be . an ( 'array' ) . that . is . not . empty ;
207232 } ) ;
208233 } ) ;
234+
describe('7. Structured Data (JSON-LD)', () => {
    // All requests in this group go to the JSON-LD fixture server on port 2512.
    const origin = 'http://127.0.0.1:2512';
    const instance = new Metafetch();
    let counter = 0;

    // Prefixes a test title with its zero-padded, auto-incremented sequence number.
    const numbered = (text: string) => `${String(++counter).padStart(2, '0')}. ${text}`;
    // Lists the keys of a meta map that carry the 'ld:' prefix.
    const ldKeysOf = (meta: object) => Object.keys(meta).filter((key) => key.startsWith('ld:'));

    it(numbered('should extract basic, flat JSON-LD data'), async () => {
        const { meta } = await instance.fetch(`${origin}/basic`);
        expect(meta).to.deep.include({
            'ld:@context': 'https://schema.org',
            'ld:@type': 'NewsArticle',
            'ld:headline': 'Article Headline',
        });
    });

    it(numbered('should extract and flatten nested JSON-LD data'), async () => {
        const { meta } = await instance.fetch(`${origin}/nested`);
        expect(meta).to.deep.include({
            'ld:@context': 'https://schema.org',
            'ld:author:@type': 'Person',
            'ld:author:name': 'Jane Doe',
        });
        // Arrays are not handled by the current implementation, so the
        // 'unsupported' array from the fixture must not show up.
        expect(meta).to.not.have.property('ld:unsupported');
    });

    it(numbered('should handle malformed JSON-LD gracefully without crashing'), async () => {
        const { meta } = await instance.fetch(`${origin}/malformed`);
        // Ordinary meta tags on the same page are still expected to be parsed.
        expect(meta!.description).to.equal('Good');
        // The broken ld+json payload must contribute no 'ld:' keys at all.
        expect(ldKeysOf(meta!)).to.be.empty;
    });

    it(numbered('should merge data from multiple JSON-LD scripts'), async () => {
        const { meta } = await instance.fetch(`${origin}/multiple`);
        // Keys present in both scripts are overwritten by the later script.
        expect(meta).to.deep.equal({
            'description': 'A page with two scripts.',
            'ld:@type': 'WebSite', // value from the second script
            'ld:name': 'My Company',
            'ld:url': 'https://example.com',
        });
    });

    it(numbered('should not extract JSON-LD when meta flag is disabled'), async () => {
        const { meta } = await instance.fetch(`${origin}/basic`, { flags: { meta: false } });
        expect(meta).to.be.undefined;
    });

    it(numbered('should handle an empty JSON-LD script tag'), async () => {
        const { meta } = await instance.fetch(`${origin}/empty`);
        expect(meta).to.be.an('object').that.is.empty;
    });

    it(numbered('should ignore JSON-LD content that is not a JSON object'), async () => {
        const { meta } = await instance.fetch(`${origin}/non_object`);
        expect(meta).to.be.an('object').that.is.empty;
        expect(ldKeysOf(meta!)).to.be.empty;
    });
});
209296} ) ;
0 commit comments