@@ -2562,6 +2562,193 @@ mod archive_structure_tests {
25622562 }
25632563}
25642564
/// Asset embedding tests - Per spec §05-asset-embedding.md
///
/// Exercises asset hash verification (directly and through `CdxReader`),
/// the missing-asset error path, and how image references in content feed
/// into the computed document ID.
#[cfg(test)]
mod asset_embedding_tests {
    use cdx_core::archive::{CdxReader, CdxWriter, CompressionMethod};
    use cdx_core::asset::{verify_asset_hash, ImageAsset, ImageFormat, ImageIndex};
    use cdx_core::{ContentRef, DocumentId, HashAlgorithm, Hasher, Manifest, Metadata, Result};

    /// Archive path of the document content file.
    const CONTENT_PATH: &str = "content/document.json";
    /// Archive path of the Dublin Core metadata file.
    const DUBLIN_CORE_PATH: &str = "metadata/dublin-core.json";
    /// Archive path of the image asset used by these tests.
    const ASSET_PATH: &str = "assets/images/logo.png";
    /// Archive path of the image asset index.
    const INDEX_PATH: &str = "assets/images/index.json";

    /// Content payload written to `CONTENT_PATH` by every archive-building test.
    const CONTENT_JSON: &[u8] = br#"{"version":"0.1","blocks":[]}"#;
    /// Dublin Core payload written to `DUBLIN_CORE_PATH` by every archive-building test.
    const DUBLIN_CORE_JSON: &[u8] = br#"{"title":"Test"}"#;

    /// Builds the manifest shared by the archive-building tests.
    ///
    /// The content reference points at `CONTENT_PATH` with a pending document
    /// ID and no compression, Merkle root, or block count; the metadata
    /// section points at `DUBLIN_CORE_PATH` and carries no custom entry.
    fn create_test_manifest() -> Manifest {
        let content = ContentRef {
            path: CONTENT_PATH.to_string(),
            hash: DocumentId::pending(),
            compression: None,
            merkle_root: None,
            block_count: None,
        };
        let metadata = Metadata {
            dublin_core: DUBLIN_CORE_PATH.to_string(),
            custom: None,
        };
        Manifest::new(content, metadata)
    }

    /// Per spec §05-asset-embedding.md §8.1 - Asset hash must match file content
    ///
    /// Checks the matching-hash case twice: once through `verify_asset_hash`
    /// on the raw bytes, and once by writing the asset into an archive and
    /// reading it back with `CdxReader::read_file_verified`.
    #[test]
    fn test_asset_index_hash_matches_file() -> Result<()> {
        let asset_data = b"fake PNG image data for testing";
        let hash = Hasher::hash(HashAlgorithm::Sha256, asset_data);

        // verify_asset_hash should pass when hash matches
        assert!(
            verify_asset_hash(ASSET_PATH, asset_data, &hash, HashAlgorithm::Sha256).is_ok(),
            "verify_asset_hash should accept a matching hash"
        );

        // Build an archive with the asset and verify via CdxReader
        let mut writer = CdxWriter::in_memory();
        let manifest = create_test_manifest();
        writer.write_manifest(&manifest)?;
        writer.write_file(CONTENT_PATH, CONTENT_JSON, CompressionMethod::Deflate)?;
        writer.write_file(DUBLIN_CORE_PATH, DUBLIN_CORE_JSON, CompressionMethod::Deflate)?;
        // The asset itself is written with the Stored method.
        writer.write_file(ASSET_PATH, asset_data, CompressionMethod::Stored)?;

        let bytes = writer.finish()?.into_inner();
        let mut reader = CdxReader::from_bytes(bytes)?;

        // Read the asset file and verify its hash
        let read_data = reader.read_file_verified(ASSET_PATH, &hash)?;
        assert_eq!(read_data, asset_data);

        Ok(())
    }

    /// Per spec §05-asset-embedding.md §8.1 - Missing asset file = error
    ///
    /// Writes an image index entry but never writes a file at `ASSET_PATH`,
    /// then asserts that reading `ASSET_PATH` from the archive fails. The
    /// assertion only covers `read_file`; the index is not consulted by it.
    #[test]
    fn test_asset_missing_file_error() -> Result<()> {
        // Create an archive WITHOUT the asset file
        let mut writer = CdxWriter::in_memory();
        let manifest = create_test_manifest();
        writer.write_manifest(&manifest)?;
        writer.write_file(CONTENT_PATH, CONTENT_JSON, CompressionMethod::Deflate)?;
        writer.write_file(DUBLIN_CORE_PATH, DUBLIN_CORE_JSON, CompressionMethod::Deflate)?;

        // Write an asset index that references a file not in the archive
        let hash = Hasher::hash(HashAlgorithm::Sha256, b"nonexistent data");
        let image = ImageAsset::new("logo", ImageFormat::Png)
            .with_hash(hash)
            .with_size(100);
        let mut index: ImageIndex = Default::default();
        index.add(image, 100);
        let index_json = serde_json::to_vec_pretty(&index)?;
        writer.write_file(INDEX_PATH, &index_json, CompressionMethod::Deflate)?;

        let bytes = writer.finish()?.into_inner();
        let mut reader = CdxReader::from_bytes(bytes)?;

        // Trying to read the missing asset file should fail
        let result = reader.read_file(ASSET_PATH);
        assert!(result.is_err(), "Reading a missing asset file should error");

        Ok(())
    }

    /// Per spec §05-asset-embedding.md §8.1 - Hash mismatch = error
    ///
    /// Uses the hash of different bytes as the expected hash and asserts that
    /// both `verify_asset_hash` and `CdxReader::read_file_verified` report an
    /// error for the stored asset.
    #[test]
    fn test_asset_hash_mismatch_error() -> Result<()> {
        let asset_data = b"actual asset content";
        let wrong_hash = Hasher::hash(HashAlgorithm::Sha256, b"different content");

        // verify_asset_hash should fail when hash doesn't match
        let result = verify_asset_hash(ASSET_PATH, asset_data, &wrong_hash, HashAlgorithm::Sha256);
        assert!(result.is_err(), "Hash mismatch should produce error");

        // Also verify via CdxReader::read_file_verified
        let mut writer = CdxWriter::in_memory();
        let manifest = create_test_manifest();
        writer.write_manifest(&manifest)?;
        writer.write_file(CONTENT_PATH, CONTENT_JSON, CompressionMethod::Deflate)?;
        writer.write_file(DUBLIN_CORE_PATH, DUBLIN_CORE_JSON, CompressionMethod::Deflate)?;
        writer.write_file(ASSET_PATH, asset_data, CompressionMethod::Stored)?;

        let bytes = writer.finish()?.into_inner();
        let mut reader = CdxReader::from_bytes(bytes)?;

        let result = reader.read_file_verified(ASSET_PATH, &wrong_hash);
        assert!(
            result.is_err(),
            "read_file_verified should fail on hash mismatch"
        );

        Ok(())
    }

    /// Per spec §05-asset-embedding.md §4.1 - Asset references in content
    /// affect document ID (Image block src is part of content hash)
    ///
    /// Builds three documents that are identical except for the image path:
    /// two with different paths must yield different IDs, and two with the
    /// same path must yield the same ID.
    #[test]
    fn test_asset_hashes_included_in_document_id() -> Result<()> {
        use cdx_core::content::Block;
        use cdx_core::Document;

        // Two documents with different Image block src paths should have
        // different document IDs, because the src field is part of the
        // content which is included in the document ID hash.
        let doc1 = Document::builder()
            .title("Asset ID Test")
            .creator("Author")
            .add_paragraph("Text before image")
            .add_block(Block::image("assets/images/photo_v1.png", "Photo"))
            .build()?;

        let doc2 = Document::builder()
            .title("Asset ID Test")
            .creator("Author")
            .add_paragraph("Text before image")
            .add_block(Block::image("assets/images/photo_v2.png", "Photo"))
            .build()?;

        let id1 = doc1.compute_id()?;
        let id2 = doc2.compute_id()?;

        assert_ne!(
            id1, id2,
            "Different asset references in content should produce different document IDs"
        );

        // Same asset path should produce same document ID
        let doc3 = Document::builder()
            .title("Asset ID Test")
            .creator("Author")
            .add_paragraph("Text before image")
            .add_block(Block::image("assets/images/photo_v1.png", "Photo"))
            .build()?;

        let id3 = doc3.compute_id()?;
        assert_eq!(
            id1, id3,
            "Same asset references should produce same document ID"
        );

        Ok(())
    }
}
2751+
25652752/// Property-based tests using proptest
25662753#[ cfg( test) ]
25672754mod proptest_tests {
@@ -2617,5 +2804,77 @@ mod proptest_tests {
26172804 prop_assert_eq!( doc. title( ) , loaded. title( ) ) ;
26182805 prop_assert_eq!( doc. content( ) . blocks. len( ) , loaded. content( ) . blocks. len( ) ) ;
26192806 }
2807+
2808+ /// Per spec §06-document-hashing.md §4.1 - Metadata subset changes affect hash
2809+ #[ test]
2810+ fn proptest_hash_boundary_metadata_inclusion(
2811+ title1 in "[a-zA-Z ]{1,50}" ,
2812+ title2 in "[a-zA-Z ]{1,50}" ,
2813+ creator1 in "[a-zA-Z ]{1,30}" ,
2814+ creator2 in "[a-zA-Z ]{1,30}" ,
2815+ ) {
2816+ // When both title and creator differ, the document IDs must differ.
2817+ // (Skip when all pairs happen to match by coincidence.)
2818+ prop_assume!( title1 != title2 || creator1 != creator2) ;
2819+
2820+ let doc1 = Document :: builder( )
2821+ . title( & title1)
2822+ . creator( & creator1)
2823+ . add_paragraph( "Fixed content" )
2824+ . build( )
2825+ . unwrap( ) ;
2826+
2827+ let doc2 = Document :: builder( )
2828+ . title( & title2)
2829+ . creator( & creator2)
2830+ . add_paragraph( "Fixed content" )
2831+ . build( )
2832+ . unwrap( ) ;
2833+
2834+ let id1 = doc1. compute_id( ) . unwrap( ) ;
2835+ let id2 = doc2. compute_id( ) . unwrap( ) ;
2836+
2837+ prop_assert_ne!(
2838+ id1, id2,
2839+ "Different identity metadata should produce different hashes"
2840+ ) ;
2841+ }
2842+
2843+ /// Valid blocks always serialize to JSON with a "type" field and deserialize back
2844+ #[ test]
2845+ fn proptest_block_structure_constraints(
2846+ text in "[a-zA-Z0-9 .,!?]{1,100}" ,
2847+ level in 1u8 ..=6u8 ,
2848+ lang in "(rust|python|javascript|go|java)"
2849+ ) {
2850+ use cdx_core:: content:: Block ;
2851+
2852+ let blocks = vec![
2853+ Block :: paragraph( vec![ ] ) ,
2854+ Block :: heading( level, vec![ ] ) ,
2855+ Block :: code_block( text, Some ( lang) ) ,
2856+ Block :: horizontal_rule( ) ,
2857+ Block :: blockquote( vec![ ] ) ,
2858+ ] ;
2859+
2860+ for block in & blocks {
2861+ let json = serde_json:: to_value( block) . unwrap( ) ;
2862+ // Every block must have a "type" field
2863+ prop_assert!(
2864+ json. get( "type" ) . is_some( ) ,
2865+ "Block {:?} must serialize with a 'type' field" ,
2866+ block
2867+ ) ;
2868+
2869+ // Round-trip: deserialize should produce an equivalent block
2870+ let json_str = serde_json:: to_string( block) . unwrap( ) ;
2871+ let deserialized: Block = serde_json:: from_str( & json_str) . unwrap( ) ;
2872+ let re_serialized = serde_json:: to_string( & deserialized) . unwrap( ) ;
2873+ prop_assert_eq!(
2874+ json_str, re_serialized,
2875+ "Block round-trip should be stable"
2876+ ) ;
2877+ }
2878+ }
26202879 }
26212880}
0 commit comments