@@ -19,33 +19,36 @@ public class SerializedFileInfo
1919/// <summary>
2020/// Utility for detecting Unity SerializedFile format by reading and validating the file header.
2121///
22- /// Unity SerializedFiles have two different header formats based on version :
22+ /// Unity SerializedFiles have evolved through several format versions:
2323///
24- /// Legacy Format (versions < 22 / kLargeFilesSupport):
25- /// - 20 byte header using 32-bit offsets/sizes
26- /// - Endianness byte is stored at the END of the file, just before metadata
24+ /// Version < 9:
25+ /// - 20-byte header (SerializedFileHeader32) with 32-bit offsets/sizes
26+ /// - Layout: [header][data][metadata]
27+ /// - Endianness byte stored at END of file, just before metadata
28+ ///
29+ /// Version 9-21:
30+ /// - 20-byte header (SerializedFileHeader32) with 32-bit offsets/sizes
31+ /// - Layout: [header][metadata][data]
32+ /// - Endianness byte at offset 16 in header
2733/// - Limited to 4GB file sizes
2834///
29- /// Modern Format (versions >= 22 / kLargeFilesSupport):
30- /// - 48 byte header using 64-bit offsets/sizes
31- /// - Endianness byte is stored IN the header at offset 40
35+ /// Version >= 22 (kLargeFilesSupport):
36+ /// - 48-byte header (SerializedFileHeader) with 64-bit offsets/sizes
37+ /// - Layout: [header][metadata][data]
38+ /// - Endianness byte at offset 40 in header
3239/// - Supports files larger than 4GB
3340///
34- /// This implementation is based on Unity's SerializedFile.cpp::ReadHeader() from:
35- /// D:\UnitySrc\unity\Runtime\Serialize\SerializedFile.cpp
41+ /// Important: The header itself is always stored in big-endian format on disk,
42+ /// but the m_Endianess byte indicates the endianness of the actual data section.
3643/// </summary>
3744public static class SerializedFileDetector
3845{
39- // Version boundary where header format changed from 32-bit to 64-bit
40- // Corresponds to SerializedFileFormatVersion::kLargeFilesSupport
41- private const uint LargeFilesSupportVersion = 22 ;
42-
43- // Version where endianness byte was moved into the header at offset 40
44- // Prior to this, even modern format files had endianness at the end of the file
45- private const uint EndiannessInHeaderVersion = 23 ;
46+ // Version boundaries for format changes
47+ private const uint NewLayoutVersion = 9 ; // kUnknown_9: Changed from [header][data][metadata] to [header][metadata][data]
48+ private const uint LargeFilesSupportVersion = 22 ; // kLargeFilesSupport: Changed to 64-bit header
4649
4750 // Reasonable version range for SerializedFiles
48- // Unity is currently in the 20s-30s range, so we accept 1-50
51+ // SerializedFile format versions are currently in the 20s-30s range, so we accept 1-50
4952 private const uint MinVersion = 1 ;
5053 private const uint MaxVersion = 50 ;
5154
@@ -54,8 +57,8 @@ public static class SerializedFileDetector
5457 private const byte BigEndian = 1 ;
5558
5659 // Header sizes
57- private const int LegacyHeaderSize = 20 ;
58- private const int ModernHeaderSize = 48 ;
60+ private const int LegacyHeaderSize = 20 ; // SerializedFileHeader32
61+ private const int ModernHeaderSize = 48 ; // SerializedFileHeader
5962
6063 /// <summary>
6164 /// Attempts to detect if a file is a Unity SerializedFile by reading and validating its header.
@@ -92,28 +95,27 @@ public static bool TryDetectSerializedFile(string filePath, out SerializedFileIn
9295 // STEP 1: Read version to determine header format
9396 // ============================================================
9497
95- // The version field is at the same offset in both formats:
96- // - Legacy header: bytes 8-11 (UInt32)
97- // - Modern header: bytes 8-11 (UInt32)
98+ // The version field is always at offset 8 in both header formats.
99+ // The header itself is always stored in big-endian format on disk.
100+ // On little-endian platforms (Windows, etc.), we need to swap the header fields.
98101 //
99- // We need to handle potential endianness swap. Try reading the version
100- // in little-endian first, and if it's out of range, try swapping.
102+ // We try both interpretations to determine if swapping is needed:
101103 uint versionLE = BitConverter . ToUInt32 ( headerBytes , 8 ) ;
102104 uint versionBE = SwapUInt32 ( versionLE ) ;
103105
104- // Determine which endianness gives us a valid version
106+ // Determine which interpretation gives us a valid version number
105107 uint version ;
106- bool needsSwap ;
108+ bool needsSwap ; // Whether header fields need byte swapping
107109
108110 if ( versionLE >= MinVersion && versionLE <= MaxVersion )
109111 {
110- // Little -endian interpretation is valid
112+ // Reading the raw bytes as little-endian already gives a valid version, so header fields need no byte swapping
111113 version = versionLE ;
112114 needsSwap = false ;
113115 }
114116 else if ( versionBE >= MinVersion && versionBE <= MaxVersion )
115117 {
116- // Big -endian interpretation is valid
118+ // Reading as big-endian gives a valid version (header is in big-endian format), so header fields must be byte-swapped
117119 version = versionBE ;
118120 needsSwap = true ;
119121 }
@@ -123,26 +125,37 @@ public static bool TryDetectSerializedFile(string filePath, out SerializedFileIn
123125 return false ;
124126 }
125127
128+ // Determine header format based on version
126129 bool isLegacyFormat = version < LargeFilesSupportVersion ;
127130
128131 // ============================================================
129- // STEP 2: Determine endianness and swap if needed
132+ // STEP 2: Read endianness byte
130133 // ============================================================
134+ //
135+ // The m_Endianess byte indicates the endianness of the DATA section
136+ // (not the header, which is always big-endian on disk).
137+ // Location depends on version:
138+ // - Version < 9: At end of file (before metadata) - we skip reading it for detection
139+ // - Version 9-21: At offset 16 in the 20-byte header
140+ // - Version >= 22: At offset 40 in the 48-byte header
141+ //
142+ // The endianness byte is never swapped (it's a single byte).
131143
132144 byte endianness ;
133- if ( isLegacyFormat )
145+
146+ if ( version < NewLayoutVersion )
134147 {
135- // Legacy format : Endianness byte is at the END of the file
136- // It's located just before the metadata section
137- // For detection purposes, we already determined endianness from the version field
148+ // Version < 9 : Endianness is at the end of the file
149+ // For detection purposes, we infer it from the header byte order
150+ // (though this is technically the header's endianness, not the data's)
138151 endianness = needsSwap ? BigEndian : LittleEndian ;
139152 }
140- else if ( version >= EndiannessInHeaderVersion )
153+ else if ( isLegacyFormat )
141154 {
142- // Modern format (version >= 23) : Endianness byte is at offset 40 in the header
143- if ( bytesRead >= 41 )
155+ // Version 9-21 : Endianness is at offset 16 in SerializedFileHeader32
156+ if ( bytesRead >= 17 )
144157 {
145- endianness = headerBytes [ 40 ] ;
158+ endianness = headerBytes [ 16 ] ;
146159
147160 // Validate endianness value
148161 if ( endianness != LittleEndian && endianness != BigEndian )
@@ -155,20 +168,18 @@ public static bool TryDetectSerializedFile(string filePath, out SerializedFileIn
155168 }
156169 else
157170 {
158- // Version 22: Uses 64-bit header but endianness byte is still at end of file
159- // For detection purposes, use what we determined from the version field
160- endianness = needsSwap ? BigEndian : LittleEndian ;
161- }
171+ // Version >= 22: Endianness is at offset 40 in SerializedFileHeader
172+ if ( bytesRead >= 41 )
173+ {
174+ endianness = headerBytes [ 40 ] ;
162175
163- // Verify the endianness byte matches what we detected from the version field
164- // Only do this for versions where endianness is in the header (>= 23)
165- if ( ! isLegacyFormat && version >= EndiannessInHeaderVersion )
166- {
167- bool endiannessIndicatesSwap = ( endianness == BigEndian ) ;
168- if ( endiannessIndicatesSwap != needsSwap )
176+ // Validate endianness value
177+ if ( endianness != LittleEndian && endianness != BigEndian )
178+ return false ;
179+ }
180+ else
169181 {
170- // Endianness byte doesn't match what we detected - suspicious
171- return false ;
182+ return false ; // File truncated
172183 }
173184 }
174185
@@ -180,15 +191,16 @@ public static bool TryDetectSerializedFile(string filePath, out SerializedFileIn
180191
181192 if ( isLegacyFormat )
182193 {
183- // Legacy Header Layout (20 bytes total):
194+ // SerializedFileHeader32 Layout (20 bytes total):
184195 // Offset 0-3: UInt32 m_MetadataSize
185196 // Offset 4-7: UInt32 m_FileSize
186197 // Offset 8-11: UInt32 m_Version
187198 // Offset 12-15: UInt32 m_DataOffset
188- // Offset 16-19: Reserved/padding
199+ // Offset 16: UInt8 m_Endianess (only present for version >= 9)
200+ // Offset 17-19: UInt8 m_Reserved[3]
189201 //
190- // Note: m_Endianess is NOT in the header for legacy format!
191- // It's stored at the end of the file before metadata.
202+ // Note: For version < 9, m_Endianess is NOT in the header.
203+ // It's stored at the end of the file, just before metadata begins.
192204
193205 uint metadataSize32 = ReadUInt32 ( headerBytes , 0 , needsSwap ) ;
194206 uint fileSize32 = ReadUInt32 ( headerBytes , 4 , needsSwap ) ;
@@ -200,23 +212,22 @@ public static bool TryDetectSerializedFile(string filePath, out SerializedFileIn
200212 dataOffset = dataOffset32 ;
201213
202214 // Special case: Legacy format used UInt32.MaxValue to indicate "unknown" file size
203- // In 64-bit representation, this should be handled
204215 if ( fileSize32 == uint . MaxValue )
205216 {
206217 fileSize = ulong . MaxValue ;
207218 }
208219 }
209220 else
210221 {
211- // Modern Header Layout (48 bytes total):
212- // Offset 0-7: Reserved (8 bytes )
213- // Offset 8-11: UInt32 m_Version
214- // Offset 12-15: Reserved (4 bytes )
215- // Offset 16-23: UInt64 m_MetadataSize
216- // Offset 24-31: UInt64 m_FileSize
217- // Offset 32-39: UInt64 m_DataOffset
218- // Offset 40: UInt8 m_Endianess
219- // Offset 41-47: Reserved (7 bytes)
222+ // SerializedFileHeader Layout (48 bytes total):
223+ // Offset 0-7: UInt8[8] m_Legacy (unused, allows struct alignment with SerializedFileHeader32 )
224+ // Offset 8-11: UInt32 m_Version
225+ // Offset 12-15: UInt8[4] m_Reserved0 (explicit padding )
226+ // Offset 16-23: UInt64 m_MetadataSize
227+ // Offset 24-31: UInt64 m_FileSize
228+ // Offset 32-39: UInt64 m_DataOffset
229+ // Offset 40: UInt8 m_Endianess
230+ // Offset 41-47: UInt8[7] m_Reserved1
220231
221232 metadataSize = ReadUInt64 ( headerBytes , 16 , needsSwap ) ;
222233 fileSize = ReadUInt64 ( headerBytes , 24 , needsSwap ) ;
0 commit comments