Skip to content

Commit d2a99cd

Browse files
More accurate SerializedFile header parsing
1 parent 7cf9156 commit d2a99cd

4 files changed

Lines changed: 105 additions & 67 deletions

File tree

Analyzer.Tests/FileDetectionTests.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ public void TryDetectSerializedFile_ValidPlayerDataFile_ReturnsTrue()
4343
Assert.That(info.FileSize, Is.EqualTo(31988UL), "FileSize should be 31988");
4444
Assert.That(info.MetadataSize, Is.EqualTo(24580UL), "MetadataSize should be 24580");
4545
Assert.That(info.DataOffset, Is.EqualTo(24640UL), "DataOffset should be 24640");
46-
Assert.That(info.Endianness, Is.EqualTo((byte)1), "Endianness should be 1 (BigEndian)");
46+
Assert.That(info.Endianness, Is.EqualTo((byte)0), "Endianness should be 0 (LittleEndian)");
4747
Assert.IsFalse(info.IsLegacyFormat, "Version 22 uses modern format (64-bit header)");
4848
}
4949

@@ -64,7 +64,7 @@ public void TryDetectSerializedFile_SerializedFileInsideArchive_ReturnsTrue()
6464
Assert.That(info.FileSize, Is.EqualTo(595380UL), "FileSize should be 595380");
6565
Assert.That(info.MetadataSize, Is.EqualTo(61328UL), "MetadataSize should be 61328");
6666
Assert.That(info.DataOffset, Is.EqualTo(61360UL), "DataOffset should be 61360");
67-
Assert.That(info.Endianness, Is.EqualTo((byte)1), "Endianness should be 1 (BigEndian)");
67+
Assert.That(info.Endianness, Is.EqualTo((byte)0), "Endianness should be 0 (LittleEndian)");
6868
Assert.IsTrue(info.IsLegacyFormat, "Version 17 uses legacy format (32-bit header)");
6969
}
7070

Analyzer/Util/SerializedFileDetector.cs

Lines changed: 74 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -19,33 +19,36 @@ public class SerializedFileInfo
1919
/// <summary>
2020
/// Utility for detecting Unity SerializedFile format by reading and validating the file header.
2121
///
22-
/// Unity SerializedFiles have two different header formats based on version:
22+
/// Unity SerializedFiles have evolved through several format versions:
2323
///
24-
/// Legacy Format (versions &lt; 22 / kLargeFilesSupport):
25-
/// - 20 byte header using 32-bit offsets/sizes
26-
/// - Endianness byte is stored at the END of the file, just before metadata
24+
/// Version &lt; 9:
25+
/// - 20-byte header (SerializedFileHeader32) with 32-bit offsets/sizes
26+
/// - Layout: [header][data][metadata]
27+
/// - Endianness byte stored at END of file, just before metadata
28+
///
29+
/// Version 9-21:
30+
/// - 20-byte header (SerializedFileHeader32) with 32-bit offsets/sizes
31+
/// - Layout: [header][metadata][data]
32+
/// - Endianness byte at offset 16 in header
2733
/// - Limited to 4GB file sizes
2834
///
29-
/// Modern Format (versions &gt;= 22 / kLargeFilesSupport):
30-
/// - 48 byte header using 64-bit offsets/sizes
31-
/// - Endianness byte is stored IN the header at offset 40
35+
/// Version &gt;= 22 (kLargeFilesSupport):
36+
/// - 48-byte header (SerializedFileHeader) with 64-bit offsets/sizes
37+
/// - Layout: [header][metadata][data]
38+
/// - Endianness byte at offset 40 in header
3239
/// - Supports files larger than 4GB
3340
///
34-
/// This implementation is based on Unity's SerializedFile.cpp::ReadHeader() from:
35-
/// D:\UnitySrc\unity\Runtime\Serialize\SerializedFile.cpp
41+
/// Important: The header itself is always stored in big-endian format on disk,
42+
/// but the m_Endianess byte indicates the endianness of the actual data section.
3643
/// </summary>
3744
public static class SerializedFileDetector
3845
{
39-
// Version boundary where header format changed from 32-bit to 64-bit
40-
// Corresponds to SerializedFileFormatVersion::kLargeFilesSupport
41-
private const uint LargeFilesSupportVersion = 22;
42-
43-
// Version where endianness byte was moved into the header at offset 40
44-
// Prior to this, even modern format files had endianness at the end of the file
45-
private const uint EndiannessInHeaderVersion = 23;
46+
// Version boundaries for format changes
47+
private const uint NewLayoutVersion = 9; // kUnknown_9: Changed from [header][data][metadata] to [header][metadata][data]
48+
private const uint LargeFilesSupportVersion = 22; // kLargeFilesSupport: Changed to 64-bit header
4649

4750
// Reasonable version range for SerializedFiles
48-
// Unity is currently in the 20s-30s range, so we accept 1-50
51+
// SerializedFile format versions written by current Unity releases are in the 20s-30s range
4952
private const uint MinVersion = 1;
5053
private const uint MaxVersion = 50;
5154

@@ -54,8 +57,8 @@ public static class SerializedFileDetector
5457
private const byte BigEndian = 1;
5558

5659
// Header sizes
57-
private const int LegacyHeaderSize = 20;
58-
private const int ModernHeaderSize = 48;
60+
private const int LegacyHeaderSize = 20; // SerializedFileHeader32
61+
private const int ModernHeaderSize = 48; // SerializedFileHeader
5962

6063
/// <summary>
6164
/// Attempts to detect if a file is a Unity SerializedFile by reading and validating its header.
@@ -92,28 +95,27 @@ public static bool TryDetectSerializedFile(string filePath, out SerializedFileIn
9295
// STEP 1: Read version to determine header format
9396
// ============================================================
9497

95-
// The version field is at the same offset in both formats:
96-
// - Legacy header: bytes 8-11 (UInt32)
97-
// - Modern header: bytes 8-11 (UInt32)
98+
// The version field is always at offset 8 in both header formats.
99+
// The header itself is always stored in big-endian format on disk.
100+
// On little-endian platforms (Windows, etc.), we need to swap the header fields.
98101
//
99-
// We need to handle potential endianness swap. Try reading the version
100-
// in little-endian first, and if it's out of range, try swapping.
102+
// We try both interpretations to determine if swapping is needed:
101103
uint versionLE = BitConverter.ToUInt32(headerBytes, 8);
102104
uint versionBE = SwapUInt32(versionLE);
103105

104-
// Determine which endianness gives us a valid version
106+
// Determine which interpretation gives us a valid version number
105107
uint version;
106-
bool needsSwap;
108+
bool needsSwap; // Whether header fields need byte swapping
107109

108110
if (versionLE >= MinVersion && versionLE <= MaxVersion)
109111
{
110-
// Little-endian interpretation is valid
112+
// Reading as little-endian gives valid version (header is in little-endian format)
111113
version = versionLE;
112114
needsSwap = false;
113115
}
114116
else if (versionBE >= MinVersion && versionBE <= MaxVersion)
115117
{
116-
// Big-endian interpretation is valid
118+
// Reading as big-endian gives valid version (header is in big-endian format)
117119
version = versionBE;
118120
needsSwap = true;
119121
}
@@ -123,26 +125,37 @@ public static bool TryDetectSerializedFile(string filePath, out SerializedFileIn
123125
return false;
124126
}
125127

128+
// Determine header format based on version
126129
bool isLegacyFormat = version < LargeFilesSupportVersion;
127130

128131
// ============================================================
129-
// STEP 2: Determine endianness and swap if needed
132+
// STEP 2: Read endianness byte
130133
// ============================================================
134+
//
135+
// The m_Endianess byte indicates the endianness of the DATA section
136+
// (not the header, which is always big-endian on disk).
137+
// Location depends on version:
138+
// - Version < 9: At end of file (before metadata) - we skip reading it for detection
139+
// - Version 9-21: At offset 16 in the 20-byte header
140+
// - Version >= 22: At offset 40 in the 48-byte header
141+
//
142+
// The endianness byte is never swapped (it's a single byte).
131143

132144
byte endianness;
133-
if (isLegacyFormat)
145+
146+
if (version < NewLayoutVersion)
134147
{
135-
// Legacy format: Endianness byte is at the END of the file
136-
// It's located just before the metadata section
137-
// For detection purposes, we already determined endianness from the version field
148+
// Version < 9: Endianness is at the end of the file
149+
// For detection purposes, we infer it from the header byte order
150+
// (though this is technically the header's endianness, not the data's)
138151
endianness = needsSwap ? BigEndian : LittleEndian;
139152
}
140-
else if (version >= EndiannessInHeaderVersion)
153+
else if (isLegacyFormat)
141154
{
142-
// Modern format (version >= 23): Endianness byte is at offset 40 in the header
143-
if (bytesRead >= 41)
155+
// Version 9-21: Endianness is at offset 16 in SerializedFileHeader32
156+
if (bytesRead >= 17)
144157
{
145-
endianness = headerBytes[40];
158+
endianness = headerBytes[16];
146159

147160
// Validate endianness value
148161
if (endianness != LittleEndian && endianness != BigEndian)
@@ -155,20 +168,18 @@ public static bool TryDetectSerializedFile(string filePath, out SerializedFileIn
155168
}
156169
else
157170
{
158-
// Version 22: Uses 64-bit header but endianness byte is still at end of file
159-
// For detection purposes, use what we determined from the version field
160-
endianness = needsSwap ? BigEndian : LittleEndian;
161-
}
171+
// Version >= 22: Endianness is at offset 40 in SerializedFileHeader
172+
if (bytesRead >= 41)
173+
{
174+
endianness = headerBytes[40];
162175

163-
// Verify the endianness byte matches what we detected from the version field
164-
// Only do this for versions where endianness is in the header (>= 23)
165-
if (!isLegacyFormat && version >= EndiannessInHeaderVersion)
166-
{
167-
bool endiannessIndicatesSwap = (endianness == BigEndian);
168-
if (endiannessIndicatesSwap != needsSwap)
176+
// Validate endianness value
177+
if (endianness != LittleEndian && endianness != BigEndian)
178+
return false;
179+
}
180+
else
169181
{
170-
// Endianness byte doesn't match what we detected - suspicious
171-
return false;
182+
return false; // File truncated
172183
}
173184
}
174185

@@ -180,15 +191,16 @@ public static bool TryDetectSerializedFile(string filePath, out SerializedFileIn
180191

181192
if (isLegacyFormat)
182193
{
183-
// Legacy Header Layout (20 bytes total):
194+
// SerializedFileHeader32 Layout (20 bytes total):
184195
// Offset 0-3: UInt32 m_MetadataSize
185196
// Offset 4-7: UInt32 m_FileSize
186197
// Offset 8-11: UInt32 m_Version
187198
// Offset 12-15: UInt32 m_DataOffset
188-
// Offset 16-19: Reserved/padding
199+
// Offset 16: UInt8 m_Endianess (only present for version >= 9)
200+
// Offset 17-19: UInt8 m_Reserved[3]
189201
//
190-
// Note: m_Endianess is NOT in the header for legacy format!
191-
// It's stored at the end of the file before metadata.
202+
// Note: For version < 9, m_Endianess is NOT in the header.
203+
// It's stored at the end of the file, just before metadata begins.
192204

193205
uint metadataSize32 = ReadUInt32(headerBytes, 0, needsSwap);
194206
uint fileSize32 = ReadUInt32(headerBytes, 4, needsSwap);
@@ -200,23 +212,22 @@ public static bool TryDetectSerializedFile(string filePath, out SerializedFileIn
200212
dataOffset = dataOffset32;
201213

202214
// Special case: Legacy format used UInt32.MaxValue to indicate "unknown" file size
203-
// In 64-bit representation, this should be handled
204215
if (fileSize32 == uint.MaxValue)
205216
{
206217
fileSize = ulong.MaxValue;
207218
}
208219
}
209220
else
210221
{
211-
// Modern Header Layout (48 bytes total):
212-
// Offset 0-7: Reserved (8 bytes)
213-
// Offset 8-11: UInt32 m_Version
214-
// Offset 12-15: Reserved (4 bytes)
215-
// Offset 16-23: UInt64 m_MetadataSize
216-
// Offset 24-31: UInt64 m_FileSize
217-
// Offset 32-39: UInt64 m_DataOffset
218-
// Offset 40: UInt8 m_Endianess
219-
// Offset 41-47: Reserved (7 bytes)
222+
// SerializedFileHeader Layout (48 bytes total):
223+
// Offset 0-7: UInt8[8] m_Legacy (unused, allows struct alignment with SerializedFileHeader32)
224+
// Offset 8-11: UInt32 m_Version
225+
// Offset 12-15: UInt8[4] m_Reserved0 (explicit padding)
226+
// Offset 16-23: UInt64 m_MetadataSize
227+
// Offset 24-31: UInt64 m_FileSize
228+
// Offset 32-39: UInt64 m_DataOffset
229+
// Offset 40: UInt8 m_Endianess
230+
// Offset 41-47: UInt8[7] m_Reserved1
220231

221232
metadataSize = ReadUInt64(headerBytes, 16, needsSwap);
222233
fileSize = ReadUInt64(headerBytes, 24, needsSwap);

Documentation/command-serialized-file.md

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,18 @@
22

33
The `serialized-file` command (alias: `sf`) provides utilities for quickly inspecting SerializedFile metadata without performing a full analysis.
44

5+
This exposes information about the Binary SerializedFile format. This format has evolved over time, but all recent versions have:
6+
* a small header section (exposed by the `header` subcommand)
7+
* a metadata section which contains a summary of the data
8+
* Unity Version and target platform
9+
* typetree information
10+
* the list of objects and offsets
11+
* external references
12+
* the data section which contains the Unity objects in serialized form
13+
14+
The `externalrefs` and `objectlist` sub-commands expose information from the metadata section.
15+
The `dump` command can be used to view the serialized objects.
16+
517
## Sub-Commands
618

719
| Sub-Command | Description |
@@ -175,7 +187,7 @@ UnityDataTool serialized-file header level0 --format json
175187
"fileSize": 31988,
176188
"metadataSize": 24580,
177189
"dataOffset": 24640,
178-
"endianness": "Big Endian"
190+
"endianness": "Little Endian"
179191
}
180192
```
181193

@@ -188,7 +200,7 @@ UnityDataTool serialized-file header level0 --format json
188200
| **File Size** | Total size of the SerializedFile in bytes. Padding might make the actual file size slightly larger. |
189201
| **Metadata Size** | Size of the metadata section containing type information and object indices. |
190202
| **Data Offset** | Byte offset where the object data section begins in the file. |
191-
| **Endianness** | Byte order of the file: "Little Endian" (x86, most platforms) or "Big Endian" (older console platforms). |
203+
| **Endianness** | Byte order of the data in the file: "Little Endian" (x86, most platforms) or "Big Endian" (older console platforms). |
192204

193205
---
194206

TestCommon/Data/YamlFormat.asset

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
%YAML 1.1
2+
%TAG !u! tag:unity3d.com,2011:
3+
--- !u!114 &11400000
4+
MonoBehaviour:
5+
m_ObjectHideFlags: 0
6+
m_CorrespondingSourceObject: {fileID: 0}
7+
m_PrefabInstance: {fileID: 0}
8+
m_PrefabAsset: {fileID: 0}
9+
m_GameObject: {fileID: 0}
10+
m_Enabled: 1
11+
m_EditorHideFlags: 0
12+
m_Script: {fileID: 11500000, guid: 070349760c9dbfd4e8318d73401cca23, type: 3}
13+
m_Name: SimpleScriptableObjectAsset1
14+
m_EditorClassIdentifier:
15+
Data: 67

0 commit comments

Comments
 (0)