Skip to content

Commit 84c3d2a

Browse files
Directly parse SerializedFile to detect format
1 parent b92c1b2 commit 84c3d2a

5 files changed

Lines changed: 648 additions & 44 deletions

File tree

Lines changed: 244 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,244 @@
1+
using System;
2+
using System.IO;
3+
using NUnit.Framework;
4+
using UnityDataTools.Analyzer.Util;
5+
using UnityDataTools.FileSystem;
6+
7+
namespace UnityDataTools.Analyzer.Tests;
8+
9+
/// <summary>
10+
/// Tests for file format detection utilities (ArchiveDetector and SerializedFileDetector).
11+
/// </summary>
12+
public class FileDetectionTests
13+
{
14+
private string m_TestDataPath;
15+
16+
[OneTimeSetUp]
17+
public void OneTimeSetUp()
18+
{
19+
m_TestDataPath = Path.Combine(TestContext.CurrentContext.TestDirectory, "Data");
20+
UnityFileSystem.Init();
21+
}
22+
23+
[OneTimeTearDown]
24+
public void OneTimeTearDown()
25+
{
26+
UnityFileSystem.Cleanup();
27+
}
28+
29+
#region SerializedFile Detection Tests
30+
31+
[Test]
32+
public void TryDetectSerializedFile_ValidPlayerDataFile_ReturnsTrue()
33+
{
34+
var testFile = Path.Combine(m_TestDataPath, "PlayerData", "2022.1.20f1", "level0");
35+
36+
bool result = SerializedFileDetector.TryDetectSerializedFile(testFile, out var info);
37+
38+
Assert.IsTrue(result, "level0 should be detected as a valid SerializedFile");
39+
Assert.IsNotNull(info);
40+
41+
// Verify exact values from the level0 file header
42+
Assert.That(info.Version, Is.EqualTo(22u), "Version should be 22");
43+
Assert.That(info.FileSize, Is.EqualTo(31988UL), "FileSize should be 31988");
44+
Assert.That(info.MetadataSize, Is.EqualTo(24580UL), "MetadataSize should be 24580");
45+
Assert.That(info.DataOffset, Is.EqualTo(24640UL), "DataOffset should be 24640");
46+
Assert.That(info.Endianness, Is.EqualTo((byte)1), "Endianness should be 1 (BigEndian)");
47+
Assert.IsFalse(info.IsLegacyFormat, "Version 22 uses modern format (64-bit header)");
48+
}
49+
50+
[Test]
51+
public void TryDetectSerializedFile_SerializedFileInsideArchive_ReturnsTrue()
52+
{
53+
// This tests a serialized file extracted from the alienprefab archive
54+
// The file was originally at CAB-c5053efeda8860d7e7b7ce4b4c66705b inside the archive
55+
var testFile = Path.Combine(m_TestDataPath, "LegacyFormats", "CAB-c5053efeda8860d7e7b7ce4b4c66705b");
56+
57+
bool result = SerializedFileDetector.TryDetectSerializedFile(testFile, out var info);
58+
59+
Assert.IsTrue(result, "CAB-c5053efeda8860d7e7b7ce4b4c66705b should be detected as a valid SerializedFile");
60+
Assert.IsNotNull(info);
61+
62+
// Verify exact values from the CAB file header
63+
Assert.That(info.Version, Is.EqualTo(17u), "Version should be 17");
64+
Assert.That(info.FileSize, Is.EqualTo(595380UL), "FileSize should be 595380");
65+
Assert.That(info.MetadataSize, Is.EqualTo(61328UL), "MetadataSize should be 61328");
66+
Assert.That(info.DataOffset, Is.EqualTo(61360UL), "DataOffset should be 61360");
67+
Assert.That(info.Endianness, Is.EqualTo((byte)1), "Endianness should be 1 (BigEndian)");
68+
Assert.IsTrue(info.IsLegacyFormat, "Version 17 uses legacy format (32-bit header)");
69+
}
70+
71+
[Test]
72+
public void TryDetectSerializedFile_JsonFile_ReturnsFalse()
73+
{
74+
var testFiles = Directory.GetFiles(Path.Combine(m_TestDataPath, "AddressableBuildLayouts"), "*.json");
75+
Assert.Greater(testFiles.Length, 0, "Should have at least one JSON test file");
76+
77+
foreach (var testFile in testFiles)
78+
{
79+
bool result = SerializedFileDetector.TryDetectSerializedFile(testFile, out var info);
80+
81+
Assert.IsFalse(result, $"{Path.GetFileName(testFile)} should not be detected as a SerializedFile");
82+
Assert.IsNull(info, "Info should be null for non-SerializedFile");
83+
}
84+
}
85+
86+
[Test]
87+
public void TryDetectSerializedFile_TextFile_ReturnsFalse()
88+
{
89+
var testFile = Path.Combine(m_TestDataPath, "PlayerNoTypeTree", "README.md");
90+
91+
bool result = SerializedFileDetector.TryDetectSerializedFile(testFile, out var info);
92+
93+
Assert.IsFalse(result, "README.md should not be detected as a SerializedFile");
94+
Assert.IsNull(info);
95+
}
96+
97+
[Test]
98+
public void TryDetectSerializedFile_EmptyFile_ReturnsFalse()
99+
{
100+
// Create a temporary empty file
101+
var tempFile = Path.GetTempFileName();
102+
try
103+
{
104+
bool result = SerializedFileDetector.TryDetectSerializedFile(tempFile, out var info);
105+
106+
Assert.IsFalse(result, "Empty file should not be detected as a SerializedFile");
107+
Assert.IsNull(info);
108+
}
109+
finally
110+
{
111+
File.Delete(tempFile);
112+
}
113+
}
114+
115+
[Test]
116+
public void TryDetectSerializedFile_TruncatedHeader_ReturnsFalse()
117+
{
118+
// Create a temporary file with only partial header (10 bytes)
119+
var tempFile = Path.GetTempFileName();
120+
try
121+
{
122+
File.WriteAllBytes(tempFile, new byte[10]); // Less than minimum header size (20 bytes)
123+
124+
bool result = SerializedFileDetector.TryDetectSerializedFile(tempFile, out var info);
125+
126+
Assert.IsFalse(result, "Truncated file should not be detected as a SerializedFile");
127+
Assert.IsNull(info);
128+
}
129+
finally
130+
{
131+
File.Delete(tempFile);
132+
}
133+
}
134+
135+
[Test]
136+
public void TryDetectSerializedFile_RandomBytes_ReturnsFalse()
137+
{
138+
// Create a temporary file with random bytes
139+
var tempFile = Path.GetTempFileName();
140+
try
141+
{
142+
var random = new Random(12345); // Fixed seed for reproducibility
143+
byte[] randomData = new byte[100];
144+
random.NextBytes(randomData);
145+
File.WriteAllBytes(tempFile, randomData);
146+
147+
bool result = SerializedFileDetector.TryDetectSerializedFile(tempFile, out var info);
148+
149+
Assert.IsFalse(result, "Random bytes should not be detected as a SerializedFile");
150+
Assert.IsNull(info);
151+
}
152+
finally
153+
{
154+
File.Delete(tempFile);
155+
}
156+
}
157+
158+
[Test]
159+
public void TryDetectSerializedFile_NonExistentFile_ReturnsFalse()
160+
{
161+
var nonExistentFile = Path.Combine(m_TestDataPath, "ThisFileDoesNotExist.xyz");
162+
163+
bool result = SerializedFileDetector.TryDetectSerializedFile(nonExistentFile, out var info);
164+
165+
Assert.IsFalse(result, "Non-existent file should not be detected as a SerializedFile");
166+
Assert.IsNull(info);
167+
}
168+
169+
#endregion
170+
171+
#region Archive Detection Tests
172+
173+
[Test]
174+
public void IsUnityArchive_ValidAssetBundle_ReturnsTrue()
175+
{
176+
var testFile = Path.Combine(m_TestDataPath, "AssetBundles", "2022.1.20f1", "assetbundle");
177+
178+
bool result = ArchiveDetector.IsUnityArchive(testFile);
179+
180+
Assert.IsTrue(result, "assetbundle should be detected as a Unity Archive");
181+
}
182+
183+
[Test]
184+
public void IsUnityArchive_OldFormatArchive_ReturnsTrue()
185+
{
186+
var testFile = Path.Combine(m_TestDataPath, "LegacyFormats", "alienprefab");
187+
188+
bool result = ArchiveDetector.IsUnityArchive(testFile);
189+
190+
Assert.IsTrue(result, "alienprefab should be detected as a Unity Archive");
191+
}
192+
193+
[Test]
194+
public void IsUnityArchive_SerializedFile_ReturnsFalse()
195+
{
196+
var testFile = Path.Combine(m_TestDataPath, "PlayerData", "2022.1.20f1", "level0");
197+
198+
bool result = ArchiveDetector.IsUnityArchive(testFile);
199+
200+
Assert.IsFalse(result, "level0 (SerializedFile) should not be detected as an archive");
201+
}
202+
203+
[Test]
204+
public void IsUnityArchive_JsonFile_ReturnsFalse()
205+
{
206+
var testFiles = Directory.GetFiles(Path.Combine(m_TestDataPath, "AddressableBuildLayouts"), "*.json");
207+
Assert.Greater(testFiles.Length, 0, "Should have at least one JSON test file");
208+
209+
foreach (var testFile in testFiles)
210+
{
211+
bool result = ArchiveDetector.IsUnityArchive(testFile);
212+
213+
Assert.IsFalse(result, $"{Path.GetFileName(testFile)} should not be detected as an archive");
214+
}
215+
}
216+
217+
[Test]
218+
public void IsUnityArchive_EmptyFile_ReturnsFalse()
219+
{
220+
var tempFile = Path.GetTempFileName();
221+
try
222+
{
223+
bool result = ArchiveDetector.IsUnityArchive(tempFile);
224+
225+
Assert.IsFalse(result, "Empty file should not be detected as an archive");
226+
}
227+
finally
228+
{
229+
File.Delete(tempFile);
230+
}
231+
}
232+
233+
[Test]
234+
public void IsUnityArchive_NonExistentFile_ReturnsFalse()
235+
{
236+
var nonExistentFile = Path.Combine(m_TestDataPath, "ThisFileDoesNotExist.xyz");
237+
238+
bool result = ArchiveDetector.IsUnityArchive(nonExistentFile);
239+
240+
Assert.IsFalse(result, "Non-existent file should not be detected as an archive");
241+
}
242+
243+
#endregion
244+
}

Analyzer/SQLite/Parsers/SerializedFileParser.cs

Lines changed: 19 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
using Microsoft.Data.Sqlite;
55
using UnityDataTools.Analyzer.SQLite.Handlers;
66
using UnityDataTools.Analyzer.SQLite.Writers;
7+
using UnityDataTools.Analyzer.Util;
78
using UnityDataTools.FileSystem;
89

910
namespace UnityDataTools.Analyzer.SQLite.Parsers
@@ -17,7 +18,14 @@ public class SerializedFileParser : ISQLiteFileParser
1718

1819
public bool CanParse(string filename)
1920
{
20-
return ShouldIgnoreFile(filename) == false;
21+
// First check if the file is in the ignore list (by extension or filename)
22+
if (ShouldIgnoreFile(filename))
23+
return false;
24+
25+
// Then validate that it's actually a Unity file by checking its format
26+
// This prevents ugly exceptions when processing non-Unity files
27+
return ArchiveDetector.IsUnityArchive(filename)
28+
|| SerializedFileDetector.TryDetectSerializedFile(filename, out _);
2129
}
2230

2331

@@ -40,12 +48,12 @@ public void Parse(string filename)
4048

4149
bool ShouldIgnoreFile(string file)
4250
{
43-
// Unfortunately there is no standard extension for AssetBundles, and SerializedFiles often have no extension at all.
44-
// There is also no distinctive signature at the start of a SerializedFile to immediately recognize it based on its first bytes.
45-
// This makes it difficult to use the "--search-pattern" argument to only pick those files.
46-
47-
// Hence to reduce noise in UnityDataTool output we filter out files that we have a high confidence are
48-
// NOT SerializedFiles or Unity Archives.
51+
// Filter out common non-Unity files by extension or filename.
52+
// This is a fast initial filter before we perform format detection.
53+
//
54+
// Note: AssetBundles have no standard extension, and SerializedFiles often have no extension at all.
55+
// Format detection (via ArchiveDetector and SerializedFileDetector) is performed after this filter
56+
// to definitively identify Unity files.
4957

5058
string fileName = Path.GetFileName(file);
5159
string extension = Path.GetExtension(file);
@@ -69,7 +77,7 @@ bool ShouldIgnoreFile(string file)
6977

7078
void ProcessFile(string file, string rootDirectory)
7179
{
72-
if (IsUnityArchive(file))
80+
if (ArchiveDetector.IsUnityArchive(file))
7381
{
7482
bool archiveHadErrors = false;
7583
using (UnityArchive archive = UnityFileSystem.MountArchive(file, "archive:" + Path.DirectorySeparatorChar))
@@ -122,45 +130,12 @@ void ProcessFile(string file, string rootDirectory)
122130
}
123131
else
124132
{
125-
// This isn't a Unity Archive file. Try to open it as a SerializedFile.
126-
// Unfortunately there is no standard file extension, or clear signature at the start of the file,
127-
// to test if it truly is a SerializedFile. So this will process files that are clearly not unity build files,
128-
// and there is a chance for crashes and freezes if the parser misinterprets the file content.
133+
// This isn't a Unity Archive file, so process it as a SerializedFile.
134+
// Note: The file has already been validated in CanParse() via SerializedFileDetector,
135+
// so we're confident it's a valid SerializedFile at this point.
129136
var relativePath = Path.GetRelativePath(rootDirectory, file);
130137
m_Writer.WriteSerializedFile(relativePath, file, Path.GetDirectoryName(file));
131138
}
132139
}
133-
134-
private static bool IsUnityArchive(string filePath)
135-
{
136-
// Check whether a file is a Unity Archive (AssetBundle) by looking for known signatures at the start of the file.
137-
// "UnityFS" is the current signature, but some older formats of the file are still supported
138-
string[] signatures = { "UnityFS", "UnityWeb", "UnityRaw", "UnityArchive" };
139-
int maxLen = 12; // "UnityArchive".Length
140-
byte[] buffer = new byte[maxLen];
141-
142-
using (var fs = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read))
143-
{
144-
int read = fs.Read(buffer, 0, buffer.Length);
145-
foreach (var sig in signatures)
146-
{
147-
if (read >= sig.Length)
148-
{
149-
bool match = true;
150-
for (int i = 0; i < sig.Length; ++i)
151-
{
152-
if (buffer[i] != sig[i])
153-
{
154-
match = false;
155-
break;
156-
}
157-
}
158-
if (match)
159-
return true;
160-
}
161-
}
162-
return false;
163-
}
164-
}
165140
}
166141
}

0 commit comments

Comments
 (0)