Skip to content

Commit 0b4ceb0

Browse files
[#70] Document ManagedReferenceRegistry parsing and PPtr callback
Refine the CallbackDelegate comment to clarify the id spaces (objectId and the return value are analyzer/database ids; fileId/pathId are raw PPtr fields) and document the return value. Add a block comment in front of ProcessManagedReferenceRegistry with C# and YAML examples showing the [SerializeReference] "references:" layout, and explain throughout that each entry's data follows the referenced type's own TypeTree (obtained via GetRefTypeTypeTreeRoot) - the reason walking the registry jumps between type trees and is more involved than the rest of the object. Also document the version 1/2 layouts, the terminating sentinel, the FQN string reads, and the registry re-entry guard.
1 parent 541a6e9 commit 0b4ceb0

2 files changed

Lines changed: 75 additions & 5 deletions

File tree

Analyzer/PPtrAndCrcProcessor.cs

Lines changed: 74 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,14 @@ namespace UnityDataTools.Analyzer;
1616
// CRC computation can be disabled (skipCrc) while still extracting references.
1717
public class PPtrAndCrcProcessor : IDisposable
1818
{
19+
// Invoked for each PPtr (object reference) found while walking an object.
20+
// objectId - analyzer/database id of the object that contains the reference (the source)
21+
// fileId - PPtr m_FileID: index into the file's external-reference table; 0 means this (local) file
22+
// pathId - PPtr m_PathID: the referenced object's local file id (LFID) within that file
23+
// propertyPath - dotted path to the reference, e.g. "m_MyObject.m_MyArray[2].m_PPtrProperty"
24+
// propertyType - the referenced type, e.g. "Texture2D"
25+
// Returns the analyzer/database id of the referenced object (same id space as objectId), which
26+
// PPtrAndCrcProcessor folds into the CRC.
1927
public delegate int CallbackDelegate(long objectId, int fileId, long pathId, string propertyPath, string propertyType);
2028

2129
// Content-addressed stream paths (new ContentDirectory build output) look like
@@ -51,8 +59,12 @@ public class PPtrAndCrcProcessor : IDisposable
5159
// skipCrc: when true, the tree is still walked to emit references but no CRC is computed.
5260
// callback: called for every PPtr found; its return value (the referenced object's id) is
5361
// folded into the CRC.
54-
public PPtrAndCrcProcessor(SerializedFile serializedFile, UnityFileReader reader, string folder,
55-
bool skipCrc, CallbackDelegate callback)
62+
public PPtrAndCrcProcessor(
63+
SerializedFile serializedFile,
64+
UnityFileReader reader,
65+
string folder,
66+
bool skipCrc,
67+
CallbackDelegate callback)
5668
{
5769
m_SerializedFile = serializedFile;
5870
m_Reader = reader;
@@ -163,7 +175,10 @@ private void ProcessNode(TypeTreeNode node, bool isInManagedReferenceRegistry)
163175
}
164176
else if (node.IsManagedReferenceRegistry)
165177
{
166-
// ManagedReferenceRegistry are never nested
178+
// The registry holds this object's [SerializeReference] instances (see
179+
// ProcessManagedReferenceRegistry). It only appears at the top level of the object;
180+
// the guard prevents re-entering it when we are already walking referenced-object
181+
// data through another type tree (isInManagedReferenceRegistry == true).
167182
if (!isInManagedReferenceRegistry)
168183
ProcessManagedReferenceRegistry(node);
169184
}
@@ -219,10 +234,12 @@ private void ProcessArray(TypeTreeNode node, bool isManagedReferenceRegistry, bo
219234
}
220235
else
221236
{
237+
// This is the version-2 "RefIds" array. Each element is a ReferencedObject
238+
// whose children are [rid, type, data]; read the rid here and hand the type
239+
// and data nodes to ProcessManagedReferenceData.
222240
if (dataNode.Children.Count < 3)
223241
throw new Exception("Invalid ReferencedObject");
224242

225-
// First child is rid.
226243
long rid = m_Reader.ReadInt64(m_Offset);
227244
AppendCrc(m_Offset, 8);
228245
m_Offset += 8;
@@ -233,6 +250,47 @@ private void ProcessArray(TypeTreeNode node, bool isManagedReferenceRegistry, bo
233250
}
234251
}
235252

253+
// A ManagedReferenceRegistry holds the [SerializeReference] instances owned by this object.
254+
// In YAML/JSON it is the "references:" section that always appears at the end of a
255+
// MonoBehaviour/ScriptableObject. Each instance is stored here exactly once; the fields that
256+
// point at it (elsewhere in the object) only store its "rid", so shared instances and cycles
257+
// collapse to the same rid.
258+
//
259+
// Given this C# source:
260+
//
261+
// [Serializable] public class MyClass { public string m_string; }
262+
//
263+
// public class MyScriptableObject : ScriptableObject
264+
// {
265+
// [SerializeReference] public MyClass m_refA, m_refB, m_refC; // m_refC is assigned the same instance as m_refB
266+
// }
267+
//
268+
// the serialized layout looks like this (YAML shown; the binary we walk has the same shape):
269+
//
270+
// m_refA: { rid: 4862042034409046192 }
271+
// m_refB: { rid: 4862042034409046193 }
272+
// m_refC: { rid: 4862042034409046193 } // shared instance -> same rid as m_refB
273+
// references:
274+
// version: 2
275+
// RefIds:
276+
// - rid: 4862042034409046192
277+
// type: { class: MyClass, ns: , asm: MyAssembly }
278+
// data: { m_string: foo }
279+
// - rid: 4862042034409046193
280+
// type: { class: MyClass, ns: , asm: MyAssembly }
281+
// data: { m_string: bar }
282+
//
283+
// The complication: TypeTrees cannot express polymorphism, so the layout of each "data" block
284+
// is NOT described by this object's own TypeTree. Each RefId entry names its concrete type
285+
// (class/namespace/assembly), and the "data" bytes follow a SEPARATE TypeTree obtained via
286+
// SerializedFile.GetRefTypeTypeTreeRoot(...). Walking the registry therefore means jumping into
287+
// a different TypeTree for every entry (see ProcessManagedReferenceData) - which is exactly why
288+
// finding references inside the registry is so much more involved than for the rest of the object.
289+
//
290+
// Two on-disk versions exist:
291+
// version 1 - entries stored back to back and terminated by a sentinel type (see
292+
// ProcessManagedReferenceData); the rid is implied by position.
293+
// version 2 - entries stored as a "RefIds" array, each element carrying its own rid.
236294
private void ProcessManagedReferenceRegistry(TypeTreeNode node)
237295
{
238296
if (node.Children.Count < 2)
@@ -251,6 +309,8 @@ private void ProcessManagedReferenceRegistry(TypeTreeNode node)
251309
var refTypeNode = refObjNode.Children[0];
252310
var refObjData = refObjNode.Children[1];
253311

312+
// Read entries until ProcessManagedReferenceData hits the sentinel; here the rid is
313+
// simply the entry's position.
254314
int i = 0;
255315
while (ProcessManagedReferenceData(refTypeNode, refObjData, i++))
256316
{
@@ -280,11 +340,18 @@ private void ProcessManagedReferenceRegistry(TypeTreeNode node)
280340
}
281341
}
282342

343+
// Reads one registry entry: the concrete type's fully-qualified name (class, namespace,
344+
// assembly) followed by the object's data. The data is laid out according to that type's own
345+
// TypeTree, so we fetch it and recurse into it. Returns false when there is nothing to walk:
346+
// the "Terminus" sentinel type that ends a version-1 registry, or a null/unknown rid (-1 / -2;
347+
// never seen in version 1, whose rids are positions counted from 0). Returns true otherwise.
283348
bool ProcessManagedReferenceData(TypeTreeNode refTypeNode, TypeTreeNode referencedTypeDataNode, long rid)
284349
{
285350
if (refTypeNode.Children.Count < 3)
286351
throw new Exception("Invalid ReferencedManagedType");
287352

353+
// The type's fully-qualified name is stored as three consecutive strings: class, namespace,
354+
// then assembly. Each is a length-prefixed string, padded to a 4-byte boundary.
288355
var stringSize = m_Reader.ReadInt32(m_Offset);
289356
AppendCrc(m_Offset, stringSize + 4);
290357
var className = m_Reader.ReadString(m_Offset + 4, stringSize);
@@ -303,15 +370,17 @@ bool ProcessManagedReferenceData(TypeTreeNode refTypeNode, TypeTreeNode referenc
303370
m_Offset += stringSize + 4;
304371
m_Offset = (m_Offset + 3) & ~(3);
305372

373+
// Sentinel that terminates a version-1 registry, plus the null/unknown rids.
306374
if ((className == "Terminus" && namespaceName == "UnityEngine.DMAT" && assemblyName == "FAKE_ASM") ||
307375
rid == -1 || rid == -2)
308376
{
309377
return false;
310378
}
311379

380+
// The data block follows the referenced type's own TypeTree, not this object's, so look it
381+
// up by FQN and walk it (isInManagedReferenceRegistry = true so we don't re-enter the registry).
312382
var refTypeTypeTree = m_SerializedFile.GetRefTypeTypeTreeRoot(className, namespaceName, assemblyName);
313383

314-
// Process the ReferencedObject using its own TypeTree.
315384
var size = m_StringBuilder.Length;
316385
m_StringBuilder.Append("rid(");
317386
m_StringBuilder.Append(rid);

Analyzer/SQLite/Writers/SerializedFileSQLiteWriter.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,7 @@ public void WriteSerializedFile(string relativePath, string fullPath, string con
269269
}
270270
}
271271

272+
// Callback from PPtrAndCrcProcessor for each reference discovered in the SerializedFile
272273
private int AddReference(long objectId, int fileId, long pathId, string propertyPath, string propertyType)
273274
{
274275
// Always resolve the id so the CRC stays stable; only persist the row when references

0 commit comments

Comments
 (0)