@@ -16,6 +16,14 @@ namespace UnityDataTools.Analyzer;
1616// CRC computation can be disabled (skipCrc) while still extracting references.
1717public class PPtrAndCrcProcessor : IDisposable
1818{
19+ // Invoked for each PPtr (object reference) found while walking an object.
20+ // objectId - analyzer/database id of the object that contains the reference (the source)
21+ // fileId - PPtr m_FileID: index into the file's external-reference table; 0 means this (local) file
22+ // pathId - PPtr m_PathID: the referenced object's local file id (LFID) within that file
23+ // propertyPath - dotted path to the reference, e.g. "m_MyObject.m_MyArray[2].m_PPtrProperty"
24+ // propertyType - the referenced type, e.g. "Texture2D"
25+ // Returns the analyzer/database id of the referenced object (same id space as objectId, but
26+ // returned as an int whereas objectId is a long), which the caller folds into the CRC.
1927 public delegate int CallbackDelegate ( long objectId , int fileId , long pathId , string propertyPath , string propertyType ) ;
2028
2129 // Content-addressed stream paths (new ContentDirectory build output) look like
@@ -51,8 +59,12 @@ public class PPtrAndCrcProcessor : IDisposable
5159 // skipCrc: when true, the tree is still walked to emit references but no CRC is computed.
5260 // callback: called for every PPtr found; its return value (the referenced object's id) is
5361 // folded into the CRC.
54- public PPtrAndCrcProcessor ( SerializedFile serializedFile , UnityFileReader reader , string folder ,
55- bool skipCrc , CallbackDelegate callback )
62+ public PPtrAndCrcProcessor (
63+ SerializedFile serializedFile ,
64+ UnityFileReader reader ,
65+ string folder ,
66+ bool skipCrc ,
67+ CallbackDelegate callback )
5668 {
5769 m_SerializedFile = serializedFile ;
5870 m_Reader = reader ;
@@ -163,7 +175,10 @@ private void ProcessNode(TypeTreeNode node, bool isInManagedReferenceRegistry)
163175 }
164176 else if ( node . IsManagedReferenceRegistry )
165177 {
166- // ManagedReferenceRegistry are never nested
178+ // The registry holds this object's [SerializeReference] instances (see
179+ // ProcessManagedReferenceRegistry). It only appears at the top level of the object;
180+ // the guard prevents re-entering it when we are already walking referenced-object
181+ // data through another type tree (isInManagedReferenceRegistry == true).
167182 if ( ! isInManagedReferenceRegistry )
168183 ProcessManagedReferenceRegistry ( node ) ;
169184 }
@@ -219,10 +234,12 @@ private void ProcessArray(TypeTreeNode node, bool isManagedReferenceRegistry, bo
219234 }
220235 else
221236 {
237+ // This is the version-2 "RefIds" array. Each element is a ReferencedObject
238+ // whose children are [rid, type, data]; read the rid here and hand the type
239+ // and data nodes to ProcessManagedReferenceData.
222240 if ( dataNode . Children . Count < 3 )
223241 throw new Exception ( "Invalid ReferencedObject" ) ;
224242
225- // First child is rid.
226243 long rid = m_Reader . ReadInt64 ( m_Offset ) ;
227244 AppendCrc ( m_Offset , 8 ) ;
228245 m_Offset += 8 ;
@@ -233,6 +250,47 @@ private void ProcessArray(TypeTreeNode node, bool isManagedReferenceRegistry, bo
233250 }
234251 }
235252
253+ // A ManagedReferenceRegistry holds the [SerializeReference] instances owned by this object.
254+ // In YAML/JSON it is the "references:" section that always appears at the end of a
255+ // MonoBehaviour/ScriptableObject. Each instance is stored here exactly once; the fields that
256+ // point at it (elsewhere in the object) only store its "rid", so shared instances and cycles
257+ // collapse to the same rid.
258+ //
259+ // Given this C# source:
260+ //
261+ // [Serializable] public class MyClass { public string m_string; }
262+ //
263+ // public class MyScriptableObject : ScriptableObject
264+ // {
265+ // [SerializeReference] public MyClass m_refA, m_refB, m_refC; // m_refC assigned m_refB
266+ // }
267+ //
268+ // the serialized layout looks like this (YAML shown; the binary we walk has the same shape):
269+ //
270+ // m_refA: { rid: 4862042034409046192 }
271+ // m_refB: { rid: 4862042034409046193 }
272+ // m_refC: { rid: 4862042034409046193 } // shared instance -> same rid as m_refB
273+ // references:
274+ // version: 2
275+ // RefIds:
276+ // - rid: 4862042034409046192
277+ // type: { class: MyClass, ns: , asm: MyAssembly }
278+ // data: { m_string: foo }
279+ // - rid: 4862042034409046193
280+ // type: { class: MyClass, ns: , asm: MyAssembly }
281+ // data: { m_string: bar }
282+ //
283+ // The complication: TypeTrees cannot express polymorphism, so the layout of each "data" block
284+ // is NOT described by this object's own TypeTree. Each RefId entry names its concrete type
285+ // (class/namespace/assembly), and the "data" bytes follow a SEPARATE TypeTree obtained via
286+ // SerializedFile.GetRefTypeTypeTreeRoot(...). Walking the registry therefore means jumping into
287+ // a different TypeTree for every entry (see ProcessManagedReferenceData) - which is exactly why
288+ // finding references inside the registry is so much more involved than for the rest of the object.
289+ //
290+ // Two on-disk versions exist:
291+ // version 1 - entries stored back to back and terminated by a sentinel type (see
292+ // ProcessManagedReferenceData); the rid is implied by position.
293+ // version 2 - entries stored as a "RefIds" array, each element carrying its own rid.
236294 private void ProcessManagedReferenceRegistry ( TypeTreeNode node )
237295 {
238296 if ( node . Children . Count < 2 )
@@ -251,6 +309,8 @@ private void ProcessManagedReferenceRegistry(TypeTreeNode node)
251309 var refTypeNode = refObjNode . Children [ 0 ] ;
252310 var refObjData = refObjNode . Children [ 1 ] ;
253311
312+ // Read entries until ProcessManagedReferenceData hits the sentinel; here the rid is
313+ // simply the entry's position.
254314 int i = 0 ;
255315 while ( ProcessManagedReferenceData ( refTypeNode , refObjData , i ++ ) )
256316 {
@@ -280,11 +340,18 @@ private void ProcessManagedReferenceRegistry(TypeTreeNode node)
280340 }
281341 }
282342
343+ // Reads one registry entry: the concrete type's fully-qualified name (class, namespace,
344+ // assembly) followed by the object's data. The data is laid out according to that type's own
345+ // TypeTree, so we fetch it and recurse into it. Returns false, before any data is walked, when
346+ // the entry is the "Terminus" sentinel type that ends a version-1 registry, or when the rid
347+ // passed in is a null/unknown rid (-1 / -2); returns true otherwise.
283348 bool ProcessManagedReferenceData ( TypeTreeNode refTypeNode , TypeTreeNode referencedTypeDataNode , long rid )
284349 {
285350 if ( refTypeNode . Children . Count < 3 )
286351 throw new Exception ( "Invalid ReferencedManagedType" ) ;
287352
353+ // The type's fully-qualified name is stored as three consecutive strings: class, namespace,
354+ // then assembly. Each is a length-prefixed string, padded to a 4-byte boundary.
288355 var stringSize = m_Reader . ReadInt32 ( m_Offset ) ;
289356 AppendCrc ( m_Offset , stringSize + 4 ) ;
290357 var className = m_Reader . ReadString ( m_Offset + 4 , stringSize ) ;
@@ -303,15 +370,17 @@ bool ProcessManagedReferenceData(TypeTreeNode refTypeNode, TypeTreeNode referenc
303370 m_Offset += stringSize + 4 ;
304371 m_Offset = ( m_Offset + 3 ) & ~ ( 3 ) ;
305372
373+ // Sentinel that terminates a version-1 registry, plus the null/unknown rids.
306374 if ( ( className == "Terminus" && namespaceName == "UnityEngine.DMAT" && assemblyName == "FAKE_ASM" ) ||
307375 rid == - 1 || rid == - 2 )
308376 {
309377 return false ;
310378 }
311379
380+ // The data block follows the referenced type's own TypeTree, not this object's, so look it
381+ // up by FQN and walk it (isInManagedReferenceRegistry = true so we don't re-enter the registry).
312382 var refTypeTypeTree = m_SerializedFile . GetRefTypeTypeTreeRoot ( className , namespaceName , assemblyName ) ;
313383
314- // Process the ReferencedObject using its own TypeTree.
315384 var size = m_StringBuilder . Length ;
316385 m_StringBuilder . Append ( "rid(" ) ;
317386 m_StringBuilder . Append ( rid ) ;
0 commit comments