Skip to content

Commit e881176

Browse files
committed
Decompiler: First pass on type propagation
1 parent c6aaa5e commit e881176

6 files changed

Lines changed: 276 additions & 66 deletions

File tree

Cpp2IL.Core/Analysis/LocalVariables.cs

Lines changed: 143 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -189,63 +189,154 @@ private static List<Register> GetRegisters(Instruction instruction)
189189
return registers;
190190
}
191191

192-
public static void PropagateTypes(MethodAnalysisContext method)
192+
/// <summary>
193+
/// Resolves field accesses and propagates types together, to a fixpoint, while the method is
194+
/// still in SSA form (every local has a single, version-stable definition).
195+
///
196+
/// The two are mutually enabling and so cannot be ordered as separate passes: a typed base lets
197+
/// <see cref="MetadataResolver.ResolveFieldOffsets"/> turn <c>[base + offset]</c> into a
198+
/// <see cref="FieldReference"/>, a resolved field load types its result with the field's type,
199+
/// and that result is in turn the base of the next access (directly, or after flowing through
200+
/// moves/phis). Both steps are monotonic - each only ever resolves an operand or fills a
201+
/// previously-unknown type - so the loop converges.
202+
/// </summary>
203+
public static void ResolveTypesAndFields(MethodAnalysisContext method)
193204
{
205+
// Seed types from fixed ground truth - the method's own signature, and type-metadata global
206+
// loads. Applied once up front and, being applied first, they win over anything inferred later.
194207
PropagateFromReturn(method);
195208
PropagateFromParameters(method);
196-
PropagateFromCallParameters(method);
197-
PropagateThroughMoves(method);
198-
}
209+
SeedRuntimeClassTypes(method);
199210

200-
private static void PropagateThroughMoves(MethodAnalysisContext method)
201-
{
211+
// Everything else is mutually enabling and so runs to a fixpoint: a typed receiver lets an
212+
// ambiguous call resolve, a resolved call types its return value and arguments, a typed base
213+
// lets a field offset resolve, a field load types its result, and any of those can be the
214+
// receiver/base of the next step. Every pass is monotonic - it only resolves an operand or
215+
// fills a previously-unknown type - so the loop converges.
202216
var changed = true;
203217
var loopCount = 0;
204218

205219
while (changed)
206220
{
221+
if (MaxTypePropagationLoopCount != -1 && ++loopCount > MaxTypePropagationLoopCount)
222+
throw new DecompilerException($"Type and field resolution not settling! (looped {MaxTypePropagationLoopCount} times)");
223+
207224
changed = false;
208-
loopCount++;
225+
changed |= MetadataResolver.ResolveAmbiguousCalls(method);
226+
changed |= PropagateFromCallParameters(method);
227+
changed |= MetadataResolver.ResolveFieldOffsets(method);
228+
changed |= PropagateTypesOnce(method);
229+
}
230+
}
231+
232+
/// <summary>
/// Seeds the type of every local that receives a type-metadata global load.
/// </summary>
/// <remarks>
/// <c>Move local, typeof(T)</c> stores the runtime class pointer for T (an Il2CppClass*), not an
/// instance of T, so the destination is typed as a <see cref="RuntimeClassTypeAnalysisContext"/>
/// wrapping T. The assignment is unconditional: it replaces whatever type the local already had,
/// because the instruction itself states the type exactly. It is intended to run before the
/// inference fixpoint so that no inference pass types the local as T itself first.
/// </remarks>
/// <param name="method">Method whose control flow graph instructions are scanned.</param>
private static void SeedRuntimeClassTypes(MethodAnalysisContext method)
{
    foreach (var instruction in method.ControlFlowGraph!.Instructions)
    {
        // Only two-operand (or longer) moves of the shape "Move local, type" are of interest.
        var isTypeLoad = instruction.OpCode == OpCode.Move
                         && instruction.Operands.Count >= 2
                         && instruction.Operands[0] is LocalVariable
                         && instruction.Operands[1] is TypeAnalysisContext;

        if (!isTypeLoad)
            continue;

        var target = (LocalVariable)instruction.Operands[0];
        var loadedType = (TypeAnalysisContext)instruction.Operands[1];

        target.Type = new RuntimeClassTypeAnalysisContext(loadedType, loadedType.DeclaringAssembly);
    }
}
247+
248+
// Fills in a local's type only when it is currently unknown, keeping propagation monotonic (a
249+
// type, once set, is never changed) so the fixpoint terminates. Returns whether it set anything.
250+
private static bool SetTypeIfUnknown(LocalVariable local, TypeAnalysisContext? type)
251+
{
252+
if (type == null || local.Type != null)
253+
return false;
209254

210-
if (MaxTypePropagationLoopCount != -1 && loopCount > MaxTypePropagationLoopCount)
211-
throw new DecompilerException($"Type propagation through moves not settling! (looped {MaxTypePropagationLoopCount} times)");
255+
local.Type = type;
256+
return true;
257+
}
212258

213-
foreach (var instruction in method.ControlFlowGraph!.Instructions)
259+
// A single propagation sweep over every move and phi. Returns whether it filled in any type.
260+
private static bool PropagateTypesOnce(MethodAnalysisContext method)
261+
{
262+
var changed = false;
263+
264+
foreach (var instruction in method.ControlFlowGraph!.Instructions)
265+
{
266+
switch (instruction.OpCode)
214267
{
215-
if (instruction.OpCode != OpCode.Move)
216-
continue;
268+
case OpCode.Move:
269+
changed |= PropagateMove(instruction);
270+
break;
271+
case OpCode.Phi:
272+
changed |= PropagatePhi(instruction);
273+
break;
274+
}
275+
}
217276

218-
if (instruction.Operands[0] is LocalVariable destination && instruction.Operands[1] is LocalVariable source)
219-
{
220-
// Move ??, local
221-
if (destination.Type == null && source.Type != null)
222-
{
223-
destination.Type = source.Type;
224-
changed = true;
225-
}
226-
// Move local, ??
227-
else if (source.Type == null && destination.Type != null)
228-
{
229-
source.Type = destination.Type;
230-
changed = true;
231-
}
232-
}
277+
return changed;
278+
}
279+
280+
/// <summary>
/// Propagates type information across a single Move instruction.
/// </summary>
/// <param name="move">The Move instruction to inspect; operand 0 is the destination, operand 1 the source.</param>
/// <returns>
/// <c>true</c> if a previously-untyped local was given a type, <c>false</c> otherwise (including
/// when the move has fewer than two operands or matches none of the handled shapes).
/// </returns>
private static bool PropagateMove(Instruction move)
{
    // A move without both a destination and a source carries nothing to propagate. Skipping it
    // (rather than indexing out of range) matches the guard SeedRuntimeClassTypes applies to moves.
    if (move.Operands.Count < 2)
        return false;

    var destination = move.Operands[0];
    var source = move.Operands[1];

    // Move local, local: copy a known type in whichever direction is missing it. The short-circuit
    // is safe: if the first call typed the destination, the source was already typed, so the
    // second call would have been a no-op.
    if (destination is LocalVariable destLocal && source is LocalVariable sourceLocal)
        return SetTypeIfUnknown(destLocal, sourceLocal.Type) || SetTypeIfUnknown(sourceLocal, destLocal.Type);

    // Move local, field: a field load types its result with the field's type. This is the edge
    // that lets the loaded value go on to be the base of a further field access.
    if (destination is LocalVariable loadDest && source is FieldReference loadField)
        return SetTypeIfUnknown(loadDest, loadField.Field.FieldType);

    // Move field, local: a field store types the stored value with the field's type.
    if (destination is FieldReference storeField && source is LocalVariable storeSource)
        return SetTypeIfUnknown(storeSource, storeField.Field.FieldType);

    return false;
}
300+
301+
// A phi is a copy from each predecessor's value, so types flow both ways across it - mirroring
302+
// the bidirectional Move copies it decays into once SSA is destroyed.
303+
private static bool PropagatePhi(Instruction phi)
304+
{
305+
if (phi.Operands[0] is not LocalVariable destination)
306+
return false;
233307

234-
if (instruction.Operands[0] is LocalVariable destination2 && instruction.Operands[1] is TypeAnalysisContext source2)
308+
var changed = false;
309+
310+
// Forward: an untyped phi result takes the type of any typed input.
311+
if (destination.Type == null)
312+
{
313+
for (var i = 1; i < phi.Operands.Count; i++)
314+
{
315+
if (phi.Operands[i] is LocalVariable { Type: { } inputType })
235316
{
236-
// Move ??, type
237-
if (destination2.Type == null)
238-
{
239-
destination2.Type = source2;
240-
changed = true;
241-
}
317+
changed = SetTypeIfUnknown(destination, inputType);
318+
break;
242319
}
243320
}
244321
}
322+
323+
// Backward: a typed phi result types each of its still-untyped inputs.
324+
if (destination.Type != null)
325+
{
326+
for (var i = 1; i < phi.Operands.Count; i++)
327+
{
328+
if (phi.Operands[i] is LocalVariable input)
329+
changed |= SetTypeIfUnknown(input, destination.Type);
330+
}
331+
}
332+
333+
return changed;
245334
}
246335

247-
private static void PropagateFromCallParameters(MethodAnalysisContext method)
336+
private static bool PropagateFromCallParameters(MethodAnalysisContext method)
248337
{
338+
var changed = false;
339+
249340
foreach (var instruction in method.ControlFlowGraph!.Instructions)
250341
{
251342
if (!instruction.IsCall)
@@ -254,46 +345,39 @@ private static void PropagateFromCallParameters(MethodAnalysisContext method)
254345
if (instruction.Operands[0] is not MethodAnalysisContext calledMethod)
255346
continue;
256347

257-
// Constructor, set return variable type
258-
if (calledMethod.Name == ".ctor" || calledMethod.Name == ".cctor")
348+
// Return value: a constructor yields its declaring type, otherwise the declared return type.
349+
if (instruction.Destination is LocalVariable returnValue)
259350
{
260-
if (instruction.Destination is LocalVariable constructorReturn)
261-
{
262-
constructorReturn.Type = calledMethod.DeclaringType;
263-
continue;
264-
}
265-
}
266-
else // Return value
267-
{
268-
if (instruction.Destination is LocalVariable returnValue)
269-
returnValue.Type = calledMethod.ReturnType;
351+
changed |= SetTypeIfUnknown(returnValue,
352+
calledMethod.Name is ".ctor" or ".cctor" ? calledMethod.DeclaringType : calledMethod.ReturnType);
270353
}
271354

272355
// 'this' param
273-
if (!calledMethod.IsStatic)
356+
if (!calledMethod.IsStatic
357+
&& instruction.Operands[instruction.OpCode == OpCode.CallVoid ? 1 : 2] is LocalVariable thisParam)
274358
{
275-
if (instruction.Operands[instruction.OpCode == OpCode.CallVoid ? 1 : 2] is LocalVariable thisParam)
276-
thisParam.Type = calledMethod.DeclaringType;
359+
changed |= SetTypeIfUnknown(thisParam, calledMethod.DeclaringType);
277360
}
278361

279-
// Set types
362+
// Remaining arguments map positionally onto the callee's declared parameters.
280363
var paramOffset = calledMethod.IsStatic ? 1 : 2;
281-
if (instruction.OpCode == OpCode.Call) // Skip return value
364+
if (instruction.OpCode == OpCode.Call) // Skip the return value operand
282365
paramOffset += 1;
283366

284367
for (var i = paramOffset; i < instruction.Operands.Count; i++)
285368
{
286-
var operand = instruction.Operands[i];
369+
if (instruction.Operands[i] is not LocalVariable local)
370+
continue;
287371

288-
if (operand is LocalVariable local)
289-
{
290-
if ((i - paramOffset) > calledMethod.Parameters.Count - 1) // Probably MethodInfo*
291-
continue;
372+
var parameterIndex = i - paramOffset;
373+
if (parameterIndex > calledMethod.Parameters.Count - 1) // Probably MethodInfo*
374+
continue;
292375

293-
local.Type = calledMethod.Parameters[i - paramOffset].ParameterType;
294-
}
376+
changed |= SetTypeIfUnknown(local, calledMethod.Parameters[parameterIndex].ParameterType);
295377
}
296378
}
379+
380+
return changed;
297381
}
298382

299383
private static void PropagateFromParameters(MethodAnalysisContext method)

Cpp2IL.Core/Analysis/MetadataResolver.cs

Lines changed: 82 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ public static class MetadataResolver
1414
public static void ResolveAll(MethodAnalysisContext method)
1515
{
1616
ResolveCalls(method);
17-
ResolveFieldOffsets(method);
1817
ResolveGetter(method);
1918
ResolveStrings(method);
2019
}
@@ -52,8 +51,17 @@ private static void ResolveStrings(MethodAnalysisContext method)
5251
}
5352
}
5453

55-
private static void ResolveFieldOffsets(MethodAnalysisContext method)
54+
/// <summary>
55+
/// Replaces every <c>[base + addend]</c> memory operand whose base is a typed local with a
56+
/// <see cref="FieldReference"/> to the field at that offset. Returns whether any operand was
57+
/// resolved this pass, so the type/field fixpoint can detect convergence: as more bases become
58+
/// typed (a field load types its result, which is the base of the next load), more offsets
59+
/// resolve, so this is re-run until it stops finding new fields.
60+
/// </summary>
61+
public static bool ResolveFieldOffsets(MethodAnalysisContext method)
5662
{
63+
var changed = false;
64+
5765
foreach (var instruction in method.ControlFlowGraph!.Instructions)
5866
{
5967
for (var i = 0; i < instruction.Operands.Count; i++)
@@ -76,8 +84,11 @@ private static void ResolveFieldOffsets(MethodAnalysisContext method)
7684
continue;
7785

7886
instruction.Operands[i] = new FieldReference(field, local, (int)memory.Addend);
87+
changed = true;
7988
}
8089
}
90+
91+
return changed;
8192
}
8293

8394
private static void ResolveCalls(MethodAnalysisContext method)
@@ -107,6 +118,9 @@ private static void ResolveCalls(MethodAnalysisContext method)
107118
if (!method.AppContext.MethodsByAddress.TryGetValue(target, out var targetMethods))
108119
continue;
109120

121+
// Several methods can share one address (identical native code merged by the linker, or
122+
// generic sharing). Those are left as a numeric target here and disambiguated later by
123+
// receiver type in ResolveAmbiguousCalls, once types are known.
110124
if (targetMethods is not [{ } singleTargetMethod])
111125
continue;
112126

@@ -116,6 +130,72 @@ private static void ResolveCalls(MethodAnalysisContext method)
116130
method.ControlFlowGraph.MergeCallBlocks();
117131
}
118132

133+
/// <summary>
/// Resolves calls whose numeric target address maps to two or more methods, by comparing the
/// receiver local's known type with each candidate's declaring type.
/// </summary>
/// <remarks>
/// A call is rewritten only when exactly one non-static candidate has a declaring type that is
/// the very same object (reference equality) as the receiver's type. Calls with an untyped
/// receiver, no matching candidate, or more than one matching candidate are left untouched, so
/// the method can be re-run as more receivers become typed.
/// </remarks>
/// <param name="method">Method whose control flow graph instructions are scanned.</param>
/// <returns><c>true</c> if at least one call target was replaced during this pass.</returns>
public static bool ResolveAmbiguousCalls(MethodAnalysisContext method)
{
    var resolvedAny = false;

    foreach (var instruction in method.ControlFlowGraph!.Instructions)
    {
        if (!instruction.IsCall)
            continue;

        // Only calls whose target operand is still numeric are candidates for resolution here.
        var callTarget = instruction.Operands[0];
        if (!callTarget.IsNumeric())
            continue;

        // NOTE(review): the cast assumes a numeric target is always stored as a ulong - confirm.
        var found = method.AppContext.MethodsByAddress.TryGetValue((ulong)callTarget, out var candidates);
        if (!found || candidates.Count < 2)
            continue;

        if (GetReceiver(instruction) is not { Type: { } receiverType })
            continue;

        // Count the instance candidates declared on the receiver's type, stopping at the second
        // one since that already proves the call cannot be resolved unambiguously.
        MethodAnalysisContext? uniqueMatch = null;
        var matchCount = 0;

        foreach (var candidate in candidates)
        {
            if (candidate.IsStatic || !ReferenceEquals(candidate.DeclaringType, receiverType))
                continue;

            if (++matchCount > 1)
                break;

            uniqueMatch = candidate;
        }

        if (matchCount != 1)
            continue;

        instruction.Operands[0] = uniqueMatch!;
        resolvedAny = true;
    }

    return resolvedAny;
}
190+
191+
/// <summary>
/// Returns the local variable in a call's receiver ('this') slot, if there is one.
/// </summary>
/// <remarks>
/// The receiver slot is operand 1 for <c>CallVoid</c> (directly after the target) and operand 2
/// for any other call opcode (after the target and the return value).
/// </remarks>
/// <param name="call">The call instruction to inspect.</param>
/// <returns>
/// The operand in the receiver slot when it is a <see cref="LocalVariable"/>; <c>null</c> when
/// the instruction has too few operands or the slot holds something else.
/// </returns>
private static LocalVariable? GetReceiver(Instruction call)
{
    var receiverIndex = call.OpCode == OpCode.CallVoid ? 1 : 2;

    if (receiverIndex >= call.Operands.Count)
        return null;

    return call.Operands[receiverIndex] as LocalVariable;
}
198+
119199
private static void HandleKeyFunction(ApplicationAnalysisContext appContext, Instruction instruction, ulong target, BaseKeyFunctionAddresses kFA)
120200
{
121201
var method = "";

0 commit comments

Comments
 (0)