Skip to content

Commit 808d238

Browse files
authored
Fix DoFnInvoker cache collision for generic types (#37355)
This fixes a bug where ByteBuddyDoFnInvokerFactory would return the same cached invoker for different generic instantiations of the same DoFn class. Changes: 1. Introduced InvokerCacheKey with TypeDescriptors to ensure unique cache entries. 2. Updated generateInvokerClass to append type-based hash suffix. 3. Added regression test (testCacheKeyCollisionProof). This PR fixes a critical issue where ByteBuddyDoFnInvokerFactory failed to distinguish between different generic instantiations of the same DoFn class (e.g., MyFn<String> vs MyFn<Integer>). 1. Cache Key Strategy: Introduced InvokerCacheKey to include input/output TypeDescriptors in the cache lookup. 2. Class Naming: Updated generateInvokerClass to append a type-based hash suffix to ensure unique class names. 3. Robustness (The Fix): Added defensive try-catch blocks when accessing TypeDescriptors. - Some internal transforms (like MapElements) throw IllegalStateException if getOutputTypeDescriptor() is called after serialization. - In these cases, the factory now gracefully falls back to using Object.class (legacy behavior), ensuring backward compatibility for transforms that do not retain type information at runtime.
1 parent f2765ca commit 808d238

7 files changed

Lines changed: 181 additions & 22 deletions

File tree

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
{
22
"comment": "Modify this file in a trivial way to cause this test suite to run",
33
"modification": 4
4-
}
4+
}

.github/trigger_files/beam_PostCommit_Java_ValidatesRunner_Dataflow.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@
22
"comment": "Modify this file in a trivial way to cause this test suite to run!",
33
"modification": 3,
44
}
5+

.github/trigger_files/beam_PostCommit_Java_ValidatesRunner_Direct.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@
66
"https://github.com/apache/beam/pull/31761": "noting that PR #31761 should run this test",
77
"https://github.com/apache/beam/pull/35159": "moving WindowedValue and making an interface"
88
}
9+

.github/trigger_files/beam_PostCommit_Java_ValidatesRunner_Flink.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@
77
"runFor": "#33606",
88
"https://github.com/apache/beam/pull/35159": "moving WindowedValue and making an interface"
99
}
10+
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
22
"comment": "Modify this file in a trivial way to cause this test suite to run.",
33
"pr": "36271",
4-
"modification": 38
5-
}
4+
"modification": 39
5+
}

sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/ByteBuddyDoFnInvokerFactory.java

Lines changed: 125 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import java.util.ArrayList;
2828
import java.util.List;
2929
import java.util.Map;
30+
import java.util.Objects;
3031
import java.util.concurrent.ConcurrentHashMap;
3132
import net.bytebuddy.ByteBuddy;
3233
import net.bytebuddy.description.field.FieldDescription;
@@ -106,6 +107,7 @@
106107
import org.apache.beam.sdk.util.UserCodeException;
107108
import org.apache.beam.sdk.values.TypeDescriptor;
108109
import org.apache.beam.sdk.values.TypeDescriptors;
110+
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.MoreObjects;
109111
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Maps;
110112
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.primitives.Primitives;
111113
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
@@ -166,15 +168,66 @@ public <InputT, OutputT> DoFnInvoker<InputT, OutputT> invokerFor(DoFn<InputT, Ou
166168
private static final String FN_DELEGATE_FIELD_NAME = "delegate";
167169

168170
/**
169-
* A cache of constructors of generated {@link DoFnInvoker} classes, keyed by {@link DoFn} class.
170-
* Needed because generating an invoker class is expensive, and to avoid generating an excessive
171-
* number of classes consuming PermGen memory.
171+
* Cache key for DoFnInvoker constructors that includes both the DoFn class and its generic type
172+
* parameters to prevent collisions when the same DoFn class is used with different generic types.
173+
*/
174+
private static final class InvokerCacheKey {
175+
private final Class<? extends DoFn<?, ?>> fnClass;
176+
private final TypeDescriptor<?> inputType;
177+
private final TypeDescriptor<?> outputType;
178+
179+
InvokerCacheKey(
180+
Class<? extends DoFn<?, ?>> fnClass,
181+
TypeDescriptor<?> inputType,
182+
TypeDescriptor<?> outputType) {
183+
this.fnClass = fnClass;
184+
this.inputType = inputType;
185+
this.outputType = outputType;
186+
}
187+
188+
@Override
189+
public boolean equals(@Nullable Object o) {
190+
if (this == o) {
191+
return true;
192+
}
193+
if (!(o instanceof InvokerCacheKey)) {
194+
return false;
195+
}
196+
InvokerCacheKey that = (InvokerCacheKey) o;
197+
return Objects.equals(fnClass, that.fnClass)
198+
&& Objects.equals(inputType, that.inputType)
199+
&& Objects.equals(outputType, that.outputType);
200+
}
201+
202+
@Override
203+
public int hashCode() {
204+
return Objects.hash(fnClass, inputType, outputType);
205+
}
206+
207+
@Override
208+
public String toString() {
209+
return MoreObjects.toStringHelper(this)
210+
.add("fnClass", fnClass.getName())
211+
.add("inputType", inputType)
212+
.add("outputType", outputType)
213+
.toString();
214+
}
215+
}
216+
217+
/**
218+
* A cache of constructors of generated {@link DoFnInvoker} classes, keyed by {@link DoFn} class
219+
* and its generic type parameters. Needed because generating an invoker class is expensive, and
220+
* to avoid generating an excessive number of classes consuming PermGen memory.
221+
*
222+
* <p>The cache key includes generic type information to prevent collisions when the same DoFn
223+
* class is used with different generic types (e.g., MyDoFn&lt;String&gt; vs
224+
* MyDoFn&lt;Integer&gt;).
172225
*
173226
* <p>Note that special care must be taken to enumerate this object as concurrent hash maps are <a
174227
* href="https://docs.oracle.com/javase/8/docs/api/java/util/concurrent/package-summary.html#Weakly>weakly
175228
* consistent</a>.
176229
*/
177-
private final Map<Class<?>, Constructor<?>> byteBuddyInvokerConstructorCache =
230+
private final Map<InvokerCacheKey, Constructor<?>> byteBuddyInvokerConstructorCache =
178231
new ConcurrentHashMap<>();
179232

180233
private ByteBuddyDoFnInvokerFactory() {}
@@ -265,11 +318,39 @@ public <InputT, OutputT> DoFnInvoker<InputT, OutputT> newByteBuddyInvoker(
265318
signature.fnClass(),
266319
fn.getClass());
267320

321+
// Extract input and output type descriptors to distinguish generic instantiations.
322+
// Fall back to Object.class if unavailable. When type info is lost, different generic
323+
// instantiations share an invoker, which is acceptable since the DoFn class in the cache
324+
// key prevents collisions between different DoFn classes.
325+
TypeDescriptor<InputT> inputType;
326+
try {
327+
inputType = fn.getInputTypeDescriptor();
328+
} catch (Exception e) {
329+
// Some DoFns (like MapElements) throw IllegalStateException if queried after
330+
// serialization.
331+
// In this case, we fall back to the raw class behavior (Object).
332+
inputType = null;
333+
}
334+
if (inputType == null) {
335+
inputType = (TypeDescriptor<InputT>) TypeDescriptor.of(Object.class);
336+
}
337+
338+
TypeDescriptor<OutputT> outputType;
339+
try {
340+
outputType = fn.getOutputTypeDescriptor();
341+
} catch (Exception e) {
342+
// Same as above: fall back to Object if type info is unavailable.
343+
outputType = null;
344+
}
345+
if (outputType == null) {
346+
outputType = (TypeDescriptor<OutputT>) TypeDescriptor.of(Object.class);
347+
}
348+
268349
try {
269350
@SuppressWarnings("unchecked")
270351
DoFnInvokerBase<InputT, OutputT, DoFn<InputT, OutputT>> invoker =
271352
(DoFnInvokerBase<InputT, OutputT, DoFn<InputT, OutputT>>)
272-
getByteBuddyInvokerConstructor(signature).newInstance(fn);
353+
getByteBuddyInvokerConstructor(signature, inputType, outputType).newInstance(fn);
273354

274355
if (signature.onTimerMethods() != null) {
275356
for (OnTimerMethod onTimerMethod : signature.onTimerMethods().values()) {
@@ -297,19 +378,24 @@ public <InputT, OutputT> DoFnInvoker<InputT, OutputT> newByteBuddyInvoker(
297378
}
298379

299380
/**
300-
* Returns a generated constructor for a {@link DoFnInvoker} for the given {@link DoFn} class.
381+
* Returns a generated constructor for a {@link DoFnInvoker} for the given {@link DoFnSignature}
382+
* and specific generic types.
301383
*
302384
* <p>These are cached such that at most one {@link DoFnInvoker} class exists for a given {@link
303-
* DoFn} class.
385+
* DoFn} class with specific generic type parameters. Different generic instantiations of the same
386+
* DoFn class will have separate cached invoker classes.
304387
*/
305-
private Constructor<?> getByteBuddyInvokerConstructor(DoFnSignature signature) {
388+
private Constructor<?> getByteBuddyInvokerConstructor(
389+
DoFnSignature signature, TypeDescriptor<?> inputType, TypeDescriptor<?> outputType) {
306390
Class<? extends DoFn<?, ?>> fnClass = signature.fnClass();
391+
InvokerCacheKey cacheKey = new InvokerCacheKey(fnClass, inputType, outputType);
307392
return byteBuddyInvokerConstructorCache.computeIfAbsent(
308-
fnClass,
309-
clazz -> {
310-
Class<? extends DoFnInvoker<?, ?>> invokerClass = generateInvokerClass(signature);
393+
cacheKey,
394+
key -> {
395+
Class<? extends DoFnInvoker<?, ?>> invokerClass =
396+
generateInvokerClass(signature, inputType, outputType);
311397
try {
312-
return invokerClass.getConstructor(clazz);
398+
return invokerClass.getConstructor(fnClass);
313399
} catch (IllegalArgumentException | NoSuchMethodException | SecurityException e) {
314400
throw new RuntimeException(e);
315401
}
@@ -456,19 +542,42 @@ public static double validateSize(double size) {
456542
}
457543
}
458544

545+
/**
546+
* Generates a type suffix string for use in invoker class names.
547+
*
548+
* <p>This creates a unique suffix based on the input and output type descriptors to avoid class
549+
* name collisions when the same DoFn class is used with different generic types.
550+
*
551+
* <p>The format is: {@code DoFnInvoker$<8-digit hex hash>}
552+
*
553+
* @param inputType the input type descriptor
554+
* @param outputType the output type descriptor
555+
* @return a string suffix for the invoker class name
556+
*/
557+
public static String generateTypeSuffix(
558+
TypeDescriptor<?> inputType, TypeDescriptor<?> outputType) {
559+
return String.format(
560+
"%s$%08x",
561+
DoFnInvoker.class.getSimpleName(),
562+
(inputType.toString() + "|" + outputType.toString()).hashCode());
563+
}
564+
459565
/** Generates a {@link DoFnInvoker} class for the given {@link DoFnSignature}. */
460-
private static Class<? extends DoFnInvoker<?, ?>> generateInvokerClass(DoFnSignature signature) {
566+
private static Class<? extends DoFnInvoker<?, ?>> generateInvokerClass(
567+
DoFnSignature signature, TypeDescriptor<?> inputType, TypeDescriptor<?> outputType) {
461568
Class<? extends DoFn<?, ?>> fnClass = signature.fnClass();
462569

570+
// Create a unique suffix based on the type descriptors to avoid class name collisions
571+
// when the same DoFn class is used with different generic types.
572+
String typeSuffix = generateTypeSuffix(inputType, outputType);
573+
463574
final TypeDescription clazzDescription = new TypeDescription.ForLoadedType(fnClass);
464575

465576
DynamicType.Builder<?> builder =
466577
new ByteBuddy()
467578
// Create subclasses inside the target class, to have access to
468579
// private and package-private bits
469-
.with(
470-
StableInvokerNamingStrategy.forDoFnClass(fnClass)
471-
.withSuffix(DoFnInvoker.class.getSimpleName()))
580+
.with(StableInvokerNamingStrategy.forDoFnClass(fnClass).withSuffix(typeSuffix))
472581

473582
// class <invoker class> extends DoFnInvokerBase {
474583
.subclass(DoFnInvokerBase.class, ConstructorStrategy.Default.NO_CONSTRUCTORS)

sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/reflect/DoFnInvokersTest.java

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import static org.hamcrest.Matchers.equalTo;
2525
import static org.junit.Assert.assertEquals;
2626
import static org.junit.Assert.assertFalse;
27+
import static org.junit.Assert.assertNotSame;
2728
import static org.junit.Assert.assertNull;
2829
import static org.junit.Assert.assertSame;
2930
import static org.junit.Assert.assertThrows;
@@ -77,6 +78,8 @@
7778
import org.apache.beam.sdk.transforms.windowing.PaneInfo;
7879
import org.apache.beam.sdk.util.UserCodeException;
7980
import org.apache.beam.sdk.values.OutputBuilder;
81+
import org.apache.beam.sdk.values.TypeDescriptor;
82+
import org.apache.beam.sdk.values.TypeDescriptors;
8083
import org.apache.beam.sdk.values.WindowedValues;
8184
import org.joda.time.Instant;
8285
import org.junit.Before;
@@ -1382,11 +1385,14 @@ public void process() {}
13821385
@Test
13831386
public void testStableName() {
13841387
DoFnInvoker<Void, Void> invoker = DoFnInvokers.invokerFor(new StableNameTestDoFn());
1388+
// The invoker class name includes a hash of the type descriptors to support
1389+
// different generic instantiations of the same DoFn class.
1390+
// Format: <DoFn class name>$<DoFnInvoker>$<type hash>
1391+
TypeDescriptor<Void> voidType = new StableNameTestDoFn().getInputTypeDescriptor();
1392+
String expectedTypeSuffix = ByteBuddyDoFnInvokerFactory.generateTypeSuffix(voidType, voidType);
13851393
assertThat(
13861394
invoker.getClass().getName(),
1387-
equalTo(
1388-
String.format(
1389-
"%s$%s", StableNameTestDoFn.class.getName(), DoFnInvoker.class.getSimpleName())));
1395+
equalTo(String.format("%s$%s", StableNameTestDoFn.class.getName(), expectedTypeSuffix)));
13901396
}
13911397

13921398
@Test
@@ -1406,4 +1412,45 @@ public void processElement(BundleFinalizer bundleFinalizer) {
14061412

14071413
verify(mockBundleFinalizer).afterBundleCommit(eq(Instant.ofEpochSecond(42L)), eq(null));
14081414
}
1415+
1416+
@Test
1417+
public void testCacheKeyCollisionProof() throws Exception {
1418+
class DynamicTypeDoFn<T> extends DoFn<T, T> {
1419+
private final TypeDescriptor<T> typeDescriptor;
1420+
1421+
DynamicTypeDoFn(TypeDescriptor<T> typeDescriptor) {
1422+
this.typeDescriptor = typeDescriptor;
1423+
}
1424+
1425+
@ProcessElement
1426+
public void processElement(@Element T element, OutputReceiver<T> out) {
1427+
out.output(element);
1428+
}
1429+
1430+
// Key point: force returning our specified type instead of relying on class signature
1431+
@Override
1432+
public TypeDescriptor<T> getInputTypeDescriptor() {
1433+
return typeDescriptor;
1434+
}
1435+
1436+
@Override
1437+
public TypeDescriptor<T> getOutputTypeDescriptor() {
1438+
return typeDescriptor;
1439+
}
1440+
}
1441+
1442+
DoFn<String, String> stringFn = new DynamicTypeDoFn<>(TypeDescriptors.strings());
1443+
DoFn<Integer, Integer> intFn = new DynamicTypeDoFn<>(TypeDescriptors.integers());
1444+
1445+
DoFnInvoker<String, String> stringInvoker = DoFnInvokers.invokerFor(stringFn);
1446+
DoFnInvoker<Integer, Integer> intInvoker = DoFnInvokers.invokerFor(intFn);
1447+
1448+
System.out.println("String Invoker: " + stringInvoker.getClass().getName());
1449+
System.out.println("Integer Invoker: " + intInvoker.getClass().getName());
1450+
1451+
assertNotSame(
1452+
"Critical bug: Beam returned the same cached class for different generic types.",
1453+
stringInvoker.getClass(),
1454+
intInvoker.getClass());
1455+
}
14091456
}

0 commit comments

Comments
 (0)