|
22 | 22 | import java.util.Arrays; |
23 | 23 | import java.util.Collections; |
24 | 24 | import java.util.LinkedHashMap; |
| 25 | +import java.util.LinkedHashSet; |
| 26 | +import java.util.List; |
25 | 27 | import java.util.Map; |
| 28 | +import java.util.Objects; |
26 | 29 | import java.util.Set; |
27 | 30 | import java.util.stream.Collectors; |
28 | 31 |
|
|
31 | 34 |
|
32 | 35 | import com.esotericsoftware.kryo.Kryo; |
33 | 36 | import com.esotericsoftware.kryo.Serializer; |
34 | | -import org.apache.cassandra.bridge.BaseCassandraBridgeFactory; |
35 | 37 | import org.apache.cassandra.bridge.BigNumberConfigImpl; |
36 | 38 | import org.apache.cassandra.bridge.CassandraBridgeFactory; |
37 | 39 | import org.apache.cassandra.bridge.CassandraVersion; |
@@ -142,26 +144,33 @@ public static void setup(@NotNull SparkConf configuration) |
142 | 144 | LOGGER.info("Setting up Kryo"); |
143 | 145 | configuration.set(SPARK_SERIALIZER, "org.apache.spark.serializer.KryoSerializer"); |
144 | 146 |
|
145 | | - // Add KryoRegister to SparkConf serialization if not already there |
| 147 | + // Preserve any pre-existing (e.g. user-supplied) registrators and keep them first. |
| 148 | + // LinkedHashSet gives a stable, predictable registration order on the driver; the same |
| 149 | + // resulting spark.kryo.registrator string is then propagated to all executors via SparkConf. |
146 | 150 | Set<String> registratorsSet = Arrays.stream(configuration.get(SPARK_REGISTRATORS, "").split(",")) |
147 | 151 | .filter(string -> string != null && !string.isEmpty()) |
148 | | - .collect(Collectors.toSet()); |
149 | | - |
150 | | - // TODO: Find a better way to initialize Kryo serializer, instead of relaying |
151 | | - // on Cassandra version specified as parameter of Spark job. Can we get Cassandra version from Sidecar? |
152 | | - CassandraVersion cassandraVersion = BaseCassandraBridgeFactory.getCassandraVersion(configuration.get(CASSANDRA_VERSION, "4.0.0")); |
153 | | - Class<?> registratorClass = KRYO_REGISTRATORS.get(cassandraVersion); |
154 | | - if (registratorClass == null) |
| 152 | + .collect(Collectors.toCollection(LinkedHashSet::new)); |
| 153 | + |
| 154 | + // The SSTable-based bridge selection feature chooses the bridge version, which may differ |
| 155 | + // from cassandra.version; registering every loadable bridge's registrator ensures Spark |
| 156 | + // can serialize objects for whichever bridge is chosen. Only implemented (bundled) versions |
| 157 | + // are used, so we never attempt to load a bridge JAR that is not available. |
| 158 | + List<Class<?>> registratorClasses = Arrays.stream(CassandraVersion.implementedVersions()) |
| 159 | + .map(KRYO_REGISTRATORS::get) |
| 160 | + .filter(Objects::nonNull) |
| 161 | + .collect(Collectors.toList()); |
| 162 | + if (registratorClasses.isEmpty()) |
155 | 163 | { |
156 | | - throw new IllegalArgumentException("Kryo registrator not configured for Cassandra version: " + cassandraVersion); |
| 164 | + throw new IllegalStateException("No Kryo registrators configured for implemented Cassandra versions: " |
| 165 | + + Arrays.toString(CassandraVersion.implementedVersions())); |
157 | 166 | } |
158 | 167 |
|
159 | | - registratorsSet.add(registratorClass.getName()); |
| 168 | + registratorClasses.forEach(registratorClass -> registratorsSet.add(registratorClass.getName())); |
160 | 169 | String registratorsString = String.join(",", registratorsSet); |
161 | 170 | LOGGER.info("Setting kryo registrators: " + registratorsString); |
162 | 171 | configuration.set(SPARK_REGISTRATORS, registratorsString); |
163 | 172 |
|
164 | | - configuration.registerKryoClasses(new Class<?>[]{registratorClass}); |
| 173 | + configuration.registerKryoClasses(registratorClasses.toArray(new Class<?>[0])); |
165 | 174 | } |
166 | 175 |
|
167 | 176 | public static class V40 extends KryoRegister |
|
0 commit comments