Skip to content

Commit 5b13315

Browse files
xiangfu0claude
andcommitted
Restore legacy SQL hash function names as aliases
Critical fix: - HashFunctions.java: when this branch added @ScalarFunction(names = {"murmur2", ...}) on existing public byte[]-overload hash functions (murmurHash2, fnv1Hash32, fnv1aHash32, fnv1Hash64, fnv1aHash64), the explicit names array overrode the auto-derived canonical name — silently breaking existing user SQL queries / ingestion transforms / views that called these functions on byte[] inputs by their original names. Add the legacy camelCase names alongside the new short aliases so both resolve to the same method: @ScalarFunction(names = {"murmur2", "murmurHash2"}) @ScalarFunction(names = {"fnv1_32", "fnv1Hash32"}) @ScalarFunction(names = {"fnv1a_32", "fnv1aHash32"}) @ScalarFunction(names = {"fnv1_64", "fnv1Hash64"}) @ScalarFunction(names = {"fnv1a_64", "fnv1aHash64"}) (murmur3Bit32Default is a new 1-arg method introduced by this branch with no upstream auto-derived name to preserve, so its single "murmur3_32" alias is fine.) - HashFunctionsTest: add testLegacyAndNewHashFunctionAliasesBothResolve asserting both the legacy and new alias resolve via FunctionRegistry to the same method, locking the contract so a future name reshuffle cannot silently regress backward compatibility. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent c56a353 commit 5b13315

2 files changed

Lines changed: 44 additions & 5 deletions

File tree

pinot-common/src/main/java/org/apache/pinot/common/function/scalar/HashFunctions.java

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,9 @@ public static byte[] md5Raw(byte[] input) {
122122
* @param input the byte array to hash
123123
* @return 32-bit hash
124124
*/
125-
@ScalarFunction(names = {"murmur2"})
125+
// "murmurHash2" preserves the canonical name auto-derived on master before this PR introduced the "murmur2" alias.
126+
// Existing user SQL queries / ingestion transforms calling murmurHash2(byte_col) must continue to resolve.
127+
@ScalarFunction(names = {"murmur2", "murmurHash2"})
126128
public static int murmurHash2(byte[] input) {
127129
return MurmurHashFunctions.murmurHash2(input);
128130
}
@@ -240,7 +242,8 @@ public static byte[] murmurHash3X64Bit128(byte[] input, int seed) {
240242
* @param input the byte array to hash
241243
* @return 32-bit hash
242244
*/
243-
@ScalarFunction(names = {"fnv1_32"})
245+
// "fnv1Hash32" preserves the canonical name auto-derived on master before this PR introduced the "fnv1_32" alias.
246+
@ScalarFunction(names = {"fnv1_32", "fnv1Hash32"})
244247
public static int fnv1Hash32(byte[] input) {
245248
return FnvHashFunctions.fnv1Hash32(input);
246249
}
@@ -262,7 +265,8 @@ public static int fnv1Hash32UTF8(String input) {
262265
* @param input the byte array to hash
263266
* @return 32-bit hash
264267
*/
265-
@ScalarFunction(names = {"fnv1a_32"})
268+
// "fnv1aHash32" preserves the canonical name auto-derived on master before this PR introduced the "fnv1a_32" alias.
269+
@ScalarFunction(names = {"fnv1a_32", "fnv1aHash32"})
266270
public static int fnv1aHash32(byte[] input) {
267271
return FnvHashFunctions.fnv1aHash32(input);
268272
}
@@ -284,7 +288,8 @@ public static int fnv1aHash32UTF8(String input) {
284288
* @param input the byte array to hash
285289
* @return 64-bit hash
286290
*/
287-
@ScalarFunction(names = {"fnv1_64"})
291+
// "fnv1Hash64" preserves the canonical name auto-derived on master before this PR introduced the "fnv1_64" alias.
292+
@ScalarFunction(names = {"fnv1_64", "fnv1Hash64"})
288293
public static long fnv1Hash64(byte[] input) {
289294
return FnvHashFunctions.fnv1Hash64(input);
290295
}
@@ -306,7 +311,8 @@ public static long fnv1Hash64UTF8(String input) {
306311
* @param input the byte array to hash
307312
* @return 64-bit hash
308313
*/
309-
@ScalarFunction(names = {"fnv1a_64"})
314+
// "fnv1aHash64" preserves the canonical name auto-derived on master before this PR introduced the "fnv1a_64" alias.
315+
@ScalarFunction(names = {"fnv1a_64", "fnv1aHash64"})
310316
public static long fnv1aHash64(byte[] input) {
311317
return FnvHashFunctions.fnv1aHash64(input);
312318
}

pinot-common/src/test/java/org/apache/pinot/common/function/scalar/HashFunctionsTest.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,13 @@
1919
package org.apache.pinot.common.function.scalar;
2020

2121
import java.nio.charset.StandardCharsets;
22+
import org.apache.pinot.common.function.FunctionInfo;
23+
import org.apache.pinot.common.function.FunctionRegistry;
2224
import org.apache.pinot.spi.utils.hash.FnvHashFunctions;
2325
import org.testng.annotations.Test;
2426

2527
import static org.testng.Assert.assertEquals;
28+
import static org.testng.Assert.assertNotNull;
2629

2730

2831
public class HashFunctionsTest {
@@ -225,4 +228,34 @@ public void testCityHash128() {
225228
assertEquals(HashFunctions.cityHash128(INPUT_LEN_156.getBytes()),
226229
new byte[] {114, 8, 23, 53, -124, 83, -28, 35, 27, -117, -59, 121, -108, -20, 59, 115});
227230
}
231+
232+
/**
233+
* Backward-compat regression: this branch added explicit {@code @ScalarFunction(names = {"murmur2", ...})}
234+
* aliases to byte[]-overload hash functions. The {@code names} array overrides the auto-derived canonical name
235+
* from the method, so without explicitly listing the original (camelCase) name, existing user SQL queries calling
236+
* {@code murmurHash2}/{@code fnv1Hash32}/{@code fnv1aHash32}/{@code fnv1Hash64}/{@code fnv1aHash64} on a byte[]
237+
* argument would silently stop resolving the byte[] overload after upgrade. This test locks both the new short
238+
* alias and the legacy auto-derived name so future name reshuffles cannot regress backward compatibility.
239+
*/
240+
@Test
241+
public void testLegacyAndNewHashFunctionAliasesBothResolve() {
242+
// Note: lookupFunctionInfo expects canonicalized names (lowercased, underscores stripped). Both the legacy
243+
// camelCase name and the new alias should canonicalize to a registered key.
244+
String[][] aliasPairs = new String[][] {
245+
// {legacy auto-derived name, new alias}
246+
{"murmurHash2", "murmur2"},
247+
{"fnv1Hash32", "fnv1_32"},
248+
{"fnv1aHash32", "fnv1a_32"},
249+
{"fnv1Hash64", "fnv1_64"},
250+
{"fnv1aHash64", "fnv1a_64"},
251+
};
252+
for (String[] pair : aliasPairs) {
253+
FunctionInfo legacy = FunctionRegistry.lookupFunctionInfo(FunctionRegistry.canonicalize(pair[0]), 1);
254+
FunctionInfo alias = FunctionRegistry.lookupFunctionInfo(FunctionRegistry.canonicalize(pair[1]), 1);
255+
assertNotNull(legacy, "Legacy alias must still resolve: " + pair[0]);
256+
assertNotNull(alias, "New alias must resolve: " + pair[1]);
257+
assertEquals(legacy.getMethod(), alias.getMethod(),
258+
"Legacy and new aliases must resolve to the same method: " + pair[0] + " vs " + pair[1]);
259+
}
260+
}
228261
}

0 commit comments

Comments
 (0)