Skip to content

Commit c8a7a26

Browse files
This pull request aims to fix the @DbJsonB dirty-detection problem reported in #3129, which was caused by PostgreSQL JSONB reordering object keys when storing a value. This caused Ebean to mark @DbJsonB properties as dirty on every load (triggering unnecessary UPDATEs and version increments) because the key order of the raw JSON returned by the database differed from Jackson's serialization order. Currently, Ebean compares CRC32 checksums, which depend on field ordering. Introduce JsonContentHash: a streaming, order-independent structural hash of JSON content using Jackson's JsonParser. Object keys are combined with commutative addition (a + b == b + a) so key ordering does not affect the hash, while array elements use positional hashing to preserve semantic ordering. The hash uses FNV-1a for strings and MurmurHash3's fmix64 finalizer (both public domain) for mixing, producing a 64-bit hash with strong avalanche properties. Changes: - Add JsonContentHash utility (streaming, zero allocation, O(n) time) - SourceMutableValue: use fast string equality with canonical hash fallback - ChecksumMutableValue: replace CRC32 with JsonContentHash (also upgrades collision resistance from 2^32 to 2^64) - No API changes, no schema changes, readSet() untouched
1 parent aeef6d0 commit c8a7a26

File tree

3 files changed

+318
-7
lines changed

3 files changed

+318
-7
lines changed

ebean-core/src/main/java/io/ebeaninternal/server/deploy/BeanPropertyJsonMapper.java

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import io.ebean.core.type.ScalarType;
1111
import io.ebean.text.TextException;
1212
import io.ebeaninternal.server.deploy.meta.DeployBeanProperty;
13-
import io.ebeaninternal.server.util.Checksum;
13+
import io.ebeaninternal.server.util.JsonContentHash;
1414

1515
import jakarta.persistence.PersistenceException;
1616
import java.sql.SQLException;
@@ -141,7 +141,10 @@ public MutableValueInfo info() {
141141
}
142142

143143
/**
144-
* Hold checksum of json source content to use for dirty detection.
144+
* Hold canonical hash of json content to use for dirty detection.
145+
* <p>
146+
* Uses an order-independent hash so that databases which reorder JSON object
147+
* keys (e.g. PostgreSQL JSONB) do not cause false dirty detection.
145148
* <p>
146149
* Does not support rebuilding 'oldValue' as no original json content.
147150
*/
@@ -152,7 +155,7 @@ private static final class ChecksumMutableValue implements MutableValueInfo {
152155

153156
ChecksumMutableValue(ScalarType<?> parent, String json) {
154157
this.parent = parent;
155-
this.checksum = Checksum.checksum(json);
158+
this.checksum = JsonContentHash.hash(json);
156159
}
157160

158161
/**
@@ -165,13 +168,13 @@ private static final class ChecksumMutableValue implements MutableValueInfo {
165168

166169
@Override
167170
public MutableValueNext nextDirty(String json) {
168-
final long nextChecksum = Checksum.checksum(json);
171+
final long nextChecksum = JsonContentHash.hash(json);
169172
return nextChecksum == checksum ? null : new NextPair(json, new ChecksumMutableValue(parent, nextChecksum));
170173
}
171174

172175
@Override
173176
public boolean isEqualToObject(Object obj) {
174-
return Checksum.checksum(parent.format(obj)) == checksum;
177+
return JsonContentHash.hash(parent.format(obj)) == checksum;
175178
}
176179

177180
@Override
@@ -182,6 +185,10 @@ public Object get() {
182185

183186
/**
184187
* Hold json source content. This supports rebuilding the 'oldValue'.
188+
* <p>
189+
* Uses fast string equality as primary check, with an order-independent
190+
* canonical hash as fallback to handle databases that reorder JSON object
191+
* keys (e.g. PostgreSQL JSONB).
185192
*/
186193
private static final class SourceMutableValue implements MutableValueInfo, MutableValueNext {
187194

@@ -195,12 +202,15 @@ private static final class SourceMutableValue implements MutableValueInfo, Mutab
195202

196203
@Override
197204
public MutableValueNext nextDirty(String json) {
198-
return Objects.equals(originalJson, json) ? null : new SourceMutableValue(parent, json);
205+
if (jsonContentEqual(originalJson, json)) {
206+
return null;
207+
}
208+
return new SourceMutableValue(parent, json);
199209
}
200210

201211
@Override
202212
public boolean isEqualToObject(Object obj) {
203-
return Objects.equals(originalJson, parent.format(obj));
213+
return jsonContentEqual(originalJson, parent.format(obj));
204214
}
205215

206216
@Override
@@ -219,4 +229,13 @@ public MutableValueInfo info() {
219229
return this;
220230
}
221231
}
232+
233+
/**
234+
* Compare two JSON strings for content equality, ignoring key ordering.
235+
* Uses fast string equality first, falls back to order-independent hash comparison.
236+
*/
237+
private static boolean jsonContentEqual(String json1, String json2) {
238+
return Objects.equals(json1, json2)
239+
|| JsonContentHash.hash(json1) == JsonContentHash.hash(json2);
240+
}
222241
}
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
package io.ebeaninternal.server.util;
2+
3+
import com.fasterxml.jackson.core.JsonFactory;
4+
import com.fasterxml.jackson.core.JsonParser;
5+
import com.fasterxml.jackson.core.JsonToken;
6+
7+
import java.io.IOException;
8+
9+
/**
10+
* Compute an order-independent structural hash of JSON content using Jackson's streaming parser.
11+
* <p>
12+
* Object key ordering does NOT affect the hash value (handles PostgreSQL JSONB key reordering),
13+
* while array element ordering DOES affect it (array position is semantically significant).
14+
* <p>
15+
* This is significantly faster than a full parse/format roundtrip because it performs
16+
* zero object allocation beyond the parser itself — no tree building, no reflection,
17+
* no type conversion. Single-pass O(n) time with O(depth) stack space.
18+
*/
19+
public final class JsonContentHash {
20+
21+
private static final JsonFactory FACTORY = new JsonFactory();
22+
23+
/**
24+
* Compute an order-independent hash of JSON content.
25+
* Two JSON strings with identical content but different key ordering
26+
* will produce the same hash value.
27+
*/
28+
public static long hash(String json) {
29+
if (json == null || json.isEmpty()) {
30+
return 0L;
31+
}
32+
try (JsonParser parser = FACTORY.createParser(json)) {
33+
parser.nextToken();
34+
return computeHash(parser);
35+
} catch (IOException e) {
36+
// Fallback to regular string hash if JSON is malformed.
37+
// This is safe: two identical malformed strings produce the same hash,
38+
// and a malformed string won't falsely match a valid one.
39+
return stringHash(json);
40+
}
41+
}
42+
43+
private static long computeHash(JsonParser parser) throws IOException {
44+
JsonToken token = parser.currentToken();
45+
if (token == null) {
46+
return 0L;
47+
}
48+
switch (token) {
49+
case START_OBJECT:
50+
return hashObject(parser);
51+
case START_ARRAY:
52+
return hashArray(parser);
53+
case VALUE_STRING:
54+
return mix(stringHash(parser.getText()));
55+
case VALUE_NUMBER_INT:
56+
case VALUE_NUMBER_FLOAT:
57+
// Use text representation for numeric consistency across int/long/double
58+
return mix(stringHash(parser.getText()));
59+
case VALUE_TRUE:
60+
return 0x9E3779B97F4A7C15L;
61+
case VALUE_FALSE:
62+
return 0x517CC1B727220A95L;
63+
case VALUE_NULL:
64+
return 0x6C62272E07BB0142L;
65+
default:
66+
return 0L;
67+
}
68+
}
69+
70+
// Type markers to distinguish empty object {}, empty array [], and null
71+
private static final long OBJECT_SEED = 0x7A5662B4E8B10FA3L;
72+
private static final long ARRAY_SEED = 0x3C6EF372FE94F82BL;
73+
74+
/**
75+
* Hash an object using commutative addition of entry hashes.
76+
* Addition is commutative (a + b == b + a), so the result is
77+
* independent of the order in which keys appear in the JSON.
78+
*/
79+
private static long hashObject(JsonParser parser) throws IOException {
80+
long hash = OBJECT_SEED;
81+
while (parser.nextToken() != JsonToken.END_OBJECT) {
82+
long keyHash = stringHash(parser.currentName());
83+
parser.nextToken();
84+
long valueHash = computeHash(parser);
85+
// Mix key+value into a single entry hash, then add (commutative)
86+
hash += mix(keyHash * 0x9E3779B97F4A7C15L + valueHash);
87+
}
88+
return hash;
89+
}
90+
91+
/**
92+
* Hash an array using position-dependent combination.
93+
* Array element order IS semantically significant in JSON.
94+
*/
95+
private static long hashArray(JsonParser parser) throws IOException {
96+
long hash = ARRAY_SEED;
97+
while (parser.nextToken() != JsonToken.END_ARRAY) {
98+
hash = hash * 31 + computeHash(parser);
99+
}
100+
return mix(hash);
101+
}
102+
103+
/**
104+
* 64-bit FNV-1a inspired string hash for better distribution than String.hashCode().
105+
*/
106+
private static long stringHash(String s) {
107+
long h = 0xcbf29ce484222325L;
108+
for (int i = 0; i < s.length(); i++) {
109+
h ^= s.charAt(i);
110+
h *= 0x100000001b3L;
111+
}
112+
return h;
113+
}
114+
115+
/**
116+
* Mixing/finalizer function to improve hash distribution and break
117+
* additive symmetry (prevents collisions when values are swapped between keys).
118+
* <p>
119+
* This is fmix64 from MurmurHash3 by Austin Appleby (public domain).
120+
* See: https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
121+
*/
122+
private static long mix(long h) {
123+
h ^= (h >>> 33);
124+
h *= 0xff51afd7ed558ccdL;
125+
h ^= (h >>> 33);
126+
h *= 0xc4ceb9fe1a85ec53L;
127+
h ^= (h >>> 33);
128+
return h;
129+
}
130+
}
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
package io.ebeaninternal.server.util;
2+
3+
import org.junit.jupiter.api.Test;
4+
5+
import static org.assertj.core.api.Assertions.assertThat;
6+
7+
class JsonContentHashTest {
8+
9+
@Test
10+
void sameContent_sameHash() {
11+
String json = "{\"name\":\"Alice\",\"age\":30}";
12+
assertThat(JsonContentHash.hash(json)).isEqualTo(JsonContentHash.hash(json));
13+
}
14+
15+
@Test
16+
void reorderedKeys_sameHash() {
17+
// The core scenario: PostgreSQL JSONB reorders keys
18+
String jackson = "{\"status\":\"ACTIVE\",\"type\":\"ADMIN\"}";
19+
String postgres = "{\"type\":\"ADMIN\",\"status\":\"ACTIVE\"}";
20+
assertThat(JsonContentHash.hash(jackson)).isEqualTo(JsonContentHash.hash(postgres));
21+
}
22+
23+
@Test
24+
void reorderedKeys_multipleFields() {
25+
String a = "{\"zebra\":1,\"apple\":2,\"mango\":3}";
26+
String b = "{\"apple\":2,\"mango\":3,\"zebra\":1}";
27+
String c = "{\"mango\":3,\"zebra\":1,\"apple\":2}";
28+
long hashA = JsonContentHash.hash(a);
29+
long hashB = JsonContentHash.hash(b);
30+
long hashC = JsonContentHash.hash(c);
31+
assertThat(hashA).isEqualTo(hashB);
32+
assertThat(hashA).isEqualTo(hashC);
33+
}
34+
35+
@Test
36+
void differentValues_differentHash() {
37+
String a = "{\"status\":\"ACTIVE\",\"type\":\"ADMIN\"}";
38+
String b = "{\"status\":\"INACTIVE\",\"type\":\"ADMIN\"}";
39+
assertThat(JsonContentHash.hash(a)).isNotEqualTo(JsonContentHash.hash(b));
40+
}
41+
42+
@Test
43+
void differentKeys_differentHash() {
44+
String a = "{\"name\":\"Alice\"}";
45+
String b = "{\"nome\":\"Alice\"}";
46+
assertThat(JsonContentHash.hash(a)).isNotEqualTo(JsonContentHash.hash(b));
47+
}
48+
49+
@Test
50+
void nestedObjects_reorderedKeys() {
51+
String a = "{\"user\":{\"first\":\"Alice\",\"last\":\"Smith\"},\"active\":true}";
52+
String b = "{\"active\":true,\"user\":{\"last\":\"Smith\",\"first\":\"Alice\"}}";
53+
assertThat(JsonContentHash.hash(a)).isEqualTo(JsonContentHash.hash(b));
54+
}
55+
56+
@Test
57+
void nestedObjects_differentValues() {
58+
String a = "{\"user\":{\"first\":\"Alice\",\"last\":\"Smith\"}}";
59+
String b = "{\"user\":{\"first\":\"Bob\",\"last\":\"Smith\"}}";
60+
assertThat(JsonContentHash.hash(a)).isNotEqualTo(JsonContentHash.hash(b));
61+
}
62+
63+
@Test
64+
void arrayOrder_matters() {
65+
// Array element order IS semantically significant
66+
String a = "[1,2,3]";
67+
String b = "[3,2,1]";
68+
assertThat(JsonContentHash.hash(a)).isNotEqualTo(JsonContentHash.hash(b));
69+
}
70+
71+
@Test
72+
void arrayOrder_sameOrder_sameHash() {
73+
String a = "[1,2,3]";
74+
String b = "[1,2,3]";
75+
assertThat(JsonContentHash.hash(a)).isEqualTo(JsonContentHash.hash(b));
76+
}
77+
78+
@Test
79+
void enumValues_reorderedKeys() {
80+
// The exact scenario from issue #3129: POJO with multiple enum fields
81+
String jackson = "{\"status\":\"ACTIVE\",\"role\":\"ADMIN\",\"priority\":\"HIGH\"}";
82+
String postgres = "{\"role\":\"ADMIN\",\"priority\":\"HIGH\",\"status\":\"ACTIVE\"}";
83+
assertThat(JsonContentHash.hash(jackson)).isEqualTo(JsonContentHash.hash(postgres));
84+
}
85+
86+
@Test
87+
void swappedValues_differentHash() {
88+
// Swapping values between keys must produce different hashes
89+
String a = "{\"a\":1,\"b\":2}";
90+
String b = "{\"a\":2,\"b\":1}";
91+
assertThat(JsonContentHash.hash(a)).isNotEqualTo(JsonContentHash.hash(b));
92+
}
93+
94+
@Test
95+
void emptyObject() {
96+
assertThat(JsonContentHash.hash("{}")).isNotEqualTo(0L);
97+
}
98+
99+
@Test
100+
void emptyArray() {
101+
assertThat(JsonContentHash.hash("[]")).isNotEqualTo(0L);
102+
}
103+
104+
@Test
105+
void emptyObject_vs_emptyArray() {
106+
assertThat(JsonContentHash.hash("{}")).isNotEqualTo(JsonContentHash.hash("[]"));
107+
}
108+
109+
@Test
110+
void nullInput() {
111+
assertThat(JsonContentHash.hash(null)).isEqualTo(0L);
112+
}
113+
114+
@Test
115+
void emptyString() {
116+
assertThat(JsonContentHash.hash("")).isEqualTo(0L);
117+
}
118+
119+
@Test
120+
void booleanValues() {
121+
String a = "{\"flag\":true}";
122+
String b = "{\"flag\":false}";
123+
assertThat(JsonContentHash.hash(a)).isNotEqualTo(JsonContentHash.hash(b));
124+
}
125+
126+
@Test
127+
void nullValues() {
128+
String a = "{\"value\":null}";
129+
String b = "{\"value\":\"text\"}";
130+
assertThat(JsonContentHash.hash(a)).isNotEqualTo(JsonContentHash.hash(b));
131+
}
132+
133+
@Test
134+
void numericTypes() {
135+
String a = "{\"count\":42}";
136+
String b = "{\"count\":43}";
137+
assertThat(JsonContentHash.hash(a)).isNotEqualTo(JsonContentHash.hash(b));
138+
}
139+
140+
@Test
141+
void whitespaceVariations() {
142+
// Whitespace in JSON structure (not in values) should not matter
143+
String compact = "{\"a\":1,\"b\":2}";
144+
String spaced = "{ \"a\" : 1 , \"b\" : 2 }";
145+
assertThat(JsonContentHash.hash(compact)).isEqualTo(JsonContentHash.hash(spaced));
146+
}
147+
148+
@Test
149+
void complexNestedStructure() {
150+
String a = "{\"users\":[{\"name\":\"Alice\"},{\"name\":\"Bob\"}],\"count\":2,\"active\":true}";
151+
String b = "{\"active\":true,\"count\":2,\"users\":[{\"name\":\"Alice\"},{\"name\":\"Bob\"}]}";
152+
assertThat(JsonContentHash.hash(a)).isEqualTo(JsonContentHash.hash(b));
153+
}
154+
155+
@Test
156+
void postgresJsonbKeyReordering_realistic() {
157+
// Simulates PostgreSQL JSONB storage which reorders by key length, then alphabetically
158+
String javaOrder = "{\"status\":\"ACTIVE\",\"type\":\"STANDARD\",\"createdAt\":\"2024-01-01\",\"id\":123}";
159+
String pgOrder = "{\"id\":123,\"type\":\"STANDARD\",\"status\":\"ACTIVE\",\"createdAt\":\"2024-01-01\"}";
160+
assertThat(JsonContentHash.hash(javaOrder)).isEqualTo(JsonContentHash.hash(pgOrder));
161+
}
162+
}

0 commit comments

Comments
 (0)