diff --git a/CHANGES.md b/CHANGES.md index 319520f94309..aa9a49a16e68 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -69,6 +69,10 @@ ## New Features / Improvements * X feature added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). +* TriggerStateMachineRunner changes from BitSetCoder to SentinelBitSetCoder to + encode finished bitset. SentinelBitSetCoder and BitSetCoder are state + compatible. Both coders can decode encoded bytes from the other coder + ([#38139](https://github.com/apache/beam/issues/38139)). ## Breaking Changes diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/serialization/SentinelBitSetCoder.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/serialization/SentinelBitSetCoder.java new file mode 100644 index 000000000000..340816f6e0e1 --- /dev/null +++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/serialization/SentinelBitSetCoder.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.core.serialization; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.BitSet; +import org.apache.beam.sdk.coders.AtomicCoder; +import org.apache.beam.sdk.coders.ByteArrayCoder; +import org.apache.beam.sdk.coders.CoderException; + +/** + * Coder for {@link BitSet} that stores an empty bit set as a byte array with a single 0 element. In + * general BitSetCoder should be preferred as it encodes an empty bit set as an empty byte array. + * However, there are cases where non-empty values are useful to indicate presence. + */ +public class SentinelBitSetCoder extends AtomicCoder { + + private static final SentinelBitSetCoder INSTANCE = new SentinelBitSetCoder(); + private static final ByteArrayCoder BYTE_ARRAY_CODER = ByteArrayCoder.of(); + + private SentinelBitSetCoder() {} + + public static SentinelBitSetCoder of() { + return INSTANCE; + } + + @Override + public void encode(BitSet value, OutputStream outStream) throws CoderException, IOException { + encode(value, outStream, Context.NESTED); + } + + @Override + public void encode(BitSet value, OutputStream outStream, Context context) + throws CoderException, IOException { + if (value == null) { + throw new CoderException("cannot encode a null BitSet"); + } + byte[] bytes = value.isEmpty() ? new byte[] {0} : value.toByteArray(); + BYTE_ARRAY_CODER.encodeAndOwn(bytes, outStream, context); + } + + @Override + public BitSet decode(InputStream inStream) throws CoderException, IOException { + return decode(inStream, Context.NESTED); + } + + @Override + public BitSet decode(InputStream inStream, Context context) throws CoderException, IOException { + return BitSet.valueOf(BYTE_ARRAY_CODER.decode(inStream, context)); + } + + @Override + public void verifyDeterministic() throws NonDeterministicException { + verifyDeterministic( + this, + "SentinelBitSetCoder requires its ByteArrayCoder to be deterministic.", + BYTE_ARRAY_CODER); + } + + @Override + public boolean consistentWithEquals() { + return true; + } +} diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/triggers/TriggerStateMachineRunner.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/triggers/TriggerStateMachineRunner.java index cf29646ebaa3..e3791821b728 100644 --- a/runners/core-java/src/main/java/org/apache/beam/runners/core/triggers/TriggerStateMachineRunner.java +++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/triggers/TriggerStateMachineRunner.java @@ -26,7 +26,7 @@ import org.apache.beam.runners.core.StateAccessor; import org.apache.beam.runners.core.StateTag; import org.apache.beam.runners.core.StateTags; -import org.apache.beam.sdk.coders.BitSetCoder; +import org.apache.beam.runners.core.serialization.SentinelBitSetCoder; import org.apache.beam.sdk.state.Timers; import org.apache.beam.sdk.state.ValueState; import org.apache.beam.sdk.transforms.windowing.BoundedWindow; @@ -59,7 +59,7 @@ public class TriggerStateMachineRunner { @VisibleForTesting public static final StateTag> FINISHED_BITS_TAG = - StateTags.makeSystemTagInternal(StateTags.value("closed", BitSetCoder.of())); + StateTags.makeSystemTagInternal(StateTags.value("closed", SentinelBitSetCoder.of())); private final ExecutableTriggerStateMachine rootTrigger; private final TriggerStateMachineContextFactory contextFactory; diff --git a/runners/core-java/src/test/java/org/apache/beam/runners/core/serialization/SentinelBitSetCoderTest.java b/runners/core-java/src/test/java/org/apache/beam/runners/core/serialization/SentinelBitSetCoderTest.java new file mode 100644 index 000000000000..91d7d369f70b --- /dev/null +++ b/runners/core-java/src/test/java/org/apache/beam/runners/core/serialization/SentinelBitSetCoderTest.java @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.beam.runners.core.serialization; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; + +import java.util.Arrays; +import java.util.BitSet; +import java.util.List; +import org.apache.beam.sdk.coders.BitSetCoder; +import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.coders.Coder.Context; +import org.apache.beam.sdk.coders.CoderException; +import org.apache.beam.sdk.testing.CoderProperties; +import org.apache.beam.sdk.util.CoderUtils; +import org.apache.beam.sdk.values.TypeDescriptor; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.ExpectedException; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Tests for {@link SentinelBitSetCoder}. */ +@RunWith(JUnit4.class) +public class SentinelBitSetCoderTest { + + private static final Coder TEST_CODER = SentinelBitSetCoder.of(); + + private static final List TEST_VALUES = + Arrays.asList( + BitSet.valueOf(new byte[] {0xa, 0xb, 0xc}), + BitSet.valueOf(new byte[] {0xd, 0x3}), + BitSet.valueOf(new byte[] {0xd, 0xe}), + BitSet.valueOf(new byte[] {0}), + BitSet.valueOf(new byte[] {})); + + @Test + public void testDecodeEncodeEquals() throws Exception { + for (BitSet value : TEST_VALUES) { + CoderProperties.coderDecodeEncodeEqual(TEST_CODER, value); + } + } + + @Test + public void testRegisterByteSizeObserver() throws Exception { + CoderProperties.testByteCount( + SentinelBitSetCoder.of(), Coder.Context.OUTER, TEST_VALUES.toArray(new BitSet[] {})); + + CoderProperties.testByteCount( + SentinelBitSetCoder.of(), Coder.Context.NESTED, TEST_VALUES.toArray(new BitSet[] {})); + } + + @Test + public void testStructuralValueConsistentWithEquals() throws Exception { + for (BitSet value1 : TEST_VALUES) { + for (BitSet value2 : TEST_VALUES) { + CoderProperties.structuralValueConsistentWithEquals(TEST_CODER, value1, value2); + } + } + } + + /** + * Generated data to check that the wire format has not changed. "CgsM" is {0xa, 0xb, 0xc} "DQM" + * is {0xd, 0x3} "DQ4" is {0xd, 0xe} "AA==" is {0} (Sentinel for empty BitSet) + */ + private static final List TEST_ENCODINGS = + Arrays.asList("CgsM", "DQM", "DQ4", "AA", "AA"); + + @Test + public void testWireFormatEncode() throws Exception { + CoderProperties.coderEncodesBase64(TEST_CODER, TEST_VALUES, TEST_ENCODINGS); + } + + @Rule public ExpectedException thrown = ExpectedException.none(); + + @Test + public void encodeNullThrowsCoderException() throws Exception { + thrown.expect(CoderException.class); + thrown.expectMessage("cannot encode a null BitSet"); + + CoderUtils.encodeToBase64(TEST_CODER, null); + } + + @Test + public void testEncodedTypeDescriptor() throws Exception { + assertThat(TEST_CODER.getEncodedTypeDescriptor(), equalTo(TypeDescriptor.of(BitSet.class))); + } + + @Test + public void testEmptyBitSetEncoding() throws Exception { + { + byte[] encoded = CoderUtils.encodeToByteArray(TEST_CODER, new BitSet()); + // ByteArrayCoder in OUTER context encodes as is. + assertThat(encoded, equalTo(new byte[] {0})); + } + { + byte[] encoded = CoderUtils.encodeToByteArray(TEST_CODER, new BitSet(), Context.NESTED); + // Varint length = 1, data = 1 + assertThat(encoded, equalTo(new byte[] {1, 0})); + } + } + + @Test + public void testCompatibilityWithBitSetCoder() throws Exception { + BitSetCoder bitSetCoder = BitSetCoder.of(); + SentinelBitSetCoder sentinelCoder = SentinelBitSetCoder.of(); + + for (BitSet bitset : TEST_VALUES) { + for (Coder.Context context : Arrays.asList(Coder.Context.OUTER, Coder.Context.NESTED)) { + // Test SentinelBitSetCoder can decode bytes encoded by BitSetCoder + { + byte[] encodedByBitSet = CoderUtils.encodeToByteArray(bitSetCoder, bitset, context); + BitSet decodedBySentinel = + CoderUtils.decodeFromByteArray(sentinelCoder, encodedByBitSet, context); + assertThat( + "Decoding BitSetCoder encoded value with context " + context, + decodedBySentinel, + equalTo(bitset)); + } + + // Test BitSetCoder can decode bytes encoded by SentinelBitSetCoder + { + byte[] encodedBySentinel = CoderUtils.encodeToByteArray(sentinelCoder, bitset, context); + BitSet decodedByBitSet = + CoderUtils.decodeFromByteArray(bitSetCoder, encodedBySentinel, context); + assertThat( + "Decoding SentinelBitSetCoder encoded value with context " + context, + decodedByBitSet, + equalTo(bitset)); + } + } + } + } +}