[flink] Preserve log-only source restore mode

luoyuxia · luoyuxia · commit 71b01aad0eff · 2026-05-20T11:54:59.000+08:00
diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/source/enumerator/FlinkSourceEnumerator.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/source/enumerator/FlinkSourceEnumerator.java
@@ -134,6 +134,19 @@ public class FlinkSourceEnumerator
     /** Buckets that have been assigned to readers. */
     private final Set<TableBucket> assignedTableBuckets;
 
+    /**
+     * Remaining lake snapshot and hybrid lake/Fluss splits to assign.
+     *
+     * <p>The field has three states:
+     *
+     * <ul>
+     *   <li>{@code null}: lake split initialization has not run yet.
+     *   <li>empty list: lake split initialization has run, or this enumerator was started in
+     *       Fluss-only (non-lake) mode and must not initialize lake splits after restore.
+     *   <li>non-empty list: lake split initialization has run and these splits still need to be
+     *       assigned.
+     * </ul>
+     */
     @Nullable private List<SourceSplitBase> pendingHybridLakeFlussSplits;
 
     private final long scanPartitionDiscoveryIntervalMs;
@@ -1207,11 +1220,16 @@ public void addReader(int subtaskId) {
 
     @Override
     public SourceEnumeratorState snapshotState(long checkpointId) {
+        List<SourceSplitBase> remainingHybridLakeFlussSplits =
+                // Preserve Fluss-only (non-lake) startup across restore. Otherwise a restored
+                // enumerator with a non-null lakeSource would treat null as "not initialized yet"
+                // and generate lake snapshot splits.
+                lakeSource == null ? Collections.emptyList() : pendingHybridLakeFlussSplits;
         final SourceEnumeratorState enumeratorState =
                 new SourceEnumeratorState(
                         assignedTableBuckets,
                         assignedPartitions,
-                        pendingHybridLakeFlussSplits,
+                        remainingHybridLakeFlussSplits,
                         leaseContext.getKvSnapshotLeaseId());
         LOG.debug("Source Checkpoint is {}", enumeratorState);
         return enumeratorState;
diff --git a/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/source/enumerator/FlinkSourceEnumeratorTest.java b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/source/enumerator/FlinkSourceEnumeratorTest.java
@@ -32,6 +32,7 @@
 import org.apache.fluss.flink.source.split.LogSplit;
 import org.apache.fluss.flink.source.split.SnapshotSplit;
 import org.apache.fluss.flink.source.split.SourceSplitBase;
+import org.apache.fluss.flink.source.state.SourceEnumeratorState;
 import org.apache.fluss.flink.utils.FlinkTestBase;
 import org.apache.fluss.lake.source.LakeSource;
 import org.apache.fluss.lake.source.LakeSplit;
@@ -210,6 +211,97 @@ void testInvalidSplitAssignmentBatchSize() throws Exception {
         }
     }
 
+    @Test
+    void testRestoreFlussOnlySourceWithLakeSourceDoesNotGenerateLakeSplits(@TempDir Path tempDir)
+            throws Throwable {
+        long tableId =
+                createTable(DEFAULT_TABLE_PATH, DEFAULT_AUTO_PARTITIONED_LOG_TABLE_DESCRIPTOR);
+        ZooKeeperClient zooKeeperClient = FLUSS_CLUSTER_EXTENSION.getZooKeeperClient();
+        Map<Long, String> partitionNameByIds =
+                waitUntilPartitions(zooKeeperClient, DEFAULT_TABLE_PATH);
+        Long partitionId = partitionNameByIds.keySet().stream().sorted().findFirst().get();
+        String partitionName = partitionNameByIds.get(partitionId);
+
+        LakeTableSnapshot lakeTableSnapshot =
+                new LakeTableSnapshot(
+                        0,
+                        ImmutableMap.of(
+                                new TableBucket(tableId, partitionId, 0), 50L,
+                                new TableBucket(tableId, partitionId, 1), 50L,
+                                new TableBucket(tableId, partitionId, 2), 50L));
+        LakeTableHelper lakeTableHelper = new LakeTableHelper(zooKeeperClient, tempDir.toString());
+        lakeTableHelper.registerLakeTableSnapshotV1(tableId, lakeTableSnapshot);
+
+        ResolvedPartitionSpec partitionSpec =
+                ResolvedPartitionSpec.fromPartitionName(
+                        Collections.singletonList("name"), partitionName);
+        LakeSource<LakeSplit> lakeSource =
+                new TestingLakeSource(
+                        DEFAULT_BUCKET_NUM,
+                        Collections.singletonList(
+                                new PartitionInfo(
+                                        partitionId, partitionSpec, DEFAULT_REMOTE_DATA_DIR)));
+
+        SourceEnumeratorState checkpointState;
+        try (MockSplitEnumeratorContext<SourceSplitBase> context =
+                new MockSplitEnumeratorContext<>(1)) {
+            FlinkSourceEnumerator enumerator =
+                    new FlinkSourceEnumerator(
+                            DEFAULT_TABLE_PATH,
+                            flussConf,
+                            true,
+                            false,
+                            context,
+                            OffsetsInitializer.timestamp(1000L),
+                            DEFAULT_SCAN_PARTITION_DISCOVERY_INTERVAL_MS,
+                            streaming,
+                            null,
+                            null,
+                            LeaseContext.DEFAULT,
+                            false);
+
+            checkpointState = enumerator.snapshotState(1L);
+            assertThat(checkpointState.getRemainingHybridLakeFlussSplits()).isNotNull().isEmpty();
+        }
+
+        try (MockSplitEnumeratorContext<SourceSplitBase> context =
+                        new MockSplitEnumeratorContext<>(DEFAULT_BUCKET_NUM);
+                MockWorkExecutor workExecutor = new MockWorkExecutor(context);
+                FlinkSourceEnumerator restoredEnumerator =
+                        new FlinkSourceEnumerator(
+                                DEFAULT_TABLE_PATH,
+                                flussConf,
+                                false,
+                                true,
+                                context,
+                                checkpointState.getAssignedBuckets(),
+                                checkpointState.getAssignedPartitions(),
+                                checkpointState.getRemainingHybridLakeFlussSplits(),
+                                OffsetsInitializer.full(),
+                                DEFAULT_SCAN_PARTITION_DISCOVERY_INTERVAL_MS,
+                                streaming,
+                                null,
+                                lakeSource,
+                                workExecutor,
+                                LeaseContext.DEFAULT,
+                                true)) {
+            restoredEnumerator.start();
+            runPeriodicPartitionDiscovery(workExecutor);
+
+            for (int i = 0; i < DEFAULT_BUCKET_NUM; i++) {
+                registerReader(context, restoredEnumerator, i);
+            }
+
+            List<SourceSplitBase> assignedSplits =
+                    getReadersAssignments(context).values().stream()
+                            .flatMap(List::stream)
+                            .collect(Collectors.toList());
+            assertThat(assignedSplits).isNotEmpty();
+            assertThat(assignedSplits).allMatch(split -> split instanceof LogSplit);
+            assertThat(assignedSplits).noneMatch(split -> split instanceof LakeSnapshotSplit);
+        }
+    }
+
     @Test
     void testPkTableWithSnapshotSplits() throws Throwable {
         long tableId = createTable(DEFAULT_TABLE_PATH, DEFAULT_PK_TABLE_DESCRIPTOR);