-
Notifications
You must be signed in to change notification settings - Fork 296
fix: false duplicate entry error on concurrent INSERT with NULL uniqu… #24046
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
86ae08a
661744a
551c881
3e5154f
75b5b4a
e15ac4e
d4b3fae
2f7604a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -168,6 +168,15 @@ func (n *Bitmap) Len() int64 { | |
| return n.len | ||
| } | ||
|
|
||
| // RecalculateCount recounts the number of set bits from the data array. | ||
| // This is used to fix count after concurrent bitmap corruption. | ||
| func (n *Bitmap) RecalculateCount() { | ||
| n.count = 0 | ||
| for _, w := range n.data { | ||
| n.count += int64(bits.OnesCount64(w)) | ||
| } | ||
| } | ||
|
|
||
| // Size return number of bytes in n.data | ||
| // XXX WTF Note that this size is not the same as InitWithSize. | ||
| func (n *Bitmap) Size() int { | ||
|
|
@@ -349,7 +358,13 @@ func (n *Bitmap) TryExpandWithSize(size int) { | |
| return | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ✅ TryExpandWithSize zeroing — correct fix. This is a legitimate bug fix. When Note: the |
||
| } | ||
| if len(n.data) < newCap { | ||
| oldLen := len(n.data) | ||
| n.data = n.data[:newCap] | ||
| // Zero out newly exposed slots to avoid reading stale data | ||
| // left over from a previous use of the same backing array. | ||
| for i := oldLen; i < newCap; i++ { | ||
| n.data[i] = 0 | ||
| } | ||
| } | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -135,7 +135,7 @@ func TryExpand(nsp *Nulls, size int) { | |
|
|
||
| // Contains returns true if the integer is contained in the Nulls | ||
| func (nsp *Nulls) Contains(row uint64) bool { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🔴 Removing
|
||
| return nsp != nil && !nsp.np.EmptyByFlag() && nsp.np.Contains(row) | ||
| return nsp != nil && nsp.np.Contains(row) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 1. Bitmap contains race panic. Nulls.Contains now always calls bitmap.Bitmap.Contains, and some union paths rely on IsEmpty() instead of EmptyByFlag(), increasing read access to bitmaps during concurrent expansion. Since Bitmap.TryExpandWithSize sets len before resizing data, a concurrent Contains() can index past data and panic. Agent Prompt
|
||
| } | ||
|
|
||
| func Contains(nsp *Nulls, row uint64) bool { | ||
|
|
@@ -226,8 +226,13 @@ func Range(nsp *Nulls, start, end, bias uint64, b *Nulls) { | |
| } | ||
|
|
||
| b.np.InitWithSize(int64(end + 1 - bias)) | ||
|
|
||
| // Take a snapshot of the source bitmap to prevent reading inconsistent | ||
| // state when the source is being concurrently modified by a parallel | ||
| // Prepare call on the same operator chain. | ||
| snap := nsp.Clone() | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🔴 Clone() on every Range() call — severe hot-path performance regression
This also only provides a snapshot — if the underlying concurrency bug remains, the snapshot may still be taken from an inconsistent state (torn read). |
||
| for ; start < end; start++ { | ||
| if nsp.np.Contains(start) { | ||
| if snap.np.Contains(start) { | ||
| b.np.Add(start - bias) | ||
| } | ||
| } | ||
|
Comment on lines
226
to
238
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 2. Range skips corrupted nulls. nulls.Range still returns early based on EmptyByFlag() (which is count==0), so if count is corrupted to 0 while data has set bits, Range will drop NULLs entirely. This can make Vector.Window/CloneWindowTo silently lose NULL markers and produce incorrect results. Agent Prompt
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1329,6 +1329,9 @@ func (v *Vector) Copy(w *Vector, vi, wi int64, mp *mpool.MPool) error { | |
| if w.GetNulls().Contains(uint64(wi)) { | ||
| v.GetNulls().Set(uint64(vi)) | ||
| } else { | ||
| // Ensure bitmap len covers vi so that Contains() works correctly | ||
| // for all rows. TryExpand is needed because Unset doesn't expand. | ||
| nulls.TryExpand(v.GetNulls(), int(vi)+1) | ||
| v.GetNulls().Unset(uint64(vi)) | ||
| } | ||
| return nil | ||
|
|
@@ -1343,7 +1346,7 @@ func GetUnionAllFunction(typ types.Type, mp *mpool.MPool) func(v, w *Vector) err | |
| u64Length := uint64(moreLength) | ||
|
|
||
| moreNp := more.GetBitmap() | ||
| if moreNp == nil || moreNp.EmptyByFlag() || moreLength == 0 { | ||
| if moreNp == nil || moreNp.IsEmpty() || moreLength == 0 { | ||
| return | ||
| } | ||
|
|
||
|
|
@@ -1355,6 +1358,9 @@ func GetUnionAllFunction(typ types.Type, mp *mpool.MPool) func(v, w *Vector) err | |
| if moreNp.Contains(0) { | ||
| dst.Set(u64offset) | ||
| } | ||
| // Ensure bitmap len covers the full range so Contains() works | ||
| // correctly for all rows, even trailing non-null rows. | ||
| nulls.TryExpand(dst, oldLength+moreLength) | ||
| } | ||
|
|
||
| switch typ.Oid { | ||
|
|
@@ -2085,30 +2091,25 @@ func GetUnionAllFunction(typ types.Type, mp *mpool.MPool) func(v, w *Vector) err | |
| var err error | ||
| vs := toSliceOfLengthNoTypeCheck[types.Varlena](v, v.length+w.length) | ||
|
|
||
| bm := w.nsp.GetBitmap() | ||
| if bm != nil && !bm.EmptyByFlag() { | ||
| for i := range ws { | ||
| if w.gsp.Contains(uint64(i)) { | ||
| nulls.Add(&v.gsp, uint64(v.length)) | ||
| } | ||
| if bm.Contains(uint64(i)) { | ||
| nulls.Add(&v.nsp, uint64(v.length)) | ||
| } else { | ||
| err = BuildVarlenaFromVarlena(v, &vs[v.length], &ws[i], &w.area, mp) | ||
| if err != nil { | ||
| return err | ||
| } | ||
| } | ||
| v.length++ | ||
| // Always use null-aware path to prevent losing null information | ||
| // when bitmap state is inconsistent due to concurrent access. | ||
| for i := range ws { | ||
| if w.gsp.Contains(uint64(i)) { | ||
| nulls.Add(&v.gsp, uint64(v.length)) | ||
| } | ||
| } else { | ||
| for i := range ws { | ||
| if w.nsp.Contains(uint64(i)) { | ||
| nulls.Add(&v.nsp, uint64(v.length)) | ||
| } else { | ||
| err = BuildVarlenaFromVarlena(v, &vs[v.length], &ws[i], &w.area, mp) | ||
| if err != nil { | ||
| return err | ||
| } | ||
| v.length++ | ||
| } | ||
| v.length++ | ||
| } | ||
| // Ensure bitmap len covers all rows so Contains() works correctly | ||
| if v.nsp.Count() > 0 { | ||
| nulls.TryExpand(&v.nsp, v.length) | ||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🟡 Removing Varlena fast path — performance regression. The old code had a fast path for vectors with no nulls ( For large batches with no nulls (the common case), this adds an unnecessary |
||
| return nil | ||
| } | ||
|
|
@@ -2647,7 +2648,7 @@ func unionT[T int32 | int64](v, w *Vector, sels []T, mp *mpool.MPool) error { | |
| } | ||
| } else { | ||
| tlen := v.GetType().TypeSize() | ||
| if !w.nsp.EmptyByFlag() { | ||
| if !w.nsp.IsEmpty() { | ||
| for i, sel := range sels { | ||
| if w.gsp.Contains(uint64(sel)) { | ||
| nulls.Add(&v.gsp, uint64(oldLen+i)) | ||
|
|
@@ -2750,7 +2751,7 @@ func (v *Vector) UnionBatch(w *Vector, offset int64, cnt int, flags []uint8, mp | |
| vCol = toSliceOfLengthNoTypeCheck[types.Varlena](v, v.length+addCnt) | ||
| ToSliceNoTypeCheck(w, &wCol) | ||
|
|
||
| if !w.nsp.EmptyByFlag() { | ||
| if !w.nsp.IsEmpty() { | ||
| if flags == nil { | ||
| for i := 0; i < cnt; i++ { | ||
| if w.gsp.Contains(uint64(offset) + uint64(i)) { | ||
|
|
@@ -2785,6 +2786,8 @@ func (v *Vector) UnionBatch(w *Vector, offset int64, cnt int, flags []uint8, mp | |
| v.length++ | ||
| } | ||
| } | ||
| // Ensure bitmap len covers all rows after union | ||
| nulls.TryExpand(&v.nsp, v.length) | ||
| } else { | ||
| if flags == nil { | ||
| for i := 0; i < cnt; i++ { | ||
|
|
@@ -2815,7 +2818,7 @@ func (v *Vector) UnionBatch(w *Vector, offset int64, cnt int, flags []uint8, mp | |
| } | ||
| } else { | ||
| tlen := v.GetType().TypeSize() | ||
| if !w.nsp.EmptyByFlag() { | ||
| if !w.nsp.IsEmpty() { | ||
| if flags == nil { | ||
| for i := 0; i < cnt; i++ { | ||
| if w.gsp.Contains(uint64(offset) + uint64(i)) { | ||
|
|
@@ -3425,6 +3428,11 @@ func appendOneFixed[T any](vec *Vector, val T, isNull bool, mp *mpool.MPool) err | |
| if isNull { | ||
| nulls.Add(&vec.nsp, uint64(length)) | ||
| } else { | ||
| // Ensure bitmap len covers the new position so Contains() works | ||
| // correctly even when the last appended rows are non-null. | ||
| if vec.nsp.Count() > 0 { | ||
| nulls.TryExpand(&vec.nsp, vec.length) | ||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🟡 TryExpand on every non-null appendOneFixed — hot path cost:
if vec.nsp.Count() > 0 {
nulls.TryExpand(&vec.nsp, vec.length)
} This runs on every non-null append. Consider doing this once after the batch is fully built, or tracking whether expansion is needed with a flag. |
||
| var col []T | ||
| ToSliceNoTypeCheck(vec, &col) | ||
| col[length] = val | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -139,9 +139,8 @@ func (preInsert *PreInsert) constructColBuf(proc *proc, bat *batch.Batch, first | |
| return err | ||
| } | ||
| } else { | ||
| preInsert.ctr.canFreeVecIdx[idx] = true | ||
| if bat.Vecs[idx].IsConst() { | ||
| preInsert.ctr.canFreeVecIdx[idx] = true | ||
| // expand const vector | ||
| typ := bat.Vecs[idx].GetType() | ||
| tmpVec := vector.NewOffHeapVecWithType(*typ) | ||
| if err = vector.GetUnionAllFunction(*typ, proc.Mp())(tmpVec, bat.Vecs[idx]); err != nil { | ||
|
|
@@ -150,7 +149,11 @@ func (preInsert *PreInsert) constructColBuf(proc *proc, bat *batch.Batch, first | |
| } | ||
| preInsert.ctr.buf.Vecs[idx] = tmpVec | ||
| } else { | ||
| preInsert.ctr.buf.SetVector(int32(idx), bat.Vecs[idx]) | ||
| dupVec, dupErr := bat.Vecs[idx].Dup(proc.Mp()) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🟡 Dup() for every non-const column — memory regression. The old code used If the issue is that a downstream operator modifies the shared vector (causing the bitmap corruption), the proper fix is either:
Rather than copying all columns unconditionally. |
||
| if dupErr != nil { | ||
| return dupErr | ||
| } | ||
| preInsert.ctr.buf.Vecs[idx] = dupVec | ||
| } | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -67,6 +67,13 @@ func (valueScan *ValueScan) makeValueScanBatch(proc *process.Process) (err error | |
| // select * from (values row(1,1), row(2,2), row(3,3)) a; | ||
| bat := valueScan.Batchs[0] | ||
|
|
||
| // Skip evalRowsetData if already done (prevents concurrent bitmap corruption | ||
| // when the same scope is started multiple times by nested MergeRun) | ||
| if valueScan.runningCtx.prepared { | ||
| return nil | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ��
|
||
| } | ||
| valueScan.runningCtx.prepared = true | ||
|
|
||
| for i := 0; i < valueScan.ColCount; i++ { | ||
| exprList = valueScan.ExprExecLists[i] | ||
| if len(exprList) == 0 { | ||
|
|
@@ -78,6 +85,14 @@ func (valueScan *ValueScan) makeValueScanBatch(proc *process.Process) (err error | |
| } | ||
| } | ||
|
|
||
| // Fix bitmap count/data inconsistency that can occur when the same | ||
| // operator chain is started multiple times by nested MergeRun. | ||
| for _, vec := range bat.Vecs { | ||
| if vec != nil && !vec.IsConst() && vec.Length() > 0 { | ||
| vec.GetNulls().GetBitmap().RecalculateCount() | ||
| } | ||
| } | ||
|
|
||
| return nil | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🟡 RecalculateCount() treats the symptom, not the cause
This function exists to fix count/data inconsistency after "concurrent bitmap corruption" (per the comment). But if concurrent access can corrupt the count, it can also corrupt the data bits themselves. Recalculating count from (possibly corrupted) data only makes the count consistent with whatever garbage is in data.
The real fix should prevent the concurrent corruption in the first place. Consider this a temporary band-aid and track the root cause separately.