Merge branch 'main' into mergify/configuration-deprecated-update

XuPeng-SH · web-flow · commit 71475a70815d · 2026-04-08T00:56:53.000+08:00
diff --git a/pkg/common/morpc/client_test.go b/pkg/common/morpc/client_test.go
@@ -417,19 +417,11 @@ func TestCloseIdleBackends(t *testing.T) {
 	tb.activeTime = time.Time{}
 	tb.Unlock()
 
-	go func() {
-		ctx, cancel := context.WithTimeout(context.TODO(), 2*time.Second)
-		defer cancel()
-		st, err := activeBackend.NewStream(false)
-		assert.NoError(t, err)
-		for i := 0; i < 50; i++ {
-			_ = st.Send(ctx, newTestMessage(1))
-			runtime.Gosched()
-		}
-	}()
-
 	gcDeadline := time.Now().Add(10 * time.Second)
 	for time.Now().Before(gcDeadline) {
+		// Refresh the non-idle backend inline so GC deterministically closes only the
+		// backend we explicitly marked idle, without relying on goroutine scheduling.
+		activeBackend.(*testBackend).active()
 		globalClientGC.doGCIdle()
 		runtime.Gosched()
 		c.mu.Lock()
diff --git a/pkg/sql/colexec/onduplicatekey/on_duplicate_key.go b/pkg/sql/colexec/onduplicatekey/on_duplicate_key.go
@@ -17,6 +17,7 @@ package onduplicatekey
 import (
 	"bytes"
 	"fmt"
+	"math"
 
 	"github.com/matrixorigin/matrixone/pkg/common/moerr"
 	"github.com/matrixorigin/matrixone/pkg/container/batch"
@@ -112,23 +113,29 @@ func resetInsertBatchForOnduplicateKey(proc *process.Process, originBatch *batch
 		insertArg.ctr.rbat = batch.NewWithSize(len(insertArg.Attrs))
 		insertArg.ctr.rbat.Attrs = insertArg.Attrs
 
-		insertArg.ctr.checkConflictBat = batch.NewWithSize(len(insertArg.Attrs))
-		insertArg.ctr.checkConflictBat.Attrs = append(insertArg.ctr.checkConflictBat.Attrs, insertArg.Attrs...)
-
 		for i, v := range originBatch.Vecs {
 			newVec := vector.NewVec(*v.GetType())
 			insertArg.ctr.rbat.SetVector(int32(i), newVec)
+		}
 
-			ckVec := vector.NewVec(*v.GetType())
-			insertArg.ctr.checkConflictBat.SetVector(int32(i), ckVec)
+		// Initialize hash-based conflict detection
+		insertArg.ctr.uniqueKeyColIndices = make([][]int32, len(insertArg.UniqueColCheckExpr))
+		insertArg.ctr.conflictMaps = make([]map[string]int, len(insertArg.UniqueColCheckExpr))
+		for i, expr := range insertArg.UniqueColCheckExpr {
+			insertArg.ctr.uniqueKeyColIndices[i] = extractColIndicesFromExpr(expr)
+			if len(insertArg.ctr.uniqueKeyColIndices[i]) == 0 {
+				return moerr.NewInternalErrorf(proc.Ctx, "failed to extract column indices from unique constraint expression %d", i)
+			}
+			insertArg.ctr.conflictMaps[i] = make(map[string]int)
 		}
 	} else {
 		insertArg.ctr.rbat.CleanOnlyData()
-		insertArg.ctr.checkConflictBat.CleanOnlyData()
+		for i := range insertArg.ctr.conflictMaps {
+			clear(insertArg.ctr.conflictMaps[i])
+		}
 	}
 
 	insertBatch := insertArg.ctr.rbat
-	checkConflictBatch := insertArg.ctr.checkConflictBat
 	attrs := make([]string, len(insertBatch.Attrs))
 	copy(attrs, insertBatch.Attrs)
 
@@ -140,12 +147,10 @@ func resetInsertBatchForOnduplicateKey(proc *process.Process, originBatch *batch
 			return err
 		}
 
-		// check if uniqueness conflict found in checkConflictBatch
-		oldConflictRowIdx, conflictMsg, err := checkConflict(proc, newBatch, checkConflictBatch, insertArg.ctr.uniqueCheckExes, insertArg.UniqueCols, insertColCount)
-		if err != nil {
-			newBatch.Clean(proc.GetMPool())
-			return err
-		}
+		// O(1) hash map conflict check instead of O(N) linear scan
+		oldConflictRowIdx, conflictMsg := findConflictByHashMap(
+			&insertArg.ctr.keyBuf, newBatch.Vecs, insertArg.ctr.uniqueKeyColIndices,
+			insertArg.ctr.conflictMaps, insertArg.UniqueCols, 0)
 		if oldConflictRowIdx > -1 {
 
 			if insertArg.IsIgnore {
@@ -177,6 +182,13 @@ func resetInsertBatchForOnduplicateKey(proc *process.Process, originBatch *batch
 				newBatch.Clean(proc.GetMPool())
 				return err
 			}
+
+			// Save old keys before in-place update (in case update changes unique columns)
+			oldKeys := make([]string, len(insertArg.ctr.uniqueKeyColIndices))
+			for k, colIndices := range insertArg.ctr.uniqueKeyColIndices {
+				oldKeys[k] = serializeUniqueKey(&insertArg.ctr.keyBuf, insertBatch.Vecs, colIndices, oldConflictRowIdx)
+			}
+
 			// update the oldConflictRowIdx of insertBatch by newBatch
 			for j := 0; j < insertColCount; j++ {
 				fromVec := tmpBatch.Vecs[j]
@@ -187,15 +199,19 @@ func resetInsertBatchForOnduplicateKey(proc *process.Process, originBatch *batch
 					newBatch.Clean(proc.GetMPool())
 					return err
 				}
+			}
 
-				toVec2 := checkConflictBatch.Vecs[j]
-				err = toVec2.Copy(fromVec, int64(oldConflictRowIdx), 0, proc.Mp())
-				if err != nil {
-					tmpBatch.Clean(proc.GetMPool())
-					newBatch.Clean(proc.GetMPool())
-					return err
+			// Update hash maps after in-place modification
+			for k, colIndices := range insertArg.ctr.uniqueKeyColIndices {
+				if oldKeys[k] != "" {
+					delete(insertArg.ctr.conflictMaps[k], oldKeys[k])
+				}
+				newKey := serializeUniqueKey(&insertArg.ctr.keyBuf, insertBatch.Vecs, colIndices, oldConflictRowIdx)
+				if newKey != "" {
+					insertArg.ctr.conflictMaps[k][newKey] = oldConflictRowIdx
 				}
 			}
+
 			tmpBatch.Clean(proc.GetMPool())
 		} else {
 			// row id is null: means no uniqueness conflict found in origin rows
@@ -205,11 +221,8 @@ func resetInsertBatchForOnduplicateKey(proc *process.Process, originBatch *batch
 					newBatch.Clean(proc.GetMPool())
 					return err
 				}
-				_, err = checkConflictBatch.Append(proc.Ctx, proc.Mp(), newBatch)
-				if err != nil {
-					newBatch.Clean(proc.GetMPool())
-					return err
-				}
+				addToConflictMaps(&insertArg.ctr.keyBuf, insertBatch.Vecs, insertArg.ctr.uniqueKeyColIndices,
+					insertArg.ctr.conflictMaps, insertBatch.RowCount()-1)
 			} else {
 
 				if insertArg.IsIgnore {
@@ -222,12 +235,9 @@ func resetInsertBatchForOnduplicateKey(proc *process.Process, originBatch *batch
 					newBatch.Clean(proc.GetMPool())
 					return err
 				}
-				conflictRowIdx, conflictMsg, err := checkConflict(proc, tmpBatch, checkConflictBatch, insertArg.ctr.uniqueCheckExes, insertArg.UniqueCols, insertColCount)
-				if err != nil {
-					tmpBatch.Clean(proc.GetMPool())
-					newBatch.Clean(proc.GetMPool())
-					return err
-				}
+				conflictRowIdx, conflictMsg := findConflictByHashMap(
+					&insertArg.ctr.keyBuf, tmpBatch.Vecs, insertArg.ctr.uniqueKeyColIndices,
+					insertArg.ctr.conflictMaps, insertArg.UniqueCols, 0)
 				if conflictRowIdx > -1 {
 					tmpBatch.Clean(proc.GetMPool())
 					newBatch.Clean(proc.GetMPool())
@@ -240,12 +250,8 @@ func resetInsertBatchForOnduplicateKey(proc *process.Process, originBatch *batch
 						newBatch.Clean(proc.GetMPool())
 						return err
 					}
-					_, err = checkConflictBatch.Append(proc.Ctx, proc.Mp(), tmpBatch)
-					if err != nil {
-						tmpBatch.Clean(proc.GetMPool())
-						newBatch.Clean(proc.GetMPool())
-						return err
-					}
+					addToConflictMaps(&insertArg.ctr.keyBuf, insertBatch.Vecs, insertArg.ctr.uniqueKeyColIndices,
+						insertArg.ctr.conflictMaps, insertBatch.RowCount()-1)
 				}
 				tmpBatch.Clean(proc.GetMPool())
 			}
@@ -269,6 +275,123 @@ func resetColPos(e *plan.Expr, columnCount int) {
 	}
 }
 
+// extractColIndicesFromExpr extracts the left-side column indices from a unique check expression.
+// For a single-column unique key: "col_i = col_j" → [i]
+// For a composite unique key: "(col_i = col_j AND col_k = col_l)" → [i, k]
+func extractColIndicesFromExpr(expr *plan.Expr) []int32 {
+	switch e := expr.Expr.(type) {
+	case *plan.Expr_F:
+		if e.F.Func.ObjName == "=" {
+			if col := extractColRefFromExpr(e.F.Args[0]); col != nil {
+				return []int32{col.Col.ColPos}
+			}
+		} else if e.F.Func.ObjName == "and" {
+			left := extractColIndicesFromExpr(e.F.Args[0])
+			right := extractColIndicesFromExpr(e.F.Args[1])
+			return append(left, right...)
+		}
+	}
+	return nil
+}
+
+// extractColRefFromExpr recursively unwraps cast/type expressions to find the underlying column reference.
+func extractColRefFromExpr(expr *plan.Expr) *plan.Expr_Col {
+	switch e := expr.Expr.(type) {
+	case *plan.Expr_Col:
+		return e
+	case *plan.Expr_F:
+		// Handle cast-like functions: try first argument
+		if len(e.F.Args) > 0 {
+			return extractColRefFromExpr(e.F.Args[0])
+		}
+	}
+	return nil
+}
+
+// serializeUniqueKey resets buf and encodes the colIndices columns of vecs at row into a map-key string:
+// per column, a 4-byte big-endian length followed by the value bytes. It returns "" as soon as one column
+// is NULL at row (NULL never conflicts per SQL semantics). float32 values are rounded to typ.Scale digits
+// when Scale > 0, and -0 becomes +0 for float32/float64. NOTE(review): float64 is not scale-rounded — confirm.
+func serializeUniqueKey(buf *bytes.Buffer, vecs []*vector.Vector, colIndices []int32, row int) string {
+	buf.Reset()
+	for _, colIdx := range colIndices {
+		v := vecs[colIdx]
+		if v.GetNulls().Contains(uint64(row)) {
+			return "" // "" is the "no key" sentinel; non-NULL keys always carry a length prefix
+		}
+		typ := v.GetType()
+		switch typ.Oid {
+		case types.T_float32:
+			val := vector.MustFixedColWithTypeCheck[float32](v)[row]
+			if typ.Scale > 0 {
+				pow := math.Pow10(int(typ.Scale))
+				val = float32(math.Round(float64(val)*pow) / pow)
+			}
+			if val == 0 {
+				val = 0 // -0 compares equal to 0; storing the constant 0 makes the bits +0
+			}
+			bits := math.Float32bits(val)
+			buf.Write([]byte{0, 0, 0, 4, byte(bits >> 24), byte(bits >> 16), byte(bits >> 8), byte(bits)})
+		case types.T_float64:
+			val := vector.MustFixedColWithTypeCheck[float64](v)[row]
+			if val == 0 {
+				val = 0 // same -0 → +0 normalization as the float32 case
+			}
+			bits := math.Float64bits(val)
+			buf.Write([]byte{0, 0, 0, 8,
+				byte(bits >> 56), byte(bits >> 48), byte(bits >> 40), byte(bits >> 32),
+				byte(bits >> 24), byte(bits >> 16), byte(bits >> 8), byte(bits)})
+		default:
+			// Every other type is keyed on its raw bytes, prefixed with their length.
+			b := v.GetRawBytesAt(row)
+			l := len(b)
+			buf.WriteByte(byte(l >> 24))
+			buf.WriteByte(byte(l >> 16))
+			buf.WriteByte(byte(l >> 8))
+			buf.WriteByte(byte(l))
+			buf.Write(b)
+		}
+	}
+	return buf.String()
+}
+
+// findConflictByHashMap looks up row's serialized key for each unique constraint in that constraint's map.
+// It returns the first stored row index found plus a duplicate-entry message, or (-1, "") if none match.
+func findConflictByHashMap(
+	buf *bytes.Buffer,
+	vecs []*vector.Vector,
+	uniqueKeyColIndices [][]int32,
+	conflictMaps []map[string]int,
+	uniqueCols []string,
+	row int,
+) (int, string) {
+	for k := range uniqueKeyColIndices {
+		serialized := serializeUniqueKey(buf, vecs, uniqueKeyColIndices[k], row)
+		// An empty key means a NULL column, which can never collide.
+		if serialized != "" {
+			if hit, found := conflictMaps[k][serialized]; found {
+				return hit, fmt.Sprintf("Duplicate entry for key '%s'", uniqueCols[k])
+			}
+		}
+	}
+	return -1, ""
+}
+
+// addToConflictMaps records rowIdx under the row's serialized key in the map of every unique constraint.
+// Constraints whose key serializes to "" (a NULL column) are left untouched.
+func addToConflictMaps(
+	buf *bytes.Buffer,
+	vecs []*vector.Vector,
+	uniqueKeyColIndices [][]int32,
+	conflictMaps []map[string]int,
+	rowIdx int,
+) {
+	for k, cols := range uniqueKeyColIndices {
+		if serialized := serializeUniqueKey(buf, vecs, cols, rowIdx); serialized != "" {
+			conflictMaps[k][serialized] = rowIdx
+		}
+	}
+}
+
 func fetchOneRowAsBatch(idx int, originBatch *batch.Batch, proc *process.Process, attrs []string) (*batch.Batch, error) {
 	newBatch := batch.NewWithSize(len(attrs))
 	newBatch.Attrs = attrs
@@ -329,6 +452,9 @@ func updateOldBatch(evalBatch *batch.Batch, updateExpr map[string]*plan.Expr, pr
 	return newBatch, nil
 }
 
+// checkConflict uses expression evaluation to detect conflicts in checkConflictBatch.
+// This is the legacy O(N) per-call approach, kept for testing purposes.
+// The hot path now uses findConflictByHashMap for O(1) lookups instead.
 func checkConflict(proc *process.Process, newBatch *batch.Batch, checkConflictBatch *batch.Batch,
 	checkExpressionExecutor []colexec.ExpressionExecutor, uniqueCols []string, colCount int) (int, string, error) {
 	if checkConflictBatch.RowCount() == 0 {
diff --git a/pkg/sql/colexec/onduplicatekey/types.go b/pkg/sql/colexec/onduplicatekey/types.go
@@ -15,6 +15,8 @@
 package onduplicatekey
 
 import (
+	"bytes"
+
 	"github.com/matrixorigin/matrixone/pkg/common/reuse"
 	"github.com/matrixorigin/matrixone/pkg/container/batch"
 	"github.com/matrixorigin/matrixone/pkg/sql/colexec"
@@ -33,9 +35,14 @@ const (
 
 type container struct {
 	state            int
-	checkConflictBat *batch.Batch // batch to check conflict
+	checkConflictBat *batch.Batch // deprecated: kept for cleanup safety
 	rbat             *batch.Batch // return batch
 	uniqueCheckExes  []colexec.ExpressionExecutor
+
+	// Hash-based conflict detection (replaces O(N²) linear scan)
+	uniqueKeyColIndices [][]int32        // column indices for each unique constraint
+	conflictMaps        []map[string]int // serialized_unique_key → row_index in rbat
+	keyBuf              bytes.Buffer     // reusable buffer for serializeUniqueKey
 }
 
 type OnDuplicatekey struct {
@@ -102,6 +109,9 @@ func (onDuplicatekey *OnDuplicatekey) Reset(proc *process.Process, pipelineFaile
 			exe.ResetForNextQuery()
 		}
 	}
+	for i := range onDuplicatekey.ctr.conflictMaps {
+		clear(onDuplicatekey.ctr.conflictMaps[i])
+	}
 	onDuplicatekey.ctr.state = Build
 }
 
@@ -120,6 +130,8 @@ func (onDuplicatekey *OnDuplicatekey) Free(proc *process.Process, pipelineFailed
 		}
 	}
 	onDuplicatekey.ctr.uniqueCheckExes = nil
+	onDuplicatekey.ctr.conflictMaps = nil
+	onDuplicatekey.ctr.uniqueKeyColIndices = nil
 }
 
 func (onDuplicatekey *OnDuplicatekey) ExecProjection(proc *process.Process, input *batch.Batch) (*batch.Batch, error) {

Original file line number	Diff line number	Diff line change
`@@ -15,6 +15,8 @@`
`15`	`15`	`package onduplicatekey`
`16`	`16`
`17`	`17`	`import (`
	`18`	`+ "bytes"`
	`19`	`+`
`18`	`20`	`"github.com/matrixorigin/matrixone/pkg/common/reuse"`
`19`	`21`	`"github.com/matrixorigin/matrixone/pkg/container/batch"`
`20`	`22`	`"github.com/matrixorigin/matrixone/pkg/sql/colexec"`
`@@ -33,9 +35,14 @@ const (`
`33`	`35`
`34`	`36`	`type container struct {`
`35`	`37`	`state int`
`36`		`- checkConflictBat *batch.Batch // batch to check conflict`
	`38`	`+ checkConflictBat *batch.Batch // deprecated: kept for cleanup safety`
`37`	`39`	`rbat *batch.Batch // return batch`
`38`	`40`	`uniqueCheckExes []colexec.ExpressionExecutor`
	`41`	`+`
	`42`	`+ // Hash-based conflict detection (replaces O(N²) linear scan)`
	`43`	`+ uniqueKeyColIndices [][]int32 // column indices for each unique constraint`
	`44`	`+ conflictMaps []map[string]int // serialized_unique_key → row_index in rbat`
	`45`	`+ keyBuf bytes.Buffer // reusable buffer for serializeUniqueKey`
`39`	`46`	`}`
`40`	`47`
`41`	`48`	`type OnDuplicatekey struct {`
`@@ -102,6 +109,9 @@ func (onDuplicatekey OnDuplicatekey) Reset(proc process.Process, pipelineFaile`
`102`	`109`	`exe.ResetForNextQuery()`
`103`	`110`	`}`
`104`	`111`	`}`
	`112`	`+ for i := range onDuplicatekey.ctr.conflictMaps {`
	`113`	`+ clear(onDuplicatekey.ctr.conflictMaps[i])`
	`114`	`+ }`
`105`	`115`	`onDuplicatekey.ctr.state = Build`
`106`	`116`	`}`
`107`	`117`
`@@ -120,6 +130,8 @@ func (onDuplicatekey OnDuplicatekey) Free(proc process.Process, pipelineFailed`
`120`	`130`	`}`
`121`	`131`	`}`
`122`	`132`	`onDuplicatekey.ctr.uniqueCheckExes = nil`
	`133`	`+ onDuplicatekey.ctr.conflictMaps = nil`
	`134`	`+ onDuplicatekey.ctr.uniqueKeyColIndices = nil`
`123`	`135`	`}`
`124`	`136`
`125`	`137`	`func (onDuplicatekey OnDuplicatekey) ExecProjection(proc process.Process, input batch.Batch) (batch.Batch, error) {`