
Commit abaf90e

csg-pr-bot, Dev Agent, and cemeng authored
Fix the max context length for aliyun-green-checker (#988)
Co-authored-by: Dev Agent <dev-agent@example.com>
Co-authored-by: cemeng <cemengzhang@yntengyun.com>
1 parent e09f7dc commit abaf90e

11 files changed: 168 additions & 29 deletions


aigateway/component/moderation.go

Lines changed: 18 additions & 13 deletions
@@ -21,10 +21,9 @@ import (
 )
 
 const (
-    // max content length
-    maxContentLength = 6144
-    // sliding window size
-    slidingWindowSize = 2000
+    // max content length for moderation
+    defaultMaxContentLength = 2000 // sliding window size
+    slidingWindowSize = 2000
     // cache ttl
     cacheTTL = 24 * time.Hour
     // moderation cache prefix
@@ -59,10 +58,11 @@ type StreamChecker interface {
 }
 
 type moderationImpl struct {
-    modSvcClient  rpc.ModerationSvcClient
-    cacheClient   cache.RedisClient
-    config        *config.Config
-    streamChecker StreamChecker
+    modSvcClient     rpc.ModerationSvcClient
+    cacheClient      cache.RedisClient
+    config           *config.Config
+    streamChecker    StreamChecker
+    maxContentLength int
 }
 
 type syncStreamChecker struct {
@@ -262,10 +262,15 @@ func NewModerationImpl(config *config.Config) Moderation {
 }
 
 func NewModerationImplWithClient(config *config.Config, modSvcClient rpc.ModerationSvcClient, cacheClient cache.RedisClient) Moderation {
+    maxContentLength := config.SensitiveCheck.MaxContentLength
+    if config.SensitiveCheck.MaxContentLength <= 0 {
+        maxContentLength = defaultMaxContentLength
+    }
     modImpl := &moderationImpl{
-        modSvcClient: modSvcClient,
-        cacheClient:  cacheClient,
-        config:       config,
+        modSvcClient:     modSvcClient,
+        cacheClient:      cacheClient,
+        maxContentLength: maxContentLength,
+        config:           config,
     }
 
     initStreamChecker(modImpl)
@@ -478,7 +483,7 @@ func (modImpl *moderationImpl) CheckChatPrompts(ctx context.Context, messages []
 func (modImpl *moderationImpl) checkLLMPrompt(ctx context.Context, content, key string, isStream bool) (*rpc.CheckResult, error) {
     content = strings.ReplaceAll(content, `\\n`, "\n")
     content = strings.ReplaceAll(content, `\n`, "")
-    if len(content) < maxContentLength {
+    if len(content) < modImpl.maxContentLength {
         return modImpl.checkSingleChunk(ctx, content, key, isStream)
     }
 
@@ -544,7 +549,7 @@ func (modImpl *moderationImpl) checkLLMPrompt(ctx context.Context, content, key
         separatorLen = 1 // for "."
     }
 
-    if buffer.Len()+separatorLen+len(chunk) > maxContentLength && buffer.Len() > 0 {
+    if buffer.Len()+separatorLen+len(chunk) > modImpl.maxContentLength && buffer.Len() > 0 {
         result, err := modImpl.checkBuffer(ctx, buffer.String(), currentBufferChunks, key, isStream)
         if err != nil {
             return nil, fmt.Errorf("failed to call moderation on buffer: %w", err)
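
To make the chunk-batching that the last two hunks parameterize concrete, here is a minimal, self-contained sketch of the buffering rule, assuming pre-split chunks. batchChunks and its returned batch slice are illustrative stand-ins (the real code calls checkBuffer on each flush); only the threshold expression mirrors moderation.go.

package main

import (
	"fmt"
	"strings"
)

// batchChunks groups pre-split chunks into batches no longer than
// maxContentLength, joining chunks within a batch with ".".
func batchChunks(chunks []string, maxContentLength int) []string {
	var batches []string
	var buffer strings.Builder
	for _, chunk := range chunks {
		separatorLen := 0
		if buffer.Len() > 0 {
			separatorLen = 1 // for "."
		}
		// The threshold check this commit changes: the limit is now
		// per-instance (modImpl.maxContentLength), not a package const.
		if buffer.Len()+separatorLen+len(chunk) > maxContentLength && buffer.Len() > 0 {
			batches = append(batches, buffer.String()) // stands in for checkBuffer(...)
			buffer.Reset()
		}
		if buffer.Len() > 0 {
			buffer.WriteString(".")
		}
		buffer.WriteString(chunk)
	}
	if buffer.Len() > 0 {
		batches = append(batches, buffer.String())
	}
	return batches
}

func main() {
	// With maxContentLength = 10: "aaaa.bbbb" fits, adding ".cccc" would not.
	fmt.Println(batchChunks([]string{"aaaa", "bbbb", "cccc"}, 10))
	// Output: [aaaa.bbbb cccc]
}

With the old package-level constant this threshold was fixed at 6144 for every checker; routing it through modImpl.maxContentLength lets an aliyun-green deployment run with its 2000-character ceiling.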

aigateway/component/moderation_test.go

Lines changed: 82 additions & 0 deletions
@@ -2,10 +2,12 @@ package component
 
 import (
     "context"
+    "strings"
     "testing"
     "time"
 
     lru "github.com/hashicorp/golang-lru/v2"
+    "github.com/openai/openai-go/v3"
     "github.com/stretchr/testify/assert"
     "github.com/stretchr/testify/mock"
     "opencsg.com/csghub-server/aigateway/types"
@@ -395,3 +397,83 @@ func TestInitStreamChecker(t *testing.T) {
         assert.Equal(t, 100, checker.maxChars)
     })
 }
+
+func TestModerationImpl_checkLLMPrompt(t *testing.T) {
+    ctx := context.Background()
+    mockSvcClient := new(MockModerationSvcClient)
+
+    modImpl := &moderationImpl{
+        modSvcClient:     mockSvcClient,
+        maxContentLength: 10,
+    }
+
+    t.Run("short content", func(t *testing.T) {
+        mockSvcClient.ExpectedCalls = nil
+        mockSvcClient.On("PassLLMPromptCheck", mock.Anything, mock.Anything).Return(&rpc.CheckResult{IsSensitive: false}, nil).Once()
+
+        res, err := modImpl.checkLLMPrompt(ctx, "short", "test-key", false)
+        assert.NoError(t, err)
+        assert.False(t, res.IsSensitive)
+    })
+
+    t.Run("long content chunking", func(t *testing.T) {
+        mockSvcClient.ExpectedCalls = nil
+        // splitContentIntoChunksByWindow splits by the hardcoded
+        // slidingWindowSize (2000) in moderation.go, not by maxContentLength,
+        // so set maxContentLength to 2000 and use 3000 chars to force chunking.
+        modImpl.maxContentLength = 2000
+        longText := strings.Repeat("a", 3000)
+        mockSvcClient.On("PassLLMPromptCheck", mock.Anything, mock.Anything).Return(&rpc.CheckResult{IsSensitive: false}, nil)
+
+        res, err := modImpl.checkLLMPrompt(ctx, longText, "test-key", false)
+        assert.NoError(t, err)
+        assert.False(t, res.IsSensitive)
+    })
+}
+
+func TestModerationImpl_CheckChatPrompts(t *testing.T) {
+    ctx := context.Background()
+    mockSvcClient := new(MockModerationSvcClient)
+
+    modImpl := &moderationImpl{
+        modSvcClient:     mockSvcClient,
+        maxContentLength: 2000,
+    }
+
+    t.Run("nil modSvcClient", func(t *testing.T) {
+        emptyModImpl := &moderationImpl{modSvcClient: nil}
+        res, err := emptyModImpl.CheckChatPrompts(ctx, nil, "uuid", false)
+        assert.NoError(t, err)
+        assert.False(t, res.IsSensitive)
+    })
+
+    t.Run("normal message", func(t *testing.T) {
+        mockSvcClient.ExpectedCalls = nil
+        mockSvcClient.On("PassLLMPromptCheck", mock.Anything, mock.Anything).Return(&rpc.CheckResult{IsSensitive: false}, nil).Once()
+
+        messages := []openai.ChatCompletionMessageParamUnion{
+            openai.UserMessage("Hello"),
+        }
+
+        res, err := modImpl.CheckChatPrompts(ctx, messages, "uuid", false)
+        assert.NoError(t, err)
+        assert.False(t, res.IsSensitive)
+    })
+
+    t.Run("sensitive message", func(t *testing.T) {
+        mockSvcClient.ExpectedCalls = nil
+        mockSvcClient.On("PassLLMPromptCheck", mock.Anything, mock.Anything).Return(&rpc.CheckResult{IsSensitive: true, Reason: "toxic"}, nil).Once()
+
+        messages := []openai.ChatCompletionMessageParamUnion{
+            openai.UserMessage("Bad words"),
+        }
+
+        res, err := modImpl.CheckChatPrompts(ctx, messages, "uuid", false)
+        assert.NoError(t, err)
+        assert.True(t, res.IsSensitive)
+        assert.Equal(t, "toxic", res.Reason)
+    })
+}

aigateway/handler/openai.go

Lines changed: 2 additions & 3 deletions
@@ -362,10 +362,9 @@ func (h *OpenAIHandlerImpl) Chat(c *gin.Context) {
     key := fmt.Sprintf("%s:%s", userUUID, modelID)
     result, err := h.modComponent.CheckChatPrompts(c.Request.Context(), chatReq.Messages, key, chatReq.Stream)
     if err != nil {
-        c.String(http.StatusInternalServerError, fmt.Errorf("failed to call moderation error:%w", err).Error())
-        return
+        slog.ErrorContext(c.Request.Context(), "failed to call moderation", slog.Any("error", err))
     }
-    if result.IsSensitive {
+    if result != nil && result.IsSensitive {
         handleSensitiveResponse(c, chatReq.Stream, result)
         return
     }
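
The net effect of this hunk is a fail-open policy: an error from the moderation component is now logged rather than answered with a 500, and the added nil guard keeps the handler from dereferencing result when the check failed, so the request proceeds as if the content had passed. The test change below asserts http.StatusOK for exactly this path.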

aigateway/handler/openai_test.go

Lines changed: 20 additions & 1 deletion
@@ -521,9 +521,28 @@ func TestOpenAIHandler_Chat(t *testing.T) {
         _ = json.Unmarshal(body, &expectReq)
         tester.mocks.moderationComp.EXPECT().CheckChatPrompts(mock.Anything, expectReq.Messages, "testuuid:"+model.ID, false).
             Return(nil, errors.New("some error"))
+        llmTokenCounter := mocktoken.NewMockChatTokenCounter(t)
+        tester.mocks.tokenCounterFactory.EXPECT().NewChat(
+            token.CreateParam{
+                Endpoint: model.Endpoint,
+                Host:     "",
+                Model:    "model1",
+                ImageID:  model.ImageID,
+                Provider: model.Provider,
+            }).
+            Return(llmTokenCounter)
+        llmTokenCounter.EXPECT().AppendPrompts(expectReq.Messages).Return()
+        var wg sync.WaitGroup
+        wg.Add(1)
+        tester.mocks.openAIComp.EXPECT().RecordUsage(mock.Anything, "testuuid", model, llmTokenCounter).
+            RunAndReturn(func(ctx context.Context, uuid string, model *types.Model, counter token.Counter) error {
+                wg.Done()
+                return nil
+            })
         tester.handler.Chat(c)
+        wg.Wait()
 
-        assert.Equal(t, http.StatusInternalServerError, w.Code)
+        assert.Equal(t, http.StatusOK, w.Code)
     })
     t.Run("success", func(t *testing.T) {
         tester, c, w := setupTest(t)

aigateway/handler/response_writer_wrapper_test.go

Lines changed: 2 additions & 2 deletions
(whitespace-only change: a blank line with trailing whitespace is rewritten in each hunk)

@@ -107,7 +107,7 @@ func TestHandleSensitiveResponse(t *testing.T) {
         w := httptest.NewRecorder()
         ctx, _ := gin.CreateTestContext(w)
         ctx.Request = httptest.NewRequest("GET", "/", nil)
-
+
         checkResult := &rpc.CheckResult{Reason: "test reason"}
         handleSensitiveResponse(ctx, true, checkResult)
 
@@ -122,7 +122,7 @@ func TestHandleSensitiveResponse(t *testing.T) {
         w := httptest.NewRecorder()
         ctx, _ := gin.CreateTestContext(w)
         ctx.Request = httptest.NewRequest("GET", "/", nil)
-
+
         checkResult := &rpc.CheckResult{Reason: "test reason"}
         handleSensitiveResponse(ctx, false, checkResult)

builder/rpc/moderation_svc_client.go

Lines changed: 9 additions & 3 deletions
@@ -7,8 +7,11 @@ import (
     "opencsg.com/csghub-server/api/httpbase"
     "opencsg.com/csghub-server/common/errorx"
     "opencsg.com/csghub-server/common/types"
+    utils "opencsg.com/csghub-server/common/utils/common"
 )
 
+const PRINT_STRING_LEN = 1000
+
 type ModerationSvcClient interface {
     PassTextCheck(ctx context.Context, scenario types.SensitiveScenario, text string) (*CheckResult, error)
     PassImageCheck(ctx context.Context, scenario types.SensitiveScenario, ossBucketName, ossObjectName string) (*CheckResult, error)
@@ -48,6 +51,7 @@ func (c *ModerationSvcHttpClient) PassTextCheck(ctx context.Context, scenario ty
     resp.Data = &CheckResult{}
     err := c.hc.Post(ctx, path, req, &resp)
     if err != nil {
+        slog.ErrorContext(ctx, "call moderation service failed", slog.String("error", err.Error()), slog.Any("req", req))
         return nil, errorx.RemoteSvcFail(err,
             errorx.Ctx().
                 Set("service", "moderation service").
@@ -65,7 +69,8 @@ func (c *ModerationSvcHttpClient) PassLLMRespCheck(ctx context.Context, req type
     resp.Data = &CheckResult{}
     err := c.hc.Post(ctx, path, req, &resp)
     if err != nil {
-        slog.Error("call moderation service failed", slog.String("error", err.Error()))
+        req.Text = utils.TruncStringByRune(req.Text, PRINT_STRING_LEN)
+        slog.ErrorContext(ctx, "call moderation service failed", slog.String("error", err.Error()), slog.Any("req", req))
         return nil, errorx.RemoteSvcFail(err,
             errorx.Ctx().
                 Set("service", "moderation service").
@@ -140,7 +145,7 @@ func (c *ModerationSvcHttpClient) SubmitRepoCheck(ctx context.Context, repoType
     var resp httpbase.R
     err := c.hc.Post(ctx, path, req, &resp)
     if err != nil {
-        slog.Error("call moderation service failed", slog.String("error", err.Error()))
+        slog.ErrorContext(ctx, "call moderation service failed", slog.String("error", err.Error()), slog.Any("req", req))
         return errorx.RemoteSvcFail(err,
             errorx.Ctx().
                 Set("service", "moderation service").
@@ -156,7 +161,8 @@ func (c *ModerationSvcHttpClient) PassLLMPromptCheck(ctx context.Context, req ty
     resp.Data = &CheckResult{}
     err := c.hc.Post(ctx, path, req, &resp)
     if err != nil {
-        slog.Error("call moderation service failed", slog.String("error", err.Error()))
+        req.Text = utils.TruncStringByRune(req.Text, PRINT_STRING_LEN)
+        slog.ErrorContext(ctx, "call moderation service failed", slog.String("error", err.Error()), slog.Any("req", req))
        return nil, errorx.RemoteSvcFail(err,
             errorx.Ctx().
                 Set("service", "moderation service").

builder/sensitive/guard_llm.go

Lines changed: 1 addition & 1 deletion
(whitespace-only change: trailing whitespace removed from a blank line)

@@ -112,7 +112,7 @@ func (c *OpenAILLMChecker) doCheck(ctx context.Context, req *types.LLMCheckReque
     if c.config.SensitiveCheck.LLM.APIKey != "" {
         headers["Authorization"] = "Bearer " + c.config.SensitiveCheck.LLM.APIKey
     }
-
+
     // Retry mechanism for 429
     var content string
     var err error

common/config/config.go

Lines changed: 3 additions & 1 deletion
@@ -133,13 +133,15 @@ type Config struct {
     CheckChain          []string `env:"STARHUB_SERVER_SENSITIVE_CHECK_CHECK_CHAIN" default:"[ac_automaton,mutable_ac_automaton,aliyun_green]"`
     StreamCheckMode     string   `env:"STARHUB_SERVER_SENSITIVE_CHECK_STREAM_CHECK_MODE" default:"async"` // sync | async
     AsyncBufferMaxChars int      `env:"STARHUB_SERVER_SENSITIVE_CHECK_ASYNC_BUFFER_MAX_CHARS" default:"50"`
+    // aliyun green max content length: 2000 | qwen guard max content length: 7000
+    MaxContentLength int `env:"STARHUB_SERVER_SENSITIVE_CHECK_MAX_CONTENT_LENGTH" default:"2000"`
 
     LLM struct {
         Enable           bool    `env:"STARHUB_SERVER_SENSITIVE_CHECK_LLM_ENABLE" default:"false"`
         Endpoint         string  `env:"STARHUB_SERVER_SENSITIVE_CHECK_LLM_ENDPOINT"`
         APIKey           string  `env:"STARHUB_SERVER_SENSITIVE_CHECK_LLM_API_KEY"`
         GuardModel       string  `env:"STARHUB_SERVER_SENSITIVE_CHECK_LLM_GUARD_MODEL" default:"Qwen/Qwen3Guard-Gen-0.6B"`
-        GuardStreamModel string  `env:"STARHUB_SERVER_SENSITIVE_CHECK_LLM_GUARD_STREAM_MODEL" default:"Qwen/Qwen/Qwen3Guard-Gen-Stream-0.6B"`
+        GuardStreamModel string  `env:"STARHUB_SERVER_SENSITIVE_CHECK_LLM_GUARD_STREAM_MODEL" default:"Qwen/Qwen3Guard-Gen-Stream-0.6B"`
         TimeoutMS        int     `env:"STARHUB_SERVER_SENSITIVE_CHECK_LLM_TIMEOUT_MS" default:"3000"`
         MaxTokens        int     `env:"STARHUB_SERVER_SENSITIVE_CHECK_LLM_MAX_TOKENS" default:"128"`
         Temperature      float64 `env:"STARHUB_SERVER_SENSITIVE_CHECK_LLM_TEMPERATURE" default:"0"`
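
The new field's comment captures the motivation behind the commit title: the aliyun-green checker accepts at most 2000 characters per request, while a qwen-guard checker accepts around 7000, so the default stays at 2000 and deployments can raise it via STARHUB_SERVER_SENSITIVE_CHECK_MAX_CONTENT_LENGTH. A minimal sketch of the fallback that NewModerationImplWithClient (moderation.go above) applies to this value; the function name here is illustrative, not from the repository:

package main

import "fmt"

// resolveMaxContentLength mirrors the constructor's guard: an unset or
// non-positive configured value falls back to the default.
func resolveMaxContentLength(configured int) int {
	if configured <= 0 {
		return 2000 // defaultMaxContentLength in aigateway/component/moderation.go
	}
	return configured
}

func main() {
	fmt.Println(resolveMaxContentLength(0))    // 2000 (fallback)
	fmt.Println(resolveMaxContentLength(7000)) // 7000 (e.g. a qwen guard deployment)
}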

common/utils/common/string.go

Lines changed: 14 additions & 0 deletions
@@ -35,6 +35,20 @@ func TruncString(s string, limit int) string {
     return string(s1)
 }
 
+func TruncStringByRune(s string, limit int) string {
+    runes := []rune(s)
+    if len(runes) <= limit {
+        return s
+    }
+
+    // Reserve 3 runes for "..."
+    if limit <= 3 {
+        return string(runes[:limit])
+    }
+
+    return string(runes[:limit-3]) + "..."
+}
+
 func MD5Hash(s string) string {
     hash := md5.New()
     hash.Write([]byte(s))
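
TruncStringByRune truncates by runes rather than bytes, so payloads containing multi-byte characters are cut at character boundaries instead of mid-codepoint. A self-contained illustration of the new helper's behavior; the function body is copied verbatim from the hunk above:

package main

import "fmt"

func TruncStringByRune(s string, limit int) string {
	runes := []rune(s)
	if len(runes) <= limit {
		return s
	}

	// Reserve 3 runes for "..."
	if limit <= 3 {
		return string(runes[:limit])
	}

	return string(runes[:limit-3]) + "..."
}

func main() {
	fmt.Println(TruncStringByRune("hello world", 8)) // hello...
	fmt.Println(TruncStringByRune("héllo wörld", 8)) // héllo... (rune-safe, no split codepoints)
	fmt.Println(TruncStringByRune("short", 10))      // short (at or under the limit: unchanged)
	fmt.Println(TruncStringByRune("abcdef", 3))      // abc (limit <= 3: hard cut, no ellipsis)
}

In builder/rpc/moderation_svc_client.go above, this caps req.Text at PRINT_STRING_LEN (1000) runes before a failed request is logged, keeping error logs bounded even for very long prompts.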

component/llm_service_test.go

Lines changed: 2 additions & 2 deletions
(gofmt alignment-only change; exact spacing is not preserved in this view)

@@ -140,9 +140,9 @@ func TestLLMServiceComponent_UpdateLLMConfig(t *testing.T) {
     newName := "new-model"
     metadata := map[string]any{"tasks": []any{"text-to-image"}}
     req := &types.UpdateLLMConfigReq{
-        ID:        123,
+        ID:       123,
         ModelName: &newName,
-        Metadata:  &metadata,
+        Metadata: &metadata,
     }
     dbLLMConfig := &database.LLMConfig{
         ID: 123,
