Skip to content

Commit 384ed76

Browse files
authored
KB max batch size: apply insert limit globally (#11905)
1 parent 1876184 commit 384ed76

2 files changed

Lines changed: 4 additions & 3 deletions

File tree

mindsdb/interfaces/agents/constants.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -208,7 +208,7 @@
208208
USER_COLUMN = "question"
209209
DEFAULT_EMBEDDINGS_MODEL_PROVIDER = "openai"
210210
DEFAULT_EMBEDDINGS_MODEL_CLASS = OpenAIEmbeddings
211-
MAX_INSERT_BATCH_SIZE = 50_000
211+
MAX_INSERT_BATCH_SIZE = int(os.getenv("KB_MAX_INSERT_BATCH_SIZE", 50_000))
212212
DEFAULT_TIKTOKEN_MODEL_NAME = os.getenv("DEFAULT_TIKTOKEN_MODEL_NAME", "gpt-4")
213213
AGENT_CHUNK_POLLING_INTERVAL_SECONDS = os.getenv("AGENT_CHUNK_POLLING_INTERVAL_SECONDS", 1.0)
214214
DEFAULT_TEXT2SQL_DATABASE = "mindsdb"

mindsdb/interfaces/knowledge_base/controller.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -586,8 +586,6 @@ def insert_rows(self, rows: List[Dict]):
586586
"""Process and insert raw data rows"""
587587
if not rows:
588588
return
589-
if len(rows) > MAX_INSERT_BATCH_SIZE:
590-
raise ValueError("Input data is too large, please load data in batches")
591589

592590
df = pd.DataFrame(rows)
593591

@@ -700,6 +698,9 @@ def insert(self, df: pd.DataFrame, params: dict = None):
700698
if df.empty:
701699
return
702700

701+
if len(df) > MAX_INSERT_BATCH_SIZE:
702+
raise ValueError("Input data is too large, please load data in batches")
703+
703704
try:
704705
run_query_id = ctx.run_query_id
705706
# Link current KB to running query (where KB is used to insert data)

0 commit comments

Comments
 (0)