diff --git a/evaluation/.env-example b/evaluation/.env-example
index bda935442..0e94e9caa 100644
--- a/evaluation/.env-example
+++ b/evaluation/.env-example
@@ -22,13 +22,8 @@ SUPERMEMORY_API_KEY="sm_xxx"
 MEMOBASE_API_KEY="xxx"
 MEMOBASE_PROJECT_URL="http://***.***.***.***:8019"
 
-# pref
-PRE_SPLIT_CHUNK=false  # pre split chunk in client end, for personamem and prefeval
-# 1. text_mem + pref_mem + instruction_completion: set INSTRUCT_COMPLETE=true, ABLATION_PREF=false
-# 2. text_mem + pref_mem: set INSTRUCT_COMPLETE=false, ABLATION_PREF=false
-# 3. text_mem: set INSTRUCT_COMPLETE=false, ABLATION_PREF=true
-INSTRUCT_COMPLETE=true  # use instruct complete format or not
-ABLATION_PREF=false  # remove pref mem, only text mem
+# eval settings: PRE_SPLIT_CHUNK pre-splits chunks on the client side (for PersonaMem and PrefEval)
+PRE_SPLIT_CHUNK=false
 
 # Configuration Only For Scheduler
 # RabbitMQ Configuration
diff --git a/evaluation/scripts/PrefEval/pref_memos.py b/evaluation/scripts/PrefEval/pref_memos.py
index 753a77d99..7336d4612 100644
--- a/evaluation/scripts/PrefEval/pref_memos.py
+++ b/evaluation/scripts/PrefEval/pref_memos.py
@@ -72,7 +72,6 @@ def search_memory_for_line(line_data: tuple, mem_client, top_k_value: int) -> di
     """
     Processes a single line of data, searching memory based on the question.
     """
-    from utils.pref_mem_utils import create_mem_string
 
     i, line = line_data
     try:
@@ -94,7 +93,13 @@ def search_memory_for_line(line_data: tuple, mem_client, top_k_value: int) -> di
         start_time_search = time.monotonic()
         relevant_memories = mem_client.search(query=question, user_id=user_id, top_k=top_k_value)
         search_memories_duration = time.monotonic() - start_time_search
-        memories_str = create_mem_string(relevant_memories)
+        memories_str = (
+            "\n".join(
+                f"- {entry.get('memory', '')}"
+                for entry in relevant_memories["text_mem"][0]["memories"]
+            )
+            + f"\n{relevant_memories['pref_mem']}"
+        )
 
         memory_tokens_used = len(tokenizer.encode(memories_str))
 
@@ -119,7 +124,6 @@ def generate_response_for_line(line_data: tuple, openai_client: OpenAI, lib: str
     """
     Generates a response for a single line of data using pre-fetched memories.
     """
-    from utils.pref_mem_utils import add_pref_instruction, remove_pref_mem_from_mem_string
     from utils.prompts import PREFEVAL_ANSWER_PROMPT
 
     i, line = line_data
@@ -146,10 +150,7 @@ def generate_response_for_line(line_data: tuple, openai_client: OpenAI, lib: str
             )
             return original_data
 
-        memories_str = remove_pref_mem_from_mem_string(memories_str, frame=lib)
-
-        template = add_pref_instruction(PREFEVAL_ANSWER_PROMPT, frame=lib)
-        system_prompt = template.format(context=memories_str)
+        system_prompt = PREFEVAL_ANSWER_PROMPT.format(context=memories_str)
         messages = [
             {"role": "system", "content": system_prompt},
             {"role": "user", "content": question},
diff --git a/evaluation/scripts/locomo/locomo_responses.py b/evaluation/scripts/locomo/locomo_responses.py
index 2ae4dcb6e..35a444b7d 100644
--- a/evaluation/scripts/locomo/locomo_responses.py
+++ b/evaluation/scripts/locomo/locomo_responses.py
@@ -35,10 +35,7 @@ async def locomo_response(frame, llm_client, context: str, question: str) -> str
             question=question,
         )
     else:
-        from utils.pref_mem_utils import add_pref_instruction
-
-        template = add_pref_instruction(ANSWER_PROMPT_MEMOS, frame=frame)
-        prompt = template.format(
+        prompt = ANSWER_PROMPT_MEMOS.format(
             context=context,
             question=question,
         )
@@ -55,8 +52,6 @@ async def locomo_response(frame, llm_client, context: str, question: str) -> str
 
 
 async def process_qa(frame, qa, search_result, oai_client):
-    from utils.pref_mem_utils import remove_pref_mem_from_mem_string
-
     start = time()
     query = qa.get("question")
     gold_answer = qa.get("answer")
@@ -64,7 +59,6 @@ async def process_qa(frame, qa, search_result, oai_client):
 
     context = search_result.get("context")
 
-    context = remove_pref_mem_from_mem_string(context, frame)
     answer = await locomo_response(frame, oai_client, context, query)
 
     response_duration_ms = (time() - start) * 1000
diff --git a/evaluation/scripts/locomo/locomo_search.py b/evaluation/scripts/locomo/locomo_search.py
index 19efb5b92..c629124dd 100644
--- a/evaluation/scripts/locomo/locomo_search.py
+++ b/evaluation/scripts/locomo/locomo_search.py
@@ -100,14 +100,19 @@ def memos_api_search(
     client, query, speaker_a_user_id, speaker_b_user_id, top_k, speaker_a, speaker_b
 ):
     from prompts import TEMPLATE_MEMOS
-    from utils.pref_mem_utils import create_mem_string
 
     start = time()
     search_a_results = client.search(query=query, user_id=speaker_a_user_id, top_k=top_k)
     search_b_results = client.search(query=query, user_id=speaker_b_user_id, top_k=top_k)
 
-    speaker_a_context = create_mem_string(search_a_results)
-    speaker_b_context = create_mem_string(search_b_results)
+    speaker_a_context = (
+        "\n".join([i["memory"] for i in search_a_results["text_mem"][0]["memories"]])
+        + f"\n{search_a_results['pref_mem']}"
+    )
+    speaker_b_context = (
+        "\n".join([i["memory"] for i in search_b_results["text_mem"][0]["memories"]])
+        + f"\n{search_b_results['pref_mem']}"
+    )
 
     context = TEMPLATE_MEMOS.format(
         speaker_1=speaker_a,
diff --git a/evaluation/scripts/locomo/prompts.py b/evaluation/scripts/locomo/prompts.py
index caf462f6a..152e5b87f 100644
--- a/evaluation/scripts/locomo/prompts.py
+++ b/evaluation/scripts/locomo/prompts.py
@@ -1,14 +1,3 @@
-import os
-
-
-PREF_INSTRUCTIONS = """
-    # Note:
-    Plaintext memory are summaries of facts, while preference memories are summaries of user preferences.
-    Your response must not violate any of the user's preferences, whether explicit or implicit, and briefly explain why you answer this way to avoid conflicts.
-    When encountering preference conflicts, the priority is: explicit preference > implicit preference > plaintext memory.
-"""
-
-
 ANSWER_PROMPT_MEM0 = """
     You are an intelligent memory assistant tasked with retrieving accurate information from conversation memories.
 
@@ -114,7 +103,7 @@
    5. Formulate a precise, concise answer based on the evidence from the memories (and allowed world knowledge).
    6. Double-check that your answer directly addresses the question asked and adheres to all instructions.
    7. Ensure your final answer is specific and avoids vague time references.
-   {pref_instructions}
+
    {context}
 
    Question: {question}
@@ -122,10 +111,6 @@
    Answer:
    """
 
-if os.getenv("INSTRUCT_COMPLETE") == "true":
-    ANSWER_PROMPT_MEMOS = ANSWER_PROMPT_MEMOS.replace("{pref_instructions}", PREF_INSTRUCTIONS)
-else:
-    ANSWER_PROMPT_MEMOS = ANSWER_PROMPT_MEMOS.replace("{pref_instructions}", "")
 
 custom_instructions = """
 Generate personal memories that follow these guidelines:
diff --git a/evaluation/scripts/longmemeval/lme_responses.py b/evaluation/scripts/longmemeval/lme_responses.py
index 22f17c304..a4adf90b5 100644
--- a/evaluation/scripts/longmemeval/lme_responses.py
+++ b/evaluation/scripts/longmemeval/lme_responses.py
@@ -12,13 +12,11 @@
 
 
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from utils.pref_mem_utils import add_pref_instruction, remove_pref_mem_from_mem_string
 from utils.prompts import LME_ANSWER_PROMPT
 
 
-def lme_response(llm_client, context, question, question_date, frame):
-    template = add_pref_instruction(LME_ANSWER_PROMPT, frame=frame)
-    prompt = template.format(
+def lme_response(llm_client, context, question, question_date):
+    prompt = LME_ANSWER_PROMPT.format(
         question=question,
         question_date=question_date,
         context=context,
@@ -35,14 +33,13 @@ def lme_response(llm_client, context, question, question_date, frame):
     return result
 
 
-def process_qa(user_id, search_result, llm_client, frame):
+def process_qa(user_id, search_result, llm_client):
     start = time()
     search_result = search_result[0]
     question = search_result.get("question")
     question_date = search_result.get("date")
     context = search_result.get("search_context", "")
-    context = remove_pref_mem_from_mem_string(context, frame=frame)
-    anwer = lme_response(llm_client, context, question, question_date, frame)
+    anwer = lme_response(llm_client, context, question, question_date)
 
     response_duration_ms = (time() - start) * 1000
 
@@ -97,7 +94,7 @@ def main(frame, version, num_workers=4):
         future_to_user_id = {}
 
         for user_id, search_results in lme_search_results.items():
-            future = executor.submit(process_qa, user_id, search_results, oai_client, frame)
+            future = executor.submit(process_qa, user_id, search_results, oai_client)
             future_to_user_id[future] = user_id
 
         for future in tqdm(
diff --git a/evaluation/scripts/longmemeval/lme_search.py b/evaluation/scripts/longmemeval/lme_search.py
index d21795eef..c02518083 100644
--- a/evaluation/scripts/longmemeval/lme_search.py
+++ b/evaluation/scripts/longmemeval/lme_search.py
@@ -13,7 +13,6 @@
 import pandas as pd
 
 from tqdm import tqdm
-from utils.pref_mem_utils import create_mem_string
 from utils.prompts import (
     MEM0_CONTEXT_TEMPLATE,
     MEM0_GRAPH_CONTEXT_TEMPLATE,
@@ -45,7 +44,10 @@ def mem0_search(client, query, user_id, top_k):
 def memos_search(client, query, user_id, top_k):
     start = time()
     results = client.search(query=query, user_id=user_id, top_k=top_k)
-    context = create_mem_string(results)
+    # Flatten memories from every cube rather than indexing [0], which raises IndexError
+    # on an empty "text_mem" list and silently drops any additional cubes.
+    text_mem = "\n".join(i["memory"] for cube in results["text_mem"] for i in cube["memories"])
+    context = f"{text_mem}\n{results['pref_mem']}"
     context = MEMOS_CONTEXT_TEMPLATE.format(user_id=user_id, memories=context)
     duration_ms = (time() - start) * 1000
     return context, duration_ms
diff --git a/evaluation/scripts/personamem/pm_responses.py b/evaluation/scripts/personamem/pm_responses.py
index 5b54f9bb8..ff561f8d8 100644
--- a/evaluation/scripts/personamem/pm_responses.py
+++ b/evaluation/scripts/personamem/pm_responses.py
@@ -14,7 +14,6 @@
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import re
 
-from utils.pref_mem_utils import add_pref_instruction, remove_pref_mem_from_mem_string
 from utils.prompts import PM_ANSWER_PROMPT
 
 
@@ -49,9 +48,8 @@ def _extract_only_options(text):
     return False, predicted_answer
 
 
-def pm_response(llm_client, context, question, options, frame):
-    template = add_pref_instruction(PM_ANSWER_PROMPT, frame=frame)
-    prompt = template.format(
+def pm_response(llm_client, context, question, options):
+    prompt = PM_ANSWER_PROMPT.format(
         question=question,
         context=context,
         options=options,
@@ -68,19 +66,17 @@ def pm_response(llm_client, context, question, options, frame):
     return result
 
 
-def process_qa(user_id, search_result, num_runs, llm_client, frame):
+def process_qa(user_id, search_result, num_runs, llm_client):
     search_result = search_result[0]
     question = search_result.get("question")
     context = search_result.get("search_context", "")
     options = search_result.get("all_options", [])
 
-    context = remove_pref_mem_from_mem_string(context, frame=frame)
-
     run_results = []
 
     for idx in range(num_runs):
         start = time()
-        answer = pm_response(llm_client, context, question, options, frame)
+        answer = pm_response(llm_client, context, question, options)
         is_correct, answer = extract_choice_answer(answer, search_result.get("golden_answer", ""))
         response_duration_ms = (time() - start) * 1000
 
@@ -154,9 +150,7 @@ def main(frame, version, num_runs=3, num_workers=4):
         future_to_user_id = {}
 
         for user_id, search_results in pm_search_results.items():
-            future = executor.submit(
-                process_qa, user_id, search_results, num_runs, oai_client, frame
-            )
+            future = executor.submit(process_qa, user_id, search_results, num_runs, oai_client)
             future_to_user_id[future] = user_id
 
         for future in tqdm(
diff --git a/evaluation/scripts/personamem/pm_search.py b/evaluation/scripts/personamem/pm_search.py
index 243c64589..c18e05623 100644
--- a/evaluation/scripts/personamem/pm_search.py
+++ b/evaluation/scripts/personamem/pm_search.py
@@ -14,7 +14,6 @@
 
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
-from utils.pref_mem_utils import create_mem_string
 from utils.prompts import (
     MEM0_CONTEXT_TEMPLATE,
     MEM0_GRAPH_CONTEXT_TEMPLATE,
@@ -83,7 +82,10 @@ def memobase_search(client, query, user_id, top_k):
 def memos_search(client, user_id, query, top_k):
     start = time()
     results = client.search(query=query, user_id=user_id, top_k=top_k)
-    search_memories = create_mem_string(results)
+    search_memories = (
+        "\n".join(item["memory"] for cube in results["text_mem"] for item in cube["memories"])
+        + f"\n{results['pref_mem']}"
+    )
     context = MEMOS_CONTEXT_TEMPLATE.format(user_id=user_id, memories=search_memories)
 
     duration_ms = (time() - start) * 1000
diff --git a/evaluation/scripts/utils/pref_mem_utils.py b/evaluation/scripts/utils/pref_mem_utils.py
deleted file mode 100644
index 22a5bb86c..000000000
--- a/evaluation/scripts/utils/pref_mem_utils.py
+++ /dev/null
@@ -1,43 +0,0 @@
-import os
-import sys
-
-
-sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from prompts import PREF_INSTRUCTIONS
-
-
-def create_mem_string(relevant_memories) -> str:
-    text_memories = []
-    explicit = []
-    implicit = []
-    for item in relevant_memories["text_mem"]:
-        for mem in item["memories"]:
-            text_memories.append(mem["memory"])
-    text_memories_text = "\n".join(f"{i + 1}. {mem}" for i, mem in enumerate(text_memories)).strip()
-    text_context = f"Plaintext Memory:\n{text_memories_text}\n" if text_memories_text else ""
-
-    for item in relevant_memories.get("prefs", []):
-        for mem in item["memories"]:
-            if mem["metadata"]["preference_type"] == "explicit_preference":
-                explicit.append(mem["metadata"]["explicit_preference"])
-            elif mem["metadata"]["preference_type"] == "implicit_preference":
-                implicit.append(mem["metadata"]["implicit_preference"])
-    explicit_text = "\n".join(f"{i + 1}. {pref}" for i, pref in enumerate(explicit)).strip()
-    explicit_context = f"Explicit Preference:\n{explicit_text}\n" if explicit_text else ""
-    implicit_text = "\n".join(f"{i + 1}. {pref}" for i, pref in enumerate(implicit)).strip()
-    implicit_context = f"Implicit Preference:\n{implicit_text}\n" if implicit_text else ""
-    return text_context + explicit_context + implicit_context
-
-
-def remove_pref_mem_from_mem_string(mem_string: str, frame: str) -> str:
-    if os.getenv("ABLATION_PREF", "false").lower() == "true" and frame == "memos-api":
-        tmp_list = mem_string.split("Plaintext Memory:")
-        if len(tmp_list) > 1:
-            return tmp_list[1].split("Explicit Preference:")[0]
-    return mem_string
-
-
-def add_pref_instruction(template: str, frame: str):
-    if os.getenv("INSTRUCT_COMPLETE", "false").lower() == "true" and frame == "memos-api":
-        return template.replace("{pref_instructions}", PREF_INSTRUCTIONS)
-    return template.replace("{pref_instructions}", "")
diff --git a/evaluation/scripts/utils/prompts.py b/evaluation/scripts/utils/prompts.py
index 902bbb1be..32e6d6729 100644
--- a/evaluation/scripts/utils/prompts.py
+++ b/evaluation/scripts/utils/prompts.py
@@ -1,11 +1,3 @@
-PREF_INSTRUCTIONS = """
-    # Note:
-    Plaintext memory are summaries of facts, while preference memories are summaries of user preferences.
-    Your response must not violate any of the user's preferences, whether explicit or implicit, and briefly explain why you answer this way to avoid conflicts.
-    When encountering preference conflicts, the priority is: explicit preference > implicit preference > plaintext memory.
-"""
-
-
 LME_ANSWER_PROMPT = """
     You are an intelligent memory assistant tasked with retrieving accurate information from conversation memories.
 
@@ -25,7 +17,7 @@
     5. Formulate a precise, concise answer based solely on the evidence in the memories.
     6. Double-check that your answer directly addresses the question asked.
     7. Ensure your final answer is specific and avoids vague time references.
-    {pref_instructions}
+
     {context}
 
     Current Date: {question_date}
@@ -55,7 +47,7 @@
     - Your final answer **must use parentheses**, like (a) or (b).
     - Do NOT list multiple choices. Choose only one.
     - Do NOT include extra text after <final_answer>. Just output the answer.
-    {pref_instructions}
+
     # QUESTION:
     {question}
 
@@ -71,7 +63,6 @@
     You are a helpful AI. Answer the question based on the query and the following memories:
     User Memories:
     {context}
-    {pref_instructions}
 """
 
 
diff --git a/src/memos/vec_dbs/milvus.py b/src/memos/vec_dbs/milvus.py
index fb19fd6ff..c1cb26362 100644
--- a/src/memos/vec_dbs/milvus.py
+++ b/src/memos/vec_dbs/milvus.py
@@ -138,7 +138,7 @@ def search(
 
             items.append(
                 MilvusVecDBItem(
-                    id=str(hit["id"]),
+                    id=str(entity.get("id")),
                     memory=entity.get("memory"),
                     vector=entity.get("vector"),
                     payload=entity.get("payload", {}),