Skip to content

Commit 47812cb

Browse files
committed
revise pro-trans
1 parent f1edf19 commit 47812cb

File tree

7 files changed

+94
-76
lines changed

7 files changed

+94
-76
lines changed

ajet/context_tracker/multiagent_tracking.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ def save_llm_interaction_timeline(self, tools, llm_ext_msg, timeline):
294294
# save to self.saved_timelines
295295
self.saved_timelines += [copy.deepcopy(timeline)]
296296

297-
# DEBUG = True # warn when merge fails
297+
# warn when merge fails
298298
timeline_merging_policy: TimelineMergingPolicyConfig = self.config.ajet.context_tracker.timeline_merging_policy
299299
if (
300300
self.config.ajet.context_tracker.detect_timeline_snap

ajet/task_runner/swarm_runner.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@
1616
from ajet import Workflow
1717
from typing import Callable
1818

19-
DEBUG = True
19+
# DEBUG = True
20+
DEBUG = False
2021

2122
context = zmq.Context()
2223
atexit.register(context.term)

ajet/tuner_lib/weight_tuner/experimental/as_oai_model_client.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@
2222
if TYPE_CHECKING:
2323
from ajet.context_tracker.multiagent_tracking import MultiAgentContextTracker
2424

25-
# DEBUG = False
26-
DEBUG = True
25+
DEBUG = False
26+
# DEBUG = True
2727

2828
def generate_auth_token(agent_name, target_tag, episode_uuid, episode_address):
2929
"""

ajet/tuner_lib/weight_tuner/experimental/as_oai_model_server.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,8 @@ class HealthCheckRequest(BaseModel):
5454

5555
# Create FastAPI app
5656
SERVER_SHUTDOWN_EVENT = threading.Event()
57-
# DEBUG = False
58-
DEBUG = True
57+
DEBUG = False
58+
# DEBUG = True
5959

6060
context = zmq.Context()
6161
atexit.register(context.term)

ajet/tuner_lib/weight_tuner/experimental/as_swarm_server.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@
2626
VALID_STATUSES,
2727
)
2828

29-
DEBUG = True
29+
# DEBUG = True
30+
DEBUG = False
3031
RCVTIMEO = 2 * 1000
3132
RCVTIMEO_OUT = 300 * 1000
3233
RCVTIMEO_WAIT_N = RCVTIMEO_OUT // RCVTIMEO

tutorial/example_academic_trans/trans.py

Lines changed: 63 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@
33
import os
44
import time
55
import asyncio
6-
import requests
76
import threading
87
from loguru import logger
98
from textwrap import dedent
9+
from openai import OpenAI
1010

1111
from ajet import WorkflowOutput
1212
from ajet.schema.task import Task
@@ -22,18 +22,6 @@
2222
from .trans_reward import TranslationQualityGrader, build_translation_quality_messages, examples
2323

2424

25-
LOCAL_DATASET_PATH = "/mnt/data_cpfs/qingxu.fu/agentjet/agentjet/tmp/arxiv_papers/train.parquet"
26-
27-
28-
# Handshake with swarm remote, then send training param to swarm remote (such as model to be trained, algorithm, etc)
29-
dataset = RouterTaskReader(
30-
reader_type = "huggingface_dat_repo",
31-
reader_config = AjetTaskReader(
32-
huggingface_dat_repo = HuggingfaceDatRepo(
33-
dataset_path = LOCAL_DATASET_PATH
34-
)
35-
)
36-
)
3725

3826
@retry_with_backoff(max_retry=3)
3927
def execute_agent(task: Task, api_baseurl_key: OpenaiBaseUrlAndApiKey):
@@ -48,68 +36,91 @@ def execute_agent(task: Task, api_baseurl_key: OpenaiBaseUrlAndApiKey):
4836
messages, rough_translate = rough_translate_agent(base_url, api_key, abstract)
4937
# print_listofdict(messages, header="rough_translate_agent", mod="c")
5038

51-
messages, fix_nouns = detect_hard_proper_nouns(messages, base_url, api_key, abstract, rough_translate)
39+
# messages, fix_nouns = detect_hard_proper_nouns(messages, base_url, api_key, abstract, rough_translate)
40+
messages, fix_nouns = detect_hard_proper_nouns(messages, grader_base_url, grader_api_key, abstract, rough_translate)
5241
# print_listofdict(messages, header="detect_hard_proper_nouns", mod="c")
5342

5443
messages, final_translation = produce_final_translation(messages, base_url, api_key, abstract, rough_translate, fix_nouns)
5544
print_listofdict(messages, header="final_translation", mod="c")
5645

57-
grader = TranslationQualityGrader(
58-
model=OpenAIChatModel(base_url=grader_base_url, api_key=grader_api_key, model="qwen-max")
59-
)
60-
grader_score = asyncio.run(grader.aevaluate(original_text=abstract, translation=final_translation))
61-
raw_reward = grader_score.score # Normalize to 0-1 range (score is 0-3)
46+
if final_translation is None:
47+
raw_reward = 0.0
48+
else:
49+
grader = TranslationQualityGrader(
50+
model=OpenAIChatModel(base_url=grader_base_url, api_key=grader_api_key, model="qwen3-max-2026-01-23")
51+
)
52+
grader_score = asyncio.run(grader.aevaluate(original_text=abstract, translation=final_translation))
53+
raw_reward = grader_score.score
54+
print(f"Grader Score: {grader_score.score}, Reason: {grader_score.reason}, Metadata: {grader_score.metadata}")
6255
return WorkflowOutput(reward=raw_reward, metadata={
6356
"rough_translate": rough_translate,
6457
"fix_nouns": fix_nouns,
6558
"final_translation": final_translation
6659
})
6760

6861

69-
def detect_hard_proper_nouns(messages, base_url, api_key, abstract, rough_translate):
62+
def produce_final_translation(messages, base_url, api_key, abstract, rough_translate, fix_nouns):
7063
messages = messages + [
71-
7264
{
7365
"role": "user",
74-
"content": "Your new job is to detect translation errors of discipline-specific proper nouns. "
75-
"Use json to list all errors found in the translation result and provide correction. "
76-
"Json format: [{\"original_word\": \"xxx\", \"wrong_translation\": \"xxx\", \"wrong_reason\": \"xxx\", \"correct_translation\": \"xxx\"}, ...]. "
77-
"If no errors are found, return an empty list []."
78-
"Please list all translation errors of discipline-specific proper nouns found in the translation result according to the requirements."
66+
"content": "Please produce the final, corrected Chinese translation by applying all the corrections listed above. "
67+
"Output only the final translation between <final_result> ... </final_result>, so I will extract result with regex."
7968
},
8069
]
8170

82-
response = requests.post( f"{base_url}/chat/completions", json = { "model": "qwen-turbo", "messages": messages, }, headers = { "Authorization": f"Bearer {api_key}" } )
83-
fix_nouns = response.json()['choices'][0]['message']['content']
71+
client = OpenAI(base_url=base_url, api_key=api_key)
72+
response = client.chat.completions.create(
73+
model="agentjet-model",
74+
messages=messages
75+
)
76+
final_translation = response.choices[0].message.content
77+
8478
messages += [
8579
{
8680
"role": "assistant",
87-
"content": fix_nouns
81+
"content": final_translation
8882
}
8983
]
90-
return messages, fix_nouns
9184

85+
# Extract final translation
86+
match = re.search(r"<final_result>(.*?)</final_result>", final_translation, re.DOTALL)
87+
if match:
88+
final_translation = match.group(1).strip()
89+
else:
90+
final_translation = None
9291

93-
def produce_final_translation(messages, base_url, api_key, abstract, rough_translate, fix_nouns):
92+
return messages, final_translation
93+
94+
95+
96+
def detect_hard_proper_nouns(messages, base_url, api_key, abstract, rough_translate):
9497
messages = messages + [
98+
9599
{
96100
"role": "user",
97-
"content": "Please produce the final, corrected Chinese translation by applying all the corrections listed above. "
98-
"Output only the final translation without any explanations or additional text."
101+
"content": "Your new job is to detect translation errors of discipline-specific proper nouns. "
102+
"Use json to list all errors found in the translation result and provide correction. "
103+
"Json format: [{\"original_word\": \"xxx\", \"wrong_translation\": \"xxx\", \"wrong_reason\": \"xxx\", \"correct_translation\": \"xxx\"}, ...]. "
104+
"If no errors are found, return an empty list []."
105+
"Please list all translation errors of discipline-specific proper nouns found in the translation result according to the requirements."
99106
},
100-
]
101107

102-
response = requests.post( f"{base_url}/chat/completions", json = { "model": "qwen-turbo", "messages": messages, }, headers = { "Authorization": f"Bearer {api_key}" } )
103-
final_translation = response.json()['choices'][0]['message']['content']
108+
]
104109

110+
client = OpenAI(base_url=base_url, api_key=api_key)
111+
response = client.chat.completions.create(
112+
model="qwen3-max-2026-01-23",
113+
messages=messages,
114+
extra_body={"enable_thinking":True}
115+
)
116+
fix_nouns = response.choices[0].message.content
105117
messages += [
106118
{
107119
"role": "assistant",
108-
"content": final_translation
120+
"content": fix_nouns
109121
}
110122
]
111-
112-
return messages, final_translation
123+
return messages, fix_nouns
113124

114125

115126
def rough_translate_agent(base_url, api_key, abstract):
@@ -123,9 +134,12 @@ def rough_translate_agent(base_url, api_key, abstract):
123134
"such as conforming to the logic of the Chinese language, being simple, rigorous, and concise, "
124135
"and avoiding the use of first-person pronouns when passive voice is appropriate. "
125136
"Ensure that specialized terms are translated correctly according to academic standards. "
126-
"Replace 我们 with 本研究 or 本文. "
127-
"If an abbreviation is short in Chinese, use Chinese. "
128-
"If an abbreviation is long in Chinese, use abbreviation. "
137+
"Replace 我/我们 with 本研究 or 本文 or 研究者 or simply remove it and rephrase the sentence. "
138+
"If an English abbreviation is short in Chinese, use Chinese. "
139+
"If an English abbreviation is long in Chinese, use English abbreviation. "
140+
"To use an English abbreviation, if the author has mentioned the full form first, mention the full form at its first appearance. "
141+
"e.g. `We have used the LAsMA heterodyne array installed on the Atacama Pathfinder EXperiment (APEX)` should be translated as "
142+
"`本研究使用了安装在阿塔卡马探路者实验望远镜(APEX, Atacama Pathfinder EXperiment)上的LAsMA外差阵列`. "
129143
},
130144
{
131145
"role": "user",
@@ -135,8 +149,13 @@ def rough_translate_agent(base_url, api_key, abstract):
135149

136150
for ex in examples:
137151
messages[0]['content'] += f"\n\nExample:\n\tOriginal: {ex['original']}\n\tBad Translation: {ex['bad']}\n\tHint: {ex['hint']}\n\tGood Translation: {ex['good']}"
138-
response = requests.post( f"{base_url}/chat/completions", json = { "model": "qwen-turbo", "messages": messages, }, headers = { "Authorization": f"Bearer {api_key}" } )
139-
rough_translate = response.json()['choices'][0]['message']['content']
152+
153+
client = OpenAI(base_url=base_url, api_key=api_key)
154+
response = client.chat.completions.create(
155+
model="agentjet-model",
156+
messages=messages
157+
)
158+
rough_translate = response.choices[0].message.content
140159
messages += [
141160
{
142161
"role": "assistant",
@@ -145,18 +164,3 @@ def rough_translate_agent(base_url, api_key, abstract):
145164
]
146165

147166
return messages, rough_translate
148-
149-
150-
151-
if __name__ == "__main__":
152-
153-
for i, task in enumerate(dataset.generate_training_tasks()):
154-
execute_agent(
155-
task,
156-
OpenaiBaseUrlAndApiKey(
157-
base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
158-
api_key=os.environ.get("DASHSCOPE_API_KEY", "")
159-
)
160-
)
161-
162-

tutorial/example_academic_trans/trans_reward.py

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from openjudge.models.base_chat_model import BaseChatModel
55
from typing import List
66
from textwrap import dedent
7+
from beast_logger import print_listofdict
78

89

910
examples = [
@@ -68,15 +69,19 @@ def get_translation_quality_system_prompt() -> str:
6869
return dedent("""
6970
You are an objective translation quality evaluator for academic paper translations from English to Chinese. Your task is to identify ONLY the specific types of errors demonstrated in the provided examples - not general translation quality issues.
7071
71-
Focus (but do not limit to) on issues below (as shown in the examples):
72+
重点关注(但不限于)以下问题类型(如示例所示):
7273
73-
1. **First-person pronoun issues** - Using "我们" instead of "本研究" or "本文" in academic contexts
74-
2. **Abbreviation translation errors** - Using abbreviations when concise Chinese exists (e.g., "GWs" instead of "引力波"), or translating abbreviations that should remain in English (like "EMBB")
75-
3. **Word order problems** - Not adjusting sentence structure to emphasize key points in Chinese academic style
76-
4. **Subject-verb inconsistencies** - Mismatched subjects due to improper sentence structure (e.g., "在...中,本文展示..." where the subject is confused)
77-
5. **Inappropriate word choices** - Using colloquial or incorrect terms instead of proper academic expressions (e.g., "效率" vs "有效性" in certain contexts)
78-
6. **Redundant punctuation** - Unnecessary commas or other punctuation that disrupts Chinese reading flow
74+
1. **错误使用第一人称代词** - 禁止使用"我们"。正确的方法是使用"本研究""本文"、“研究者”,或者直接删除we并改写句子替换主语。不要漏掉出现的任何第一人称代词。
75+
2. **缩写翻译错误** - 当存在简洁的中文表达时使用缩写(例如,使用"GWs"而非"引力波"),或翻译本应保留英文的缩写(如"EMBB")
76+
3. **语序问题** - 未调整句子结构以符合中文学术风格强调重点的习惯
77+
4. **主谓不一致、主语缺失** - 由于句子结构不当导致主语混乱(例如,"在...中,本文展示..."中主语混淆)
78+
5. **用词不当** - 使用口语化或不正确的术语而非恰当的学术表达
79+
6. **多余标点和停顿** - 不必要的逗号或其他标点符号影响中文阅读流畅性
7980
7. **主语不清晰** - 中文句子主语缺失或不明确。例如:“通过该实验,证明了该药物对癌细胞有抑制作用”(缺少主语)
81+
8. **缩写问题** - 首次出现自定义缩写、且原文中已经提供自定义缩写的英文全称时,没有在首次出现的地方提供英文全称。
82+
(正确的例子:`We have used the LAsMA heterodyne array installed on the Atacama Pathfinder EXperiment (APEX)`->`本研究使用了安装在阿塔卡马探路者实验望远镜(APEX, Atacama Pathfinder EXperiment)上的LAsMA外差阵列`)
83+
9. **专有名词翻译错误** - 领域特定的专有名词翻译错误,例如技术术语、学科术语等。如错把Agent翻译成“代理”(实际上应为“智能体”)等。
84+
10. **表意偏差** - 翻译结果与原文在意义上存在偏差,导致信息传达不准确。
8085
8186
**Examples of these errors:**
8287
[[examples_text]]
@@ -90,15 +95,19 @@ def get_translation_quality_system_prompt() -> str:
9095
* For each key issue found, provide the specific error, its type, and where it appears in the translation.
9196
* Be precise about which error category each issue belongs to.
9297
* Focus on objective errors matching the example patterns, not subjective preferences.
98+
* 当出现 **语序问题**、**主谓不一致、主语缺失**、**主语不清晰**、**专有名词翻译错误**、**表意偏差** 等严重问题时,直接给 0 分。
99+
* 逐句分析,切勿遗漏。
93100
94101
Think carefully before flagging any error. Ask yourself: Does this match one of the specific error types from the examples? Is this truly an objective error or just a stylistic preference?
95102
96103
Return your response in this format:
97-
<score>X</score>
98-
<reasoning>Your detailed step-by-step reasoning analyzing the translation against the error categories</reasoning>
104+
<reasoning>
105+
Your analysis
106+
</reasoning>
99107
<key_issues>
100108
- Error Type: [category]. Error: [specific issue]. Location: [where it appears in the translation]
101109
</key_issues>
110+
<score>X</score>
102111
103112
The score must be 0, 1, 2. Each key issue should be on its own line starting with a dash. If no errors are found, the key_issues section should be empty or state "None detected".
104113
""".replace("[[examples_text]]", examples_text))
@@ -129,7 +138,10 @@ def parse_translation_quality_response(text: str) -> dict:
129138

130139
def build_translation_quality_messages(original_text: str, translation: str) -> List[dict]:
131140
return [
132-
{"role": "system", "content": get_translation_quality_system_prompt()},
141+
{
142+
"role": "system",
143+
"content": get_translation_quality_system_prompt()
144+
},
133145
{
134146
"role": "user",
135147
"content": TRANSLATION_QUALITY_USER_PROMPT.format(

0 commit comments

Comments
 (0)