1 | 1 | # -*- coding: utf-8 -*- |
2 | | -"""Compute relative rewards based on extraversion personality alignment.""" |
| 2 | +"""Compute relative rewards based on extraversion personality alignment using OpenJudge.""" |
3 | 3 |
| 4 | +import os |
4 | 5 | from typing import List, Dict |
5 | 6 | from beast_logger import print_listofdict |
| 7 | +from openjudge.graders.base_grader import GraderMode, GraderScore, GraderRank |
| 8 | +from openjudge.graders.llm_grader import LLMGrader |
| 9 | +from openjudge.models import OpenAIChatModel |
6 | 10 |
7 | | -def score_extraversion(response_text: str) -> float: |
8 | | - """Score response for extraversion traits (1-10 scale).""" |
9 | | - extraversion_keywords = [ |
10 | | - 'excited', 'love', 'amazing', 'awesome', 'fantastic', 'great', |
11 | | - 'wonderful', 'thrilled', 'energetic', 'enthusiastic', 'fun', |
12 | | - 'social', 'outgoing', 'active', 'lively', 'vibrant', 'happy', |
13 | | - 'enjoy', 'delighted', 'cheerful', 'positive' |
14 | | - ] |
| 11 | +# Configuration |
| 12 | +REWARD_MODE = os.getenv("REWARD_MODE", "pointwise") # Options: pointwise, listwise |
| 13 | +API_KEY = os.getenv("DASHSCOPE_API_KEY", "sk-xxx") |
| 14 | +BASE_URL = os.getenv("JUDGE_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1") |
| 15 | +JUDGE_MODEL = os.getenv("JUDGE_MODEL", "qwen-plus") |
15 | 16 |
16 | | -    text_lower = response_text.lower()
17 | | -    score = 5.0
| 17 | +# OpenJudge grader setup |
| 18 | +judge_model = OpenAIChatModel( |
| 19 | +    model=JUDGE_MODEL,
| 20 | +    api_key=API_KEY,
| 21 | +    base_url=BASE_URL,
| 22 | +) |
18 | 23 |
19 | | -    for keyword in extraversion_keywords:
20 | | -        if keyword in text_lower:
21 | | -            score += 0.5
| 24 | +EXTRAVERSION_PROMPT = """You are evaluating responses for extraversion personality traits. |
22 | 25 |
23 | | -    score += min(response_text.count('!') * 0.3, 2.0)
| 26 | +Extraversion characteristics include: |
| 27 | +- Outgoing, energetic, enthusiastic tone |
| 28 | +- Social engagement and excitement |
| 29 | +- Positive, upbeat language |
| 30 | +- Action-oriented expressions |
| 31 | +- Use of exclamation marks and emotional words |
24 | 32 |
25 | | -    if len(response_text) < 50:
26 | | -        score -= 1.0
| 33 | +Rate the response on a scale of 0.0-1.0: |
| 34 | +0.0 = Highly introverted (reserved, quiet, minimal emotion) |
| 35 | +1.0 = Highly extraverted (energetic, enthusiastic, very expressive) |
27 | 36 |
28 | | -    return max(1.0, min(10.0, score))
| 37 | +Question: {question} |
| 38 | +Response: {response} |
29 | 39 |
30 | | -async def on_compute_relative_reward(valid_results: List, all_answers: List[Dict]) -> List[float]: |
31 | | - """Compute relative rewards for extraversion alignment.""" |
| 40 | +Return a JSON object with exactly two fields:
| 41 | +- "score": float between 0.0 and 1.0 |
| 42 | +- "reason": brief explanation""" |
| 43 | + |
| 44 | +def build_listwise_template(n: int) -> str: |
| 45 | + """Build a listwise prompt template for n responses.""" |
| 46 | + answers_block = "\n".join([f"{i+1}. {{answer_{i+1}}}" for i in range(n)]) |
| 47 | + return f"""You are ranking multiple responses based on extraversion personality traits. |
| 48 | +
| 49 | +Extraversion characteristics include: |
| 50 | +- Outgoing, energetic, enthusiastic tone |
| 51 | +- Social engagement and excitement |
| 52 | +- Positive, upbeat language |
| 53 | +- Action-oriented expressions |
| 54 | +
| 55 | +Question: {{question}} |
| 56 | +
| 57 | +Responses to rank: |
| 58 | +{answers_block} |
| 59 | +
| 60 | +Rank these responses from most extraverted to least extraverted. |
| 61 | +Return a JSON object with exactly two fields:
| 62 | +- "rank": list of integers (1-indexed) ordered from most to least extraverted, e.g. [2, 1, 3] |
| 63 | +- "reason": brief explanation of the ranking""" |
| 64 | + |
| 65 | +pointwise_grader = LLMGrader( |
| 66 | +    name="extraversion_pointwise",
| 67 | +    mode=GraderMode.POINTWISE,
| 68 | +    description="Evaluate extraversion traits",
| 69 | +    model=judge_model,
| 70 | +    template=EXTRAVERSION_PROMPT,
| 71 | +) |
| 72 | + |
| 73 | + |
| 74 | +async def compute_pointwise_rewards(question: str, all_answers: List[Dict]) -> List[float]: |
| 75 | + """Compute rewards using OpenJudge pointwise grading.""" |
32 | 76 | scores = [] |
33 | 77 | for answer in all_answers: |
34 | 78 | content = answer.get("content", "") |
35 | | - raw_score = score_extraversion(content) |
36 | | - normalized = (raw_score - 5.5) / 4.5 |
37 | | - scores.append(normalized) |
38 | | - answer["reward"] = normalized |
| 79 | + result = await pointwise_grader.aevaluate(question=question, response=content) |
| 80 | + if isinstance(result, GraderScore): |
| 81 | + # score is already normalized 0-1 by OpenJudge |
| 82 | + score = result.score |
| 83 | + else: |
| 84 | + score = 0.0 |
| 85 | + scores.append(score) |
| 86 | + answer["reward"] = score |
| 87 | + return scores |
| 88 | + |
| 89 | + |
| 90 | +async def compute_listwise_rewards(question: str, all_answers: List[Dict]) -> List[float]: |
| 91 | + """Compute rewards using OpenJudge listwise ranking.""" |
| 92 | + n = len(all_answers) |
| 93 | + template = build_listwise_template(n) |
| 94 | + grader = LLMGrader( |
| 95 | + name="extraversion_listwise", |
| 96 | + mode=GraderMode.LISTWISE, |
| 97 | + description="Rank responses by extraversion", |
| 98 | + model=judge_model, |
| 99 | + template=template, |
| 100 | + ) |
| 101 | + kwargs = {"question": question} |
| 102 | + for i, ans in enumerate(all_answers): |
| 103 | + kwargs[f"answer_{i+1}"] = ans.get("content", "") |
| 104 | + |
| 105 | + result = await grader.aevaluate(**kwargs) |
| 106 | + |
| 107 | + scores = [0.0] * n |
| 108 | + if isinstance(result, GraderRank): |
| 109 | + # rank is a list of 1-indexed positions ordered best to worst |
| 110 | + # convert to reward: rank 1 (best) -> 1.0, rank n (worst) -> 0.0 |
| 111 | + for position, idx in enumerate(result.rank): |
| 112 | + scores[idx - 1] = 1.0 - (position / (n - 1)) if n > 1 else 0.5 |
| 113 | + |
| 114 | + for answer, score in zip(all_answers, scores): |
| 115 | + answer["reward"] = score |
| 116 | + return scores |
| 117 | + |
| 118 | + |
| 119 | +async def on_compute_relative_reward(valid_results: List, all_answers: List[Dict]) -> List[float]: |
| 120 | + """Compute relative rewards for extraversion alignment.""" |
| 121 | + question = valid_results[0].get("question", "") if valid_results else "" |
| 122 | + |
| 123 | + if REWARD_MODE == "listwise": |
| 124 | + scores = await compute_listwise_rewards(question, all_answers) |
| 125 | + else: # pointwise (default) |
| 126 | + scores = await compute_pointwise_rewards(question, all_answers) |
39 | 127 |
40 | | -    print_listofdict(all_answers, header="on_compute_relative_reward")
| 128 | +    print_listofdict(all_answers, header=f"on_compute_relative_reward (mode={REWARD_MODE})")
41 | 129 |     return scores
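
Note on the listwise reward mapping: the judge's ranking is converted to evenly spaced rewards in [0.0, 1.0], with the top-ranked answer getting 1.0 and the bottom-ranked 0.0 (0.5 when there is only one answer). A minimal standalone sketch of that conversion, using an illustrative helper name rank_to_rewards that mirrors the logic in compute_listwise_rewards rather than importing the module:

    from typing import List

    def rank_to_rewards(rank: List[int], n: int) -> List[float]:
        # rank is 1-indexed and ordered from most to least extraverted,
        # matching what the listwise prompt template asks the judge to return
        scores = [0.0] * n
        for position, idx in enumerate(rank):
            scores[idx - 1] = (1.0 - position / (n - 1)) if n > 1 else 0.5
        return scores

    # Example: three answers, judge returns rank [2, 1, 3]
    # -> answer 2 gets 1.0, answer 1 gets 0.5, answer 3 gets 0.0
    assert rank_to_rewards([2, 1, 3], 3) == [0.5, 1.0, 0.0]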