# openenv.yaml
# Repository forked from Anubhav741/ThinkSync.
# Identity of this environment manifest.
name: ThinkSync
# Quoted to make the string type explicit.
version: "1.0.0"
description: AI Content Moderation Environment
# Where the environment implementation lives: class `MyEnv` in module `engine`.
# NOTE(review): nesting was reconstructed from a flattened copy of this file;
# confirm that `entry_point` belongs under `environment` and is not top-level.
environment:
  module: engine
  class: MyEnv
  entry_point: inference.py
# Graded tasks, listed in increasing difficulty (EASY, MEDIUM, HARD).
# Every field of a task must be indented under its `- id:` item; at column 0
# the `name`/`description` keys would collide with the manifest's own.
# `grader` is a dotted reference, presumably `module.function` — confirm.
tasks:
  - id: easy_detection
    name: Easy Detection
    description: Simple toxicity and spam detection
    difficulty: EASY
    grader: grader.grade_easy_detection
  - id: medium_classification
    name: Medium Classification
    description: Contextual sentiment analysis — sarcasm vs harassment
    difficulty: MEDIUM
    grader: grader.grade_medium_classification
  - id: hard_contextual
    name: Hard Contextual
    description: Multi-turn moderation with coded language detection
    difficulty: HARD
    grader: grader.grade_hard_contextual
# Shape of a single agent action: four named fields, each with a value type.
# `type` and `schema` must stay nested here; at column 0 they would duplicate
# the same keys used by other sections of this manifest.
action_space:
  type: structured
  schema:
    content_id: { type: string }
    action_type: { type: string }
    reasoning_chain: { type: string }
    confidence_score: { type: number }
# Shape of an observation: eight named fields, each with a value type.
# `type` and `schema` must stay nested here; at column 0 they would duplicate
# the same keys used by other sections of this manifest.
observation_space:
  type: structured
  schema:
    id: { type: string }
    content: { type: string }
    content_queue: { type: array }
    moderation_log: { type: array }
    step_count: { type: integer }
    cumulative_reward: { type: number }
    done: { type: boolean }
    metadata: { type: object }
# Reward signal: a continuous value with bounds 0.0 and 1.0.
# Both keys must be nested; a column-0 `type` would clash with other sections.
reward:
  type: continuous
  range: [0.0, 1.0]
# Reference scores recorded for the named baseline model.
# NOTE(review): total_reward / avg_reward = 12, presumably the number of
# scored items in the baseline run — confirm.
baseline:
  model: "Qwen/Qwen2.5-72B-Instruct"
  avg_reward: 0.62
  total_reward: 7.44
# Resources and container image for running the environment.
# NOTE(review): the unit of `timeout` is not stated in this file; presumably
# seconds — confirm with the consumer of this manifest.
runtime:
  cpu: 2
  ram_gb: 8
  timeout: 1200
  image: "python:3.11-slim"
# Log line templates for the start, each step, and the end of a run.
# Placeholders are written as `{name}` or `{name:spec}`; `{n}` appears in both
# `step` and `end`. The `:.2f` / `:.3f` specs look like Python format specs —
# confirm which formatter consumes these strings.
log_format:
  start: "[START]"
  step: "[STEP] step={n} reward={r:.2f}"
  end: "[END] success={success} total_steps={n} final_score={s:.3f}"