# ThinkSync/openenv.yaml at main · White-D-coder/ThinkSync · GitHub

# Manifest identity: the environment's name, its version string, and a
# one-line summary of what it is.
name: "ThinkSync"
version: "1.0.0"  # quoted so it is always loaded as a string
description: "AI Content Moderation Environment"

# Points at the environment implementation: a module name, a class name,
# and an entry-point script.
# NOTE(review): presumably `engine` resolves to a module next to this file
# and `MyEnv` is a class inside it — confirm against the loader.
environment:
  module: engine
  class: MyEnv
  entry_point: inference.py  # presumably the runner script — confirm

# Task catalogue: three entries labelled EASY, MEDIUM and HARD. Every entry
# carries an id, a display name, a description, a difficulty label and a
# grader reference.
# NOTE(review): the `grader` values look like `module.function` paths into a
# `grader` module — confirm against the code that resolves them.
tasks:
  # EASY tier
  - id: easy_detection
    name: "Easy Detection"
    description: "Simple toxicity and spam detection"
    difficulty: EASY
    grader: grader.grade_easy_detection
  # MEDIUM tier
  - id: medium_classification
    name: "Medium Classification"
    description: "Contextual sentiment analysis — sarcasm vs harassment"
    difficulty: MEDIUM
    grader: grader.grade_medium_classification
  # HARD tier
  - id: hard_contextual
    name: "Hard Contextual"
    description: "Multi-turn moderation with coded language detection"
    difficulty: HARD
    grader: grader.grade_hard_contextual

# Action space: a structured record with four typed fields, three strings
# and one number.
# NOTE(review): the set of accepted `action_type` values and the valid range
# of `confidence_score` are not declared here — confirm with the consumer.
action_space:
  type: structured
  schema:
    content_id:
      type: string
    action_type:
      type: string
    reasoning_chain:
      type: string
    confidence_score:
      type: number

# Observation space: a structured record with eight typed fields.
# NOTE(review): element types of the two arrays and the shape of `metadata`
# are not declared here — confirm with the environment implementation.
observation_space:
  type: structured
  schema:
    # String fields
    id:
      type: string
    content:
      type: string
    # Array fields
    content_queue:
      type: array
    moderation_log:
      type: array
    # Counters and status
    step_count:
      type: integer
    cumulative_reward:
      type: number
    done:
      type: boolean
    # Free-form object
    metadata:
      type: object

# Reward signal: continuous, with a two-element range.
# NOTE(review): presumably the range is [min, max] and applies per step,
# since the baseline total_reward in this file exceeds 1.0 — confirm.
reward:
  type: continuous
  range:
    - 0.0
    - 1.0

# Reference results recorded for a baseline model.
# NOTE(review): total_reward / avg_reward = 7.44 / 0.62 = 12, which suggests
# the figures come from 12 scored steps — confirm.
baseline:
  model: "Qwen/Qwen2.5-72B-Instruct"
  avg_reward: 0.62
  total_reward: 7.44

# Runtime requirements: CPU count, RAM, a timeout, and the container image.
runtime:
  cpu: 2
  ram_gb: 8
  timeout: 1200  # NOTE(review): unit not stated; presumably seconds — confirm
  image: "python:3.11-slim"

# Templates for the three kinds of log line: start, per-step, and end.
# The values must stay quoted because each begins with `[`, which YAML would
# otherwise read as the start of a flow sequence.
# NOTE(review): the brace fields ({n}, {r:.2f}, {s:.3f}, {success}) look like
# Python format-spec placeholders — confirm how the consumer fills them in.
log_format:
  start: '[START]'
  step: '[STEP] step={n} reward={r:.2f}'
  end: '[END] success={success} total_steps={n} final_score={s:.3f}'