-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbaselines.py
More file actions
57 lines (54 loc) · 2.3 KB
/
baselines.py
File metadata and controls
57 lines (54 loc) · 2.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# rlte/baselines.py
# Two benchmark algorithms for the evaluation (Sec. 3.4): SL and TWAP.
# DR (Dirichlet RL) is only a comparison method in the paper; it is deliberately NOT implemented here (no alternative RL approach).
from __future__ import annotations
from typing import Dict, Tuple, List
import numpy as np
from .env_lob import LOBExecutionEnv
from . import config as C
def run_SL(market: str, M0: int, episodes: int = 1000) -> Tuple[float, float]:
    """Evaluate the Submit & Leave (SL) baseline (Sec. 3.4).

    On the first step of every episode the whole simplex weight is placed on
    action index 1 (intended as a limit sell at the best ask); on every later
    step the whole weight is placed on the last index (hold). Liquidating any
    remainder at the horizon is left to the environment.

    Args:
        market: Market identifier forwarded to ``LOBExecutionEnv``.
        M0: Initial inventory forwarded to ``LOBExecutionEnv``.
        episodes: Number of episodes to simulate. Episode ``ep`` uses the seed
            ``C.SEED + 1000 + ep``.

    Returns:
        ``(mean, std)`` of the recorded per-episode rewards, or ``(nan, nan)``
        when ``episodes`` is not positive.
    """
    if episodes <= 0:
        # Nothing to average; return NaN explicitly instead of letting
        # np.mean([]) emit a RuntimeWarning.
        nan = float("nan")
        return nan, nan

    n_actions = C.K_SIMPLEX + 1
    rewards = []
    for ep in range(episodes):
        env = LOBExecutionEnv(market=market, M0=M0, seed=C.SEED + 1000 + ep)
        env.reset()
        # Stays 0.0 only if the step loop never runs (C.N_STEPS <= 0).
        last_reward = 0.0
        for step in range(C.N_STEPS):
            action = np.zeros(n_actions, dtype=np.float32)
            # Index 1 is treated as "one tick above best bid", i.e. roughly
            # the best ask at a spread of 1 -- assumption about the env's
            # action layout, TODO confirm. Afterwards: hold (last index).
            action[1 if step == 0 else -1] = 1.0
            _, last_reward, done, _ = env.step(action)
            if done:
                break
        # NOTE(review): only the reward of the last executed step is recorded;
        # presumably the env reports the episode result there -- confirm.
        rewards.append(last_reward)
    return float(np.mean(rewards)), float(np.std(rewards))
def run_TWAP(market: str, M0: int, episodes: int = 1000) -> Tuple[float, float]:
    """Evaluate the TWAP baseline (Sec. 3.4).

    ``M0`` is split into ``C.N_STEPS`` blocks of about ``M0 / C.N_STEPS``
    units. On every step the fraction ``block / remaining inventory`` of the
    simplex weight is placed on action index 1 (intended as a limit sell at
    the best ask) and the rest on the last index (hold). Liquidating any
    remainder at the horizon is left to the environment.

    Args:
        market: Market identifier forwarded to ``LOBExecutionEnv``.
        M0: Initial inventory forwarded to ``LOBExecutionEnv``.
        episodes: Number of episodes to simulate. Episode ``ep`` uses the seed
            ``C.SEED + 2000 + ep``.

    Returns:
        ``(mean, std)`` of the recorded per-episode rewards, or ``(nan, nan)``
        when ``episodes`` is not positive.
    """
    if episodes <= 0:
        # Nothing to average; return NaN explicitly instead of letting
        # np.mean([]) emit a RuntimeWarning.
        nan = float("nan")
        return nan, nan

    n_steps = C.N_STEPS
    block = int(round(M0 / n_steps))
    if M0 > 0:
        # A small M0 relative to N_STEPS would round the block to 0 and the
        # baseline would never place a limit order; sell at least one unit.
        block = max(1, block)

    rewards = []
    for ep in range(episodes):
        env = LOBExecutionEnv(market=market, M0=M0, seed=C.SEED + 2000 + ep)
        env.reset()
        action = np.zeros(C.K_SIMPLEX + 1, dtype=np.float32)
        # Stays 0.0 only if the step loop never runs (C.N_STEPS <= 0).
        last_reward = 0.0
        for _ in range(n_steps):
            # Express the fixed block as a share of what is still unsold;
            # the max(1, ...) guards against division by zero.
            remaining = max(1, env.M_remaining)
            frac = min(1.0, block / float(remaining))
            action[:] = 0.0
            # Index 1 is treated as the best-ask limit level -- assumption
            # about the env's action layout, TODO confirm.
            action[1] = frac
            action[-1] = 1.0 - frac
            _, last_reward, done, _ = env.step(action)
            if done:
                break
        # NOTE(review): only the reward of the last executed step is recorded;
        # presumably the env reports the episode result there -- confirm.
        rewards.append(last_reward)
    return float(np.mean(rewards)), float(np.std(rewards))