Skip to content

Commit d36da7a

Browse files
authored
Add files via upload
1 parent a9e8b6a commit d36da7a

14 files changed

Lines changed: 657 additions & 0 deletions

File tree

checkpoints/trained_agent.pth

43.6 KB
Binary file not shown.

data/demo/cs_latency_dict.pkl

864 Bytes
Binary file not shown.

data/demo/green_trace.pkl

1.9 KB
Binary file not shown.

data/demo/invocations.pkl

8.46 MB
Binary file not shown.

data/demo/network_latency_map.pkl

92 Bytes
Binary file not shown.

lace_rl/__init__.py

Whitespace-only changes.

lace_rl/agent/__init__.py

Whitespace-only changes.

lace_rl/agent/dqn_agent.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
# DQN implementation with two-region support and updated ServerlessEnv integration
2+
import torch
3+
import torch.nn as nn
4+
import torch.nn.functional as F
5+
import torch.optim as optim
6+
import numpy as np
7+
import random
8+
from collections import deque
9+
10+
class DQN(nn.Module):
    """Fully connected Q-network: state_dim -> 128 -> 64 -> action_dim.

    The layers live in a single ``nn.Sequential`` stored as ``self.net`` so
    that parameter names (``net.0.weight``, ...) stay stable for saved
    checkpoints.
    """

    def __init__(self, state_dim, action_dim):
        """Build the network.

        Args:
            state_dim: Number of input features per state vector.
            action_dim: Number of outputs (one value per discrete action).
        """
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(state_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, action_dim),
        )

    def forward(self, x):
        """Return one value per action for each row of ``x``."""
        return self.net(x)
23+
24+
class DQNAgent:
    """Epsilon-greedy DQN agent with a replay buffer and a target network.

    The online network (``q_network``) is trained on minibatches sampled
    uniformly from ``memory``; the target network is overwritten with the
    online weights every ``target_update_freq`` training steps.
    """

    def __init__(
        self,
        state_dim,
        action_dim,
        lr=1e-3,
        gamma=0.99,
        epsilon=1.0,
        epsilon_decay=0.995,
        epsilon_min=0.05,
        buffer_size=10000,
        batch_size=64,
        target_update_freq=10,
    ):
        """Create the networks, optimizer and replay buffer.

        Args:
            state_dim: Length of a state vector.
            action_dim: Number of discrete actions.
            lr: Adam learning rate.
            gamma: Discount factor applied to the bootstrapped target.
            epsilon: Initial probability of picking a random action.
            epsilon_decay: Multiplicative factor applied by ``decay_epsilon``.
            epsilon_min: Floor that ``decay_epsilon`` never goes below.
            buffer_size: Maximum number of stored transitions.
            batch_size: Minibatch size used by ``train``.
            target_update_freq: Number of ``train`` steps between hard
                copies of the online weights into the target network.

        Raises:
            ValueError: If ``target_update_freq`` is smaller than 1.
        """
        if target_update_freq < 1:
            raise ValueError("target_update_freq must be >= 1")

        self.state_dim = state_dim
        self.action_dim = action_dim
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min
        self.batch_size = batch_size
        self.target_update_freq = target_update_freq

        self.q_network = DQN(state_dim, action_dim)
        self.target_network = DQN(state_dim, action_dim)
        self.target_network.load_state_dict(self.q_network.state_dict())
        self.optimizer = optim.Adam(self.q_network.parameters(), lr=lr)
        self.memory = deque(maxlen=buffer_size)
        self.update_counter = 0

    def select_action(self, state):
        """Return an action index using the epsilon-greedy rule.

        With probability ``epsilon`` a uniformly random action is returned;
        otherwise the argmax of the online network's output for ``state``.
        """
        if random.random() < self.epsilon:
            return random.randint(0, self.action_dim - 1)
        state = torch.tensor(np.array(state), dtype=torch.float32).unsqueeze(0)
        with torch.no_grad():
            q_values = self.q_network(state)
        return int(torch.argmax(q_values).item())

    def store_transition(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def train(self):
        """Run one optimisation step on a random minibatch.

        Does nothing (returns ``None``) until the buffer holds at least
        ``batch_size`` transitions.
        """
        if len(self.memory) < self.batch_size:
            return

        batch = random.sample(self.memory, self.batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)

        states = torch.tensor(np.array(states), dtype=torch.float32)
        actions = torch.tensor(np.array(actions), dtype=torch.int64).unsqueeze(1)
        rewards = torch.tensor(np.array(rewards), dtype=torch.float32).unsqueeze(1)
        next_states = torch.tensor(np.array(next_states), dtype=torch.float32)
        dones = torch.tensor(np.array(dones), dtype=torch.float32).unsqueeze(1)

        # Value of the action that was actually taken in each transition.
        q_values = self.q_network(states).gather(1, actions)
        with torch.no_grad():
            max_next_q = self.target_network(next_states).max(1)[0].unsqueeze(1)
            # (1 - dones) removes the bootstrap term for terminal transitions.
            target_q = rewards + (1 - dones) * self.gamma * max_next_q

        loss = F.mse_loss(q_values, target_q)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # Hard target-network update every `target_update_freq` steps.
        self.update_counter += 1
        if self.update_counter % self.target_update_freq == 0:
            self.target_network.load_state_dict(self.q_network.state_dict())

    def decay_epsilon(self):
        """Multiply ``epsilon`` by ``epsilon_decay`` without passing the floor.

        An ``epsilon`` that already sits at or below ``epsilon_min`` is left
        untouched, so an explicitly configured lower value is preserved.
        """
        if self.epsilon > self.epsilon_min:
            # Clamp: a plain multiply could step below epsilon_min.
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    def save(self, path):
        """Write the online network's ``state_dict`` to ``path``."""
        torch.save(self.q_network.state_dict(), path)

    def load(self, path):
        """Load online weights from ``path`` and mirror them into the target.

        Tensors are mapped to CPU so that a checkpoint written on a GPU
        machine can be restored on a CPU-only host; the networks built in
        ``__init__`` are never moved off the CPU by this class.
        """
        state_dict = torch.load(path, map_location="cpu")
        self.q_network.load_state_dict(state_dict)
        self.target_network.load_state_dict(self.q_network.state_dict())

lace_rl/sim/__init__.py

Whitespace-only changes.

lace_rl/sim/trace_simulator.py

Lines changed: 284 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,284 @@
1+
from dataclasses import dataclass
2+
from typing import Dict, List, Tuple
3+
4+
@dataclass
class FunctionInvocation:
    """One record of a serverless function invocation trace."""

    timestamp: int  # floored to an hour bucket (// 3600 * 3600) by the simulator
    pod_id: str  # key under which the simulator tracks container state
    region_id: str  # region used to look up carbon-intensity data
    exec_time_s: float
    cpu_cores: float
    mem_MB: float
    cold_start_latency_s: float = 0.0  # added to latency only on a cold start
    user_region: str = 'local'  # origin side of the network-latency lookup

    # Free-form extra data; defaults to None (not an empty dict).
    metadata: dict = None
16+
17+
@dataclass
class GreenEnergy:
    """Carbon-intensity sample for one region at one timestamp."""

    timestamp: int
    region_id: str
    # green_ratio: float
    carbon_intensity: float  # multiplied by energy in kWh to obtain emissions
23+
24+
class EnergyEstimator:
    """Linear power model used to estimate energy and carbon per invocation.

    Power is modelled as a DRAM term proportional to ``mem_MB`` plus a CPU
    term proportional to the fraction of ``N_CPU`` cores in use.
    NOTE(review): the coefficients are used as power (multiplied by seconds
    to get joules) although their names read "J per ..." -- confirm units.
    """

    def __init__(self):
        # Simulate m5-series EC2 instance
        self.J_DRAM_per_MB = 0.00037
        self.J_CPU_per_mCore = 0.3
        # Alternative CPU coefficient kept from the original source:
        # self.J_CPU_per_mCore = 0.188

        self.LAMBDA_IDLE = 0.2  # scales the CPU term while a container idles
        self.N_CPU = 32  # denominator for the CPU utilisation fraction
        self.TOTAL_DRAM_MB = 16384  # not read by any method in this class

    def estimate_keep_alive_energy(self, mem_MB, cpu_cores, duration_s, carbon_intensity):
        """Return ``(energy, carbon)`` for keeping a container idle.

        Args:
            mem_MB: Memory held by the idle container.
            cpu_cores: Cores reserved by the idle container.
            duration_s: Idle duration in seconds.
            carbon_intensity: Factor applied per kWh of energy.
        """
        cpu_frac = cpu_cores / self.N_CPU
        dram_power = self.J_DRAM_per_MB * mem_MB
        idle_cpu_power = self.J_CPU_per_mCore * self.LAMBDA_IDLE * cpu_frac
        energy = (dram_power + idle_cpu_power) * duration_s
        carbon = self.estimate_carbon_emission(energy, carbon_intensity)
        return energy, carbon

    def estimate_exec_energy(self, exec_time_s, cpu_cores, mem_MB):
        """Return the energy spent executing for ``exec_time_s`` seconds."""
        cpu_frac = cpu_cores / self.N_CPU
        power = self.J_DRAM_per_MB * mem_MB + self.J_CPU_per_mCore * cpu_frac
        return power * exec_time_s

    def estimate_carbon_emission(self, energy_joule, carbon_intensity):
        """Convert joules to kWh (1 kWh = 3.6e6 J) and scale by intensity."""
        energy_kwh = energy_joule / (3.6 * 1e6)
        return energy_kwh * carbon_intensity
50+
51+
class TraceDrivenSimulator:
    """Replay an invocation trace and tally latency, energy and carbon.

    Three entry points share the same per-invocation accounting:

    * ``run`` uses a fixed keep-alive window.
    * ``run_with_agent`` lets an agent pick the keep-alive per invocation.
    * ``run_with_agent_with_action_log`` does the same and also returns the
      chosen keep-alive values.

    Invocations with no carbon-intensity entry for their region and hour
    bucket are skipped and not counted.
    """

    def __init__(self, green_trace: "Dict[str, Dict[int, GreenEnergy]]", network_latency_map=None, fixed_keep_alive_s=60.0):
        """Store the inputs and zero all metrics.

        Args:
            green_trace: ``region_id -> {hour_timestamp -> GreenEnergy}``.
            network_latency_map: Optional ``(from_region, to_region) ->
                one-way latency``; missing pairs count as 0.0.
            fixed_keep_alive_s: Keep-alive window used by ``run``.
        """
        self.green_trace = green_trace
        self.fixed_keep_alive_s = fixed_keep_alive_s
        self.network_latency_map = network_latency_map or {}
        self.energy_estimator = EnergyEstimator()
        self.reset_metrics()

    def reset_metrics(self):
        """Clear container state and every accumulated counter."""
        # pod_id -> (last invocation timestamp, mem_MB, cpu_cores)
        self.container_state: Dict[str, Tuple[int, float, float]] = {}
        self.total_energy = 0.0
        self.total_carbon = 0.0
        self.keep_alive_energy = 0.0
        self.execution_energy = 0.0
        self.keep_alive_carbon = 0.0
        self.execution_carbon = 0.0
        self.cold_start_count = 0
        self.total_latency = 0.0
        self.total_network_latency = 0.0
        self.total_invocations = 0
        self.total_idle_time: Dict[str, float] = {}
        self.total_idle_energy: Dict[str, float] = {}

    def _green_for(self, inv):
        """Return the carbon record for ``inv``'s region and hour, or None."""
        hour_bucket = int(inv.timestamp // 3600 * 3600)
        return self.green_trace.get(inv.region_id, {}).get(hour_bucket)

    def _account(self, inv, green_ene, keep_alive_s, dest_region, exec_latency_s):
        """Fold one invocation into the running metrics.

        Args:
            inv: The invocation being replayed.
            green_ene: Carbon record for the invocation (must not be None).
            keep_alive_s: Keep-alive window used for the cold-start test and
                as the cap on idle time charged since the previous call.
            dest_region: Destination side of the network-latency lookup.
            exec_latency_s: Execution time to charge as latency.
        """
        self.total_invocations += 1
        previous = self.container_state.get(inv.pod_id)
        is_cold = previous is None or inv.timestamp > previous[0] + keep_alive_s

        # Round trip: twice the one-way latency, 0.0 for unknown pairs.
        net_latency = 2 * self.network_latency_map.get((inv.user_region, dest_region), 0.0)
        latency = exec_latency_s
        if is_cold:
            self.cold_start_count += 1
            latency = latency + inv.cold_start_latency_s
        self.total_latency += latency + net_latency
        self.total_network_latency += net_latency

        e_energy = self.energy_estimator.estimate_exec_energy(inv.exec_time_s, inv.cpu_cores, inv.mem_MB)
        e_carbon = self.energy_estimator.estimate_carbon_emission(e_energy, green_ene.carbon_intensity)
        self.execution_energy += e_energy
        self.execution_carbon += e_carbon

        if previous is not None:
            last_ts, mem_MB, cpu_cores = previous
            idle_time = inv.timestamp - last_ts
            if idle_time > 0:
                # The container is only kept warm for at most keep_alive_s.
                recorded_idle_time = min(idle_time, keep_alive_s)
                k_energy, k_carbon = self.energy_estimator.estimate_keep_alive_energy(
                    mem_MB, cpu_cores, recorded_idle_time, green_ene.carbon_intensity
                )
                self.keep_alive_energy += k_energy
                self.keep_alive_carbon += k_carbon
                self.total_idle_time.setdefault(inv.pod_id, 0.0)
                self.total_idle_time[inv.pod_id] += recorded_idle_time
                self.total_idle_energy.setdefault(inv.pod_id, 0.0)
                self.total_idle_energy[inv.pod_id] += k_energy

        self.container_state[inv.pod_id] = (inv.timestamp, inv.mem_MB, inv.cpu_cores)

    def _summary(self):
        """Refresh the energy/carbon totals and return the metrics dict."""
        self.total_energy = self.execution_energy + self.keep_alive_energy
        self.total_carbon = self.execution_carbon + self.keep_alive_carbon
        invocations = self.total_invocations
        return {
            'total_energy_J': self.total_energy,
            'total_carbon_g': self.total_carbon,
            'cold_starts': self.cold_start_count,
            'avg_latency_s': self.total_latency / invocations if invocations else 0.0,
            'total_network_latency_s': self.total_network_latency,
            'invocation_count': invocations,
            'keep_alive_energy_J': self.keep_alive_energy,
            'execution_energy_J': self.execution_energy,
            'keep_alive_carbon_g': self.keep_alive_carbon,
            'execution_carbon_g': self.execution_carbon,
            'idle_time_by_function': self.total_idle_time,
            'idle_energy_by_function': self.total_idle_energy,
        }

    def _replay_with_agent(self, invocations, agent, obs_fn):
        """Reset, replay with agent-chosen keep-alives; return (summary, actions)."""
        self.reset_metrics()
        actions = []
        for inv in invocations:
            green_ene = self._green_for(inv)
            if green_ene is None:
                continue

            obs = obs_fn(inv)
            action = agent.select_action(obs)
            # dest_region, keep_alive_s = agent.action_lookup[action]
            _, keep_alive_s = agent.action_lookup[action]
            actions.append(keep_alive_s)
            dest_region = inv.user_region  # Hold for future cross-region usage

            self._account(inv, green_ene, keep_alive_s, dest_region, inv.exec_time_s)
        return self._summary(), actions

    def run(self, invocations: "List[FunctionInvocation]"):
        """Replay ``invocations`` with the fixed keep-alive window.

        Metrics are NOT reset here, so repeated calls accumulate on top of
        earlier ones; call ``reset_metrics`` first for a fresh run.
        NOTE(review): execution time is divided by 0.9 on this path only,
        and the agent paths charge it unscaled -- confirm this is intended.

        Returns:
            The metrics dictionary described by the keys of ``_summary``.
        """
        for inv in invocations:
            green_ene = self._green_for(inv)
            if green_ene is None:
                print("No energy data provided.")
                continue
            self._account(inv, green_ene, self.fixed_keep_alive_s, inv.region_id, inv.exec_time_s / 0.9)
        return self._summary()

    def run_with_agent(self, invocations: "List[FunctionInvocation]", agent, obs_fn):
        """Reset metrics and replay with keep-alive chosen by ``agent``.

        Args:
            invocations: Trace to replay.
            agent: Object exposing ``select_action(obs)`` and an
                ``action_lookup`` mapping whose values are
                ``(region, keep_alive_s)`` pairs; only the keep-alive is used.
            obs_fn: Callable turning an invocation into an observation.

        Returns:
            The metrics dictionary.
        """
        summary, _ = self._replay_with_agent(invocations, agent, obs_fn)
        return summary

    def run_with_agent_with_action_log(self, invocations: "List[FunctionInvocation]", agent, obs_fn):
        """Like ``run_with_agent`` but also return the chosen keep-alives.

        Returns:
            ``(metrics_dict, actions)`` where ``actions`` lists the
            keep-alive value selected for each processed invocation.
        """
        return self._replay_with_agent(invocations, agent, obs_fn)

0 commit comments

Comments
 (0)