Skip to content

Commit ac97bc8

Browse files
committed
feat(pegboard): add local image cache and overlay fs mounts
1 parent b3aa8b4 commit ac97bc8

34 files changed

Lines changed: 1050 additions & 465 deletions

File tree

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
{
22
"builds": {
3-
"ws-isolate": {
4-
"script": "src/isolate/main.ts"
5-
},
3+
// "ws-isolate": {
4+
// "script": "src/isolate/main.ts"
5+
// },
66
"ws-container": {
7-
"dockerfile": "Dockerfile"
7+
"dockerfile": "Dockerfile",
8+
"unstable": {
9+
"compression": "none"
10+
}
811
}
912
}
1013
}

packages/core/services/cluster/src/workflows/server/install/install_scripts/files/pegboard_configure.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,8 @@ ExecStart=/usr/local/bin/rivet-client -c /etc/rivet-client/config.json
353353
Restart=always
354354
RestartSec=2
355355
356+
# High scheduling priority
357+
Nice=-15
356358
# Real time service
357359
CPUSchedulingPolicy=fifo
358360
# High CPU priority

packages/core/services/cluster/src/workflows/server/install/install_scripts/files/rivet_guard_configure.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ ExecStart=/usr/local/bin/rivet-guard
2626
Restart=always
2727
RestartSec=2
2828
29+
# High scheduling priority
30+
Nice=-15
2931
# Real time service
3032
CPUSchedulingPolicy=fifo
3133
# High CPU priority

packages/core/services/cluster/src/workflows/server/install/install_scripts/files/rivet_worker_configure.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ ExecStart=/usr/local/bin/rivet-edge-server start --skip-provision
2828
Restart=always
2929
RestartSec=2
3030
31+
# High scheduling priority
32+
Nice=-15
3133
# Real time service
3234
CPUSchedulingPolicy=fifo
3335
# High CPU priority

packages/edge/infra/client/container-runner/src/container.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ pub fn run(
3232
.context("empty `actor_path`")?
3333
.to_string_lossy()
3434
.to_string();
35-
let fs_path = actor_path.join("fs");
35+
let fs_path = actor_path.join("fs").join("upper");
3636
let oci_bundle_config_json = fs_path.join("config.json");
3737

3838
// Validate OCI bundle

packages/edge/infra/client/echo/src/main.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,12 @@ use tiny_http::{Response, Server, StatusCode};
22

33
// TODO: This can't pick up SIGTERM
44
fn main() {
5-
let port = std::env::var("PORT")
6-
.ok()
7-
.unwrap_or_else(|| "8080".to_string());
5+
println!("Env:");
6+
for (key, value) in std::env::vars() {
7+
println!(" {}: {}", key, value);
8+
}
9+
10+
let port = std::env::var("PORT_MAIN").expect("no PORT_MAIN");
811
let addr = format!("0.0.0.0:{port}");
912
let server = Server::http(&addr).unwrap();
1013
println!("Listening on {addr}");

packages/edge/infra/client/isolate-v8-runner/src/isolate.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ pub async fn run_inner(
153153
tracing::info!(?actor_id, ?generation, "isolate kv initialized");
154154

155155
// Should match the path from `Actor::make_fs` in manager/src/actor/setup.rs
156-
let index = actor_path.join("fs").join("index.js");
156+
let index = actor_path.join("fs").join("upper").join("index.js");
157157

158158
// Load index.js
159159
let index_script_content = match fs::read_to_string(&index).await {
@@ -560,7 +560,8 @@ mod tests {
560560

561561
let fs_path = actors_path
562562
.join(format!("{actor_id}-{generation}"))
563-
.join("fs");
563+
.join("fs")
564+
.join("upper");
564565
std::fs::create_dir_all(&fs_path)?;
565566

566567
std::fs::copy(

packages/edge/infra/client/manager/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ rand = "0.8"
3030
rand_chacha = "0.3.1"
3131
reqwest = { version = "0.12", default-features = false, features = ["stream", "rustls-tls", "json"] }
3232
rivet-logs.workspace = true
33+
scc = "2.3.4"
3334
serde = { version = "1.0.195", features = ["derive"] }
3435
serde_json = "1.0.111"
3536
serde_yaml = "0.9.34"

packages/edge/infra/client/manager/src/actor/mod.rs

Lines changed: 39 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
use std::{
22
result::Result::{Err, Ok},
33
sync::Arc,
4-
time::Duration,
4+
time::{Duration, Instant},
55
};
66

77
use anyhow::*;
88
use indoc::indoc;
99
use nix::sys::signal::Signal;
1010
use pegboard::protocol;
1111
use pegboard_config::runner_protocol;
12+
use sqlx::Acquire;
1213
use tokio::{fs, sync::Mutex};
1314
use uuid::Uuid;
1415

@@ -75,16 +76,18 @@ impl Actor {
7576
actor_id,
7677
generation,
7778
config,
78-
start_ts
79+
start_ts,
80+
image_id
7981
)
80-
VALUES (?1, ?2, ?3, ?4)
82+
VALUES (?1, ?2, ?3, ?4, ?5)
8183
ON CONFLICT (actor_id, generation) DO NOTHING
8284
",
8385
))
8486
.bind(self.actor_id)
8587
.bind(self.generation as i64)
8688
.bind(&config_json)
8789
.bind(utils::now())
90+
.bind(self.config.image.id)
8891
.execute(&mut *ctx.sql().await?)
8992
.await
9093
})
@@ -128,7 +131,7 @@ impl Actor {
128131
self: &Arc<Self>,
129132
ctx: &Arc<Ctx>,
130133
) -> Result<protocol::HashableMap<String, protocol::ProxiedPort>> {
131-
let setup_timer = std::time::Instant::now();
134+
let setup_timer = Instant::now();
132135
tracing::info!(actor_id=?self.actor_id, generation=?self.generation, "setting up actor");
133136

134137
let actor_path = ctx.actor_path(self.actor_id, self.generation);
@@ -146,19 +149,13 @@ impl Actor {
146149
protocol::ImageKind::DockerImage | protocol::ImageKind::OciBundle
147150
) && matches!(self.config.network_mode, protocol::NetworkMode::Bridge);
148151

149-
// Parallelize two independent jobs:
150-
//
151-
// - `download_image` takes a long time to download. `download_image` is dependent on
152-
// `make_fs`
153-
// - `setup_cni_network` takes a long time. `setup_cni_network` is dependent on
154-
// `bind_ports`.
155152
tracing::info!(actor_id=?self.actor_id, generation=?self.generation, "starting parallel setup tasks");
156-
let parallel_timer = std::time::Instant::now();
153+
let parallel_timer = Instant::now();
157154

158155
let (_, ports) = tokio::try_join!(
159156
async {
160-
self.make_fs(&ctx).await?;
161157
self.download_image(&ctx).await?;
158+
self.make_fs(&ctx).await?;
162159
Result::<(), anyhow::Error>::Ok(())
163160
},
164161
async {
@@ -205,7 +202,12 @@ impl Actor {
205202
let mut runner_env = vec![
206203
(
207204
"ROOT_USER_ENABLED",
208-
if self.config.root_user_enabled { "1" } else { "0" }.to_string(),
205+
if self.config.root_user_enabled {
206+
"1"
207+
} else {
208+
"0"
209+
}
210+
.to_string(),
209211
),
210212
("ACTOR_ID", self.actor_id.to_string()),
211213
];
@@ -443,7 +445,10 @@ impl Actor {
443445
// Update stop_ts
444446
if matches!(signal, Signal::SIGTERM | Signal::SIGKILL) || !has_runner {
445447
let stop_ts_set = utils::sql::query(|| async {
446-
sqlx::query_as::<_, (bool,)>(indoc!(
448+
let mut conn = ctx.sql().await?;
449+
let mut tx = conn.begin().await?;
450+
451+
let res = sqlx::query_as::<_, (bool,)>(indoc!(
447452
"
448453
UPDATE actors
449454
SET stop_ts = ?3
@@ -457,11 +462,27 @@ impl Actor {
457462
.bind(self.actor_id)
458463
.bind(self.generation as i64)
459464
.bind(utils::now())
460-
.fetch_optional(&mut *ctx.sql().await?)
461-
.await
465+
.fetch_optional(&mut *tx)
466+
.await?;
467+
468+
// Update LRU cache
469+
sqlx::query(indoc!(
470+
"
471+
UPDATE images_cache
472+
SET last_used_ts = ?2
473+
WHERE image_id = ?1
474+
",
475+
))
476+
.bind(self.config.image.id)
477+
.bind(utils::now())
478+
.execute(&mut *tx)
479+
.await?;
480+
481+
tx.commit().await?;
482+
483+
Ok(res.is_some())
462484
})
463-
.await?
464-
.is_some();
485+
.await?;
465486

466487
// Emit event if not stopped before
467488
if stop_ts_set {

packages/edge/infra/client/manager/src/actor/oci_config.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,7 @@ pub fn config(opts: ConfigOpts) -> Result<serde_json::Value> {
2525
//
2626
// Corresponds to cpu.weight in cgroups. Must be [1, 10_000]
2727
//
28-
// We divide by 8 in order to make sure the CPU shares are within bounds. `cpu` is measured in
29-
// millishares, so 1_000 = 1 core. For a range of 32d1 (32_000) to 1d16 (62), we divide by 8
30-
// to make the range 3_200 to 6.
28+
// We divide by 10 in order to make sure the CPU shares are within bounds.
3129
let mut cpu_shares = opts.cpu / 10;
3230
if cpu_shares > 10_000 {
3331
cpu_shares = 10_000;

0 commit comments

Comments
 (0)