Skip to content
This repository was archived by the owner on Jan 27, 2026. It is now read-only.

Commit 49d5c00

Browse files
authored
Feature: Node group plugin with configurable topologies (#335)
* ability to have multiple node group configurations * ability to set compute requirements per node group
1 parent a265c64 commit 49d5c00

6 files changed

Lines changed: 519 additions & 113 deletions

File tree

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ watch-validator:
9191

9292
watch-orchestrator:
9393
set -a; source ${ENV_FILE}; set +a; \
94-
cargo watch -w crates/orchestrator/src -x "run --bin orchestrator -- -r $$RPC_URL -k $$POOL_OWNER_PRIVATE_KEY -d 0 -p 8090 -i 10 -u http://localhost:8090 --s3-credentials $$S3_CREDENTIALS --compute-pool-id $$WORKER_COMPUTE_POOL_ID --bucket-name $$BUCKET_NAME -l $${LOG_LEVEL:-info} --hourly-s3-upload-limit $${HOURLY_S3_LIMIT:-3} --with-basic-group-plugin --group-size 1"
94+
cargo watch -w crates/orchestrator/src -x "run --bin orchestrator -- -r $$RPC_URL -k $$POOL_OWNER_PRIVATE_KEY -d 0 -p 8090 -i 10 -u http://localhost:8090 --s3-credentials $$S3_CREDENTIALS --compute-pool-id $$WORKER_COMPUTE_POOL_ID --bucket-name $$BUCKET_NAME -l $${LOG_LEVEL:-info} --hourly-s3-upload-limit $${HOURLY_S3_LIMIT:-3} --node-group-configs '[{\"name\": \"test-config\", \"min_group_size\": 1, \"max_group_size\": 1, \"compute_requirements\": null}]'"
9595

9696
build-worker:
9797
cargo build --release --bin worker

crates/orchestrator/Dockerfile

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,7 @@ ENV LOG_LEVEL=""
2323
ENV HOURLY_S3_UPLOAD_LIMIT="2"
2424
ENV WEBHOOK_URLS=""
2525
ENV MODE="full"
26-
ENV WITH_BASIC_GROUP_PLUGIN="false"
27-
ENV GROUP_SIZE="4"
26+
ENV NODE_GROUP_CONFIGS=""
2827

2928
RUN echo '#!/bin/sh\n\
3029
exec /usr/local/bin/orchestrator \
@@ -46,8 +45,7 @@ $([ ! -z "$BUCKET_NAME" ] && echo "--bucket-name $BUCKET_NAME") \
4645
$([ ! -z "$LOG_LEVEL" ] && echo "--log-level $LOG_LEVEL") \
4746
$([ ! -z "$HOURLY_S3_UPLOAD_LIMIT" ] && echo "--hourly-s3-upload-limit $HOURLY_S3_UPLOAD_LIMIT") \
4847
$([ ! -z "$WEBHOOK_URLS" ] && echo "--webhook-urls $WEBHOOK_URLS") \
49-
$([ "$WITH_BASIC_GROUP_PLUGIN" = "true" ] && echo "--with-basic-group-plugin") \
50-
$([ ! -z "$GROUP_SIZE" ] && echo "--group-size $GROUP_SIZE") \
48+
$([ ! -z "$NODE_GROUP_CONFIGS" ] && echo "--node-group-configs $NODE_GROUP_CONFIGS") \
5149
"$@"' > /entrypoint.sh && \
5250
chmod +x /entrypoint.sh
5351

crates/orchestrator/src/api/routes/nodes.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ async fn get_nodes(app_state: Data<AppState>) -> HttpResponse {
4545
node_json["group"] = json!({
4646
"id": group.id,
4747
"size": group.nodes.len(),
48-
"created_at": group.created_at
48+
"created_at": group.created_at,
49+
"topology_config": group.configuration_name
4950
});
5051
}
5152

crates/orchestrator/src/main.rs

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ use scheduler::plugins::SchedulerPlugin;
2828
use shared::web3::contracts::core::builder::ContractBuilder;
2929
use shared::web3::contracts::structs::compute_pool::PoolStatus;
3030
use shared::web3::wallet::Wallet;
31+
use status_update::plugins::node_groups::NodeGroupConfiguration;
3132
use status_update::plugins::node_groups::NodeGroupsPlugin;
3233
use status_update::plugins::StatusUpdatePlugin;
3334
use std::sync::Arc;
@@ -115,14 +116,9 @@ struct Args {
115116
#[arg(long, default_value = "")]
116117
webhook_urls: Option<String>,
117118

118-
/// With basic group plugin
119-
/// Only temporary setting - will be moved to proper plugin config
119+
/// Node group configurations in JSON format
120120
#[arg(long)]
121-
with_basic_group_plugin: bool,
122-
123-
/// Group size
124-
#[arg(long, default_value = "4")]
125-
group_size: u32,
121+
node_group_configs: Option<String>,
126122
}
127123

128124
#[tokio::main]
@@ -203,16 +199,26 @@ async fn main() -> Result<()> {
203199
let mut status_update_plugins: Vec<Box<dyn StatusUpdatePlugin>> = vec![];
204200
let mut node_groups_plugin: Option<Arc<NodeGroupsPlugin>> = None;
205201

206-
// Add group plugin if enabled
207-
if args.with_basic_group_plugin {
208-
let group_size: usize = args.group_size as usize;
209-
let group_plugin =
210-
NodeGroupsPlugin::new(group_size, group_size, store.clone(), group_store_context);
211-
let status_group_plugin = group_plugin.clone();
212-
let group_plugin_for_server = group_plugin.clone();
213-
node_groups_plugin = Some(Arc::new(group_plugin_for_server));
214-
scheduler_plugins.push(Box::new(group_plugin));
215-
status_update_plugins.push(Box::new(status_group_plugin));
202+
// This config loading is pretty ugly atm and should be optimized
203+
// Issue: https://github.com/PrimeIntellect-ai/protocol/issues/336
204+
if let Some(configs_json) = args.node_group_configs {
205+
match serde_json::from_str::<Vec<NodeGroupConfiguration>>(&configs_json) {
206+
Ok(configs) if !configs.is_empty() => {
207+
let group_plugin =
208+
NodeGroupsPlugin::new(configs, store.clone(), group_store_context);
209+
let status_group_plugin = group_plugin.clone();
210+
let group_plugin_for_server = group_plugin.clone();
211+
node_groups_plugin = Some(Arc::new(group_plugin_for_server));
212+
scheduler_plugins.push(Box::new(group_plugin));
213+
status_update_plugins.push(Box::new(status_group_plugin));
214+
}
215+
Ok(_) => {
216+
info!("No node group configurations provided, skipping plugin setup");
217+
}
218+
Err(e) => {
219+
panic!("Failed to parse node group configurations: {}", e);
220+
}
221+
}
216222
}
217223

218224
let scheduler = Scheduler::new(store_context.clone(), scheduler_plugins);

0 commit comments

Comments
 (0)