Skip to content
This repository was archived by the owner on Jan 27, 2026. It is now read-only.

Commit 66cea22

Browse files
authored
fix compute requirement parsing creates multiple subojects in gpu array (#347)
1 parent ece85e1 commit 66cea22

2 files changed

Lines changed: 52 additions & 32 deletions

File tree

crates/orchestrator/src/plugins/node_groups/tests.rs

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,51 @@ fn create_test_node(
4444
compute_specs,
4545
}
4646
}
47+
#[tokio::test]
48+
async fn test_parsing_groups_from_string() {
49+
let group_config = r#"
50+
[
51+
{
52+
"name": "a100-group",
53+
"min_group_size": 2,
54+
"max_group_size": 2,
55+
"compute_requirements": "gpu:model=A100;gpu:count=8"
56+
},
57+
{
58+
"name": "h100-group",
59+
"min_group_size": 1,
60+
"max_group_size": 1,
61+
"compute_requirements": "gpu:count=8;gpu:model=H100"
62+
}
63+
]
64+
"#;
65+
let groups = serde_json::from_str::<Vec<NodeGroupConfiguration>>(group_config).unwrap();
66+
assert_eq!(groups.len(), 2);
67+
68+
// Check A100 group config
69+
let a100_config = &groups[0];
70+
assert_eq!(a100_config.name, "a100-group");
71+
assert_eq!(a100_config.min_group_size, 2);
72+
assert_eq!(a100_config.max_group_size, 2);
73+
74+
let a100_requirements = a100_config.compute_requirements.as_ref().unwrap();
75+
assert_eq!(a100_requirements.gpu.len(), 1);
76+
let a100_gpu_spec = &a100_requirements.gpu[0];
77+
assert_eq!(a100_gpu_spec.model, Some("A100".to_string()));
78+
assert_eq!(a100_gpu_spec.count, Some(8));
79+
80+
// Check H100 group config
81+
let h100_config = &groups[1];
82+
assert_eq!(h100_config.name, "h100-group");
83+
assert_eq!(h100_config.min_group_size, 1);
84+
assert_eq!(h100_config.max_group_size, 1);
85+
86+
let h100_requirements = h100_config.compute_requirements.as_ref().unwrap();
87+
assert_eq!(h100_requirements.gpu.len(), 1);
88+
let h100_gpu_spec = &h100_requirements.gpu[0];
89+
assert_eq!(h100_gpu_spec.model, Some("H100".to_string()));
90+
assert_eq!(h100_gpu_spec.count, Some(8));
91+
}
4792

4893
#[tokio::test]
4994
async fn test_group_formation_and_dissolution() {

crates/shared/src/models/node.rs

Lines changed: 7 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -122,11 +122,10 @@ impl fmt::Display for CpuSpecs {
122122
// Parser for compute requirements string
123123
impl FromStr for ComputeRequirements {
124124
type Err = anyhow::Error;
125-
126125
fn from_str(s: &str) -> Result<Self, Self::Err> {
127126
let mut requirements = ComputeRequirements::default();
128127
let mut current_gpu_spec = GpuSpecs::default();
129-
let mut gpu_spec_started = false; // Track if we've started defining a GPU spec
128+
let mut gpu_spec_started = false;
130129

131130
for part in s.split(';') {
132131
let part = part.trim();
@@ -145,26 +144,10 @@ impl FromStr for ComputeRequirements {
145144
match key {
146145
// --- GPU Specifications ---
147146
"gpu:count" => {
148-
// If a GPU spec was already being defined, push it before starting a new one
149-
if gpu_spec_started
150-
&& (current_gpu_spec.model.is_some()
151-
|| current_gpu_spec.memory_mb.is_some())
152-
{
153-
// Basic validation: ensure at least model or memory was also set if count > 0
154-
if current_gpu_spec.count.unwrap_or(0) > 0
155-
&& current_gpu_spec.model.is_none()
156-
&& current_gpu_spec.memory_mb.is_none()
157-
{
158-
// Allow count=0 without model/mem, but require model/mem if count > 0 is specified for a previous entry.
159-
// This logic might need refinement based on exact requirements.
160-
// For now, we push what we have.
161-
}
162-
requirements.gpu.push(current_gpu_spec);
163-
current_gpu_spec = GpuSpecs::default(); // Reset for the new spec
164-
} else if gpu_spec_started && current_gpu_spec.count.is_some() {
165-
// Handle case like "gpu:count=1; gpu:count=2" - push the first count=1 spec (implicitly)
147+
// If we have a complete GPU spec, push it before starting a new one
148+
if gpu_spec_started && current_gpu_spec.count.is_some() {
166149
requirements.gpu.push(current_gpu_spec);
167-
current_gpu_spec = GpuSpecs::default(); // Reset for the new spec
150+
current_gpu_spec = GpuSpecs::default();
168151
}
169152

170153
gpu_spec_started = true;
@@ -175,21 +158,14 @@ impl FromStr for ComputeRequirements {
175158
);
176159
}
177160
"gpu:model" => {
178-
if !gpu_spec_started && current_gpu_spec.count.is_none() {
179-
// If model comes before count, implicitly start a spec
161+
if !gpu_spec_started {
180162
gpu_spec_started = true;
181-
} else if !gpu_spec_started {
182-
return Err(anyhow!("'gpu:model' specified without preceding 'gpu:count' to start a new GPU requirement block"));
183163
}
184-
// TODO: Handle comma-separated models if needed, e.g., "A100,H100"
185164
current_gpu_spec.model = Some(value.to_string());
186165
}
187166
"gpu:memory_mb" => {
188-
if !gpu_spec_started && current_gpu_spec.count.is_none() {
189-
// If memory comes before count, implicitly start a spec
167+
if !gpu_spec_started {
190168
gpu_spec_started = true;
191-
} else if !gpu_spec_started {
192-
return Err(anyhow!("'gpu:memory_mb' specified without preceding 'gpu:count' to start a new GPU requirement block"));
193169
}
194170
current_gpu_spec.memory_mb =
195171
Some(value.parse::<u32>().map_err(|e| {
@@ -227,15 +203,14 @@ impl FromStr for ComputeRequirements {
227203
}
228204
}
229205

230-
// Push the last defined GPU spec if it exists
206+
// Push the last GPU spec if it exists and has any properties set
231207
if gpu_spec_started
232208
&& (current_gpu_spec.count.is_some()
233209
|| current_gpu_spec.model.is_some()
234210
|| current_gpu_spec.memory_mb.is_some())
235211
{
236212
requirements.gpu.push(current_gpu_spec);
237213
}
238-
// If no GPU specs were mentioned at all, requirements.gpu remains empty.
239214

240215
Ok(requirements)
241216
}

0 commit comments

Comments
 (0)