|
10 | 10 | DeploymentActivationResponse, |
11 | 11 | DeploymentDeactivationResponse, |
12 | 12 | DeploymentListResponse, |
13 | | - DeploymentUpdateRequest, |
14 | 13 | ) |
15 | 14 |
|
16 | 15 | from projectdavid.clients.base_client import BaseAPIClient |
@@ -277,16 +276,20 @@ def update( |
277 | 276 | ) |
278 | 277 |
|
279 | 278 | # Build payload — only include fields the caller explicitly passed |
280 | | - payload = DeploymentUpdateRequest( |
281 | | - gpu_memory_utilization=gpu_memory_utilization, |
282 | | - max_model_len=max_model_len, |
283 | | - max_num_seqs=max_num_seqs, |
284 | | - quantization=quantization, |
285 | | - dtype=dtype, |
286 | | - enforce_eager=enforce_eager, |
287 | | - limit_mm_per_prompt=limit_mm_per_prompt, |
288 | | - tensor_parallel_size=tensor_parallel_size, |
289 | | - ).model_dump(exclude_unset=True) |
| 279 | + payload = { |
| 280 | + k: v |
| 281 | + for k, v in { |
| 282 | + "gpu_memory_utilization": gpu_memory_utilization, |
| 283 | + "max_model_len": max_model_len, |
| 284 | + "max_num_seqs": max_num_seqs, |
| 285 | + "quantization": quantization, |
| 286 | + "dtype": dtype, |
| 287 | + "enforce_eager": enforce_eager, |
| 288 | + "limit_mm_per_prompt": limit_mm_per_prompt, |
| 289 | + "tensor_parallel_size": tensor_parallel_size, |
| 290 | + }.items() |
| 291 | + if v is not None |
| 292 | + } |
290 | 293 |
|
291 | 294 | try: |
292 | 295 | response = self.client.patch( |
|
0 commit comments