Skip to content

Commit 5bda29b

Browse files
committed
Refactoring and fixing lint errors.
1 parent 5f679a9 commit 5bda29b

7 files changed

Lines changed: 150 additions & 142 deletions

File tree

jetstream/core/lora/adapter_tensorstore.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,8 @@ def __init__(self,
104104

105105
def register_adapter(self,
106106
adapter_id: str,
107-
adapter_path: str = None,
108-
adapter_config: Dict[str, Any] = None):
107+
adapter_path: str | None = None,
108+
adapter_config: Dict[str, Any] | None = None):
109109
"""Registers a new LoRA adapter."""
110110
"""
111111
Registers a LoRA adapter with the TensorStore. This also loads the adapter;
@@ -284,6 +284,7 @@ async def load_adapter(
284284

285285
try:
286286
if adapter_weights is None:
287+
adapter_path = f"{self.adapters_dir_path}/{adapter_id}"
287288
adapter_weights, adapter_config = self.engine.load_single_adapter(adapter_path)
288289

289290
if adapter_weights is None:

jetstream/core/lora/multi_lora_inference_api.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def models(
4040
"""Lists all loaded LoRA adapters."""
4141

4242
try:
43-
adapters = self._driver.listAdaptersFromTensorstore()
43+
adapters = self._driver.list_adapters_from_tensorstore()
4444

4545
adapter_infos = []
4646
for adapter_id, adapter_data in adapters.items():
@@ -77,7 +77,7 @@ def load_lora_adapter(
7777
"""Load a LoRA adapter as mentioned in the request."""
7878

7979
try:
80-
self._driver.loadAdapterToTensorstore(request.adapter_id, request.adapter_path)
80+
self._driver.load_adapter_to_tensorstore(request.adapter_id, request.adapter_path)
8181

8282
return multi_lora_decoding_pb2.LoadAdapterResponse(success=True)
8383
except Exception as e:
@@ -93,7 +93,7 @@ def unload_lora_adapter(
9393
"""Unload a LoRA adapter as mentioned in the request."""
9494

9595
try:
96-
self._driver.unloadAdapterFromTensorstore(request.adapter_id)
96+
self._driver.unload_adapter_from_tensorstore(request.adapter_id)
9797
return multi_lora_decoding_pb2.UnloadAdapterResponse(success=True)
9898
except Exception as e:
9999
logging.info(f"Loading of adapter_id={request.adapter_id} failed with error: {str(e)}")

jetstream/core/metrics/prometheus.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -254,14 +254,14 @@ def __init__(self, model_name: Optional[str] = None):
254254

255255
_kv_cache_utilization = Gauge(
256256
name="kv_cache_utilization_perc",
257-
documentation="Percentage of kv-cache utilized by the requests under processing.",
257+
documentation="kv-cache utilization % by the requests under processing.",
258258
labelnames=["id"],
259259
multiprocess_mode="sum",
260260
)
261261

262262
_lora_request_info = Gauge(
263263
name="lora_request_info",
264-
documentation="Information about LoRA adapters loaded into TPU Memory for serving current requests.",
264+
documentation="LoRA adapters loaded into HBM for processing requests.",
265265
labelnames=[
266266
"id",
267267
"max_lora",
@@ -322,4 +322,5 @@ def get_kv_cache_utilization_metric(self):
322322
return self._kv_cache_utilization.labels(**self.universal_labels)
323323

324324
def get_lora_request_info_metric(self, max_lora: int, loaded_adapters: str):
325-
return self._lora_request_info.labels(**self.universal_labels, max_lora=max_lora, running_lora_adapters=loaded_adapters)
325+
return self._lora_request_info.labels(**self.universal_labels,
326+
max_lora=max_lora, running_lora_adapters=loaded_adapters)

0 commit comments

Comments
 (0)