Skip to content

Commit 6ce324c

Browse files
committed
Merging 'main' to 'amangu-lora'
2 parents 5bda29b + a75f64f commit 6ce324c

2 files changed

Lines changed: 15 additions & 0 deletions

File tree

experimental/jax/inference/config/config.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
class ModelId:
2121
llama_2_7b_chat_hf = "meta-llama/Llama-2-7b-chat-hf"
22+
llama_2_70b_chat_hf = "meta-llama/Llama-2-70b-chat-hf"
2223

2324

2425
@dataclasses.dataclass
@@ -43,6 +44,15 @@ class Config:
4344
page_size=128,
4445
hbm_utilization=0.875,
4546
),
47+
ModelId.llama_2_70b_chat_hf: InferenceParams(
48+
model_id=ModelId.llama_2_70b_chat_hf,
49+
batch_size=100,
50+
max_seq_length=2048,
51+
max_input_length=1024,
52+
prefill_chunk_sizes=[128, 256, 512, 1024],
53+
page_size=128,
54+
hbm_utilization=0.875,
55+
),
4656
}
4757

4858
@classmethod

jetstream/core/server_lib.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,11 @@ def run(
266266
Returns:
267267
JetStreamServer that wraps the grpc server and orchestrator driver.
268268
"""
269+
# TODO: Deleting the lora_input_adapters_path for now.
270+
# Planning to use it in the next big PR. Currently accommodating it
271+
# to fix the params mismatch between MaxText and JetStream
272+
del lora_input_adapters_path
273+
269274
server_start_time = time.time()
270275
logging.info("Kicking off gRPC server.")
271276
# Setup Prometheus server

0 commit comments

Comments
 (0)