@@ -50,7 +50,7 @@ We highly recommend using a fast storage device (e.g., NVMe SSD) to store the mo
5050For example, create a directory `models` on the NVMe SSD and link it to the local path.
5151```bash
5252mkdir -p /mnt/nvme/models # Replace '/mnt/nvme' with your NVMe SSD path.
53- ln -s /mnt/nvme/models ./models
53+ ln -s /mnt/nvme/models ~/models
5454```
5555:::
5656
@@ -63,8 +63,8 @@ import torch
6363from transformers import AutoModelForCausalLM
6464model = AutoModelForCausalLM.from_pretrained('facebook/opt-1.3b', torch_dtype=torch.float16)
6565
66- # Replace './models' with your local path.
67- save_model(model, './models/facebook/opt-1.3b')
66+ # Replace '~/models' with your local path.
67+ save_model(model, '~/models/facebook/opt-1.3b')
6868```
6969
70702. Launch the checkpoint store server in a separate process:
@@ -94,7 +94,7 @@ for i in range(num_gpus):
9494 torch.cuda.synchronize()
9595
9696start = time.time()
97- model = load_model("facebook/opt-1.3b", device_map="auto", torch_dtype=torch.float16, storage_path="./models/", fully_parallel=True)
97+ model = load_model("facebook/opt-1.3b", device_map="auto", torch_dtype=torch.float16, storage_path=os.path.expanduser("~/models"), fully_parallel=True)
9898# Please note the loading time depends on the model size and the hardware bandwidth.
9999print(f"Model loading time: {time.time() - start:.2f}s")
100100
@@ -164,7 +164,7 @@ from vllm import LLM, SamplingParams
164164
165165import os
166166
167- storage_path = os.getenv("STORAGE_PATH", "./models")
167+ storage_path = os.getenv("STORAGE_PATH", os.path.expanduser("~/models"))
168168model_name = "facebook/opt-1.3b"
169169model_path = os.path.join(storage_path, model_name)
170170
@@ -211,8 +211,8 @@ from sllm_store.transformers import save_lora
211211from transformers import AutoModelForCausalLM
212212model = AutoModelForCausalLM.from_pretrained('facebook/opt-1.3b', torch_dtype=torch.float16) -->
213213
214- # Replace './models' with your local path.
215- save_lora(adapter, './models/facebook/opt-1.3b')
214+ # Replace '~/models' with your local path.
215+ save_lora(adapter, '~/models/facebook/opt-1.3b')
216216```
217217
2182182. Launch the checkpoint store server in a separate process:
@@ -227,9 +227,9 @@ import time
227227import torch
228228from sllm_store.transformers import load_model, load_lora
229229
230- model = load_model("facebook/opt-1.3b", device_map="auto", torch_dtype=torch.float16, storage_path="./models/", fully_parallel=True)
230+ model = load_model("facebook/opt-1.3b", device_map="auto", torch_dtype=torch.float16, storage_path=os.path.expanduser("~/models"), fully_parallel=True)
231231
232- model = load_lora("facebook/opt-1.3b", adapter_name="demo_lora", adapter_path="ft_facebook/opt-1.3b_adapter1", device_map="auto", torch_dtype=torch.float16, storage_path="./models/")
232+ model = load_lora("facebook/opt-1.3b", adapter_name="demo_lora", adapter_path="ft_facebook/opt-1.3b_adapter1", device_map="auto", torch_dtype=torch.float16, storage_path=os.path.expanduser("~/models"))
233233
234234# Please note the loading time depends on the base model size and the hardware bandwidth.
235235print(f"Model loading time: {time.time() - start:.2f}s")
0 commit comments