Skip to content

Commit f5bfe1d

Browse files
committed
Update documentation from main repository
1 parent 64fbece commit f5bfe1d

File tree

3 files changed

+11
-11
lines changed

3 files changed

+11
-11
lines changed

docs/api/sllm-store-cli.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ pip install serverless-llm-store
3333
```
3434

3535
## Example Workflow
36-
1. Firstly, start the ServerlessLLM Store server. By default, it uses ./models as the storage path.
36+
1. Firstly, start the ServerlessLLM Store server. By default, it uses ~/models as the storage path.
3737
Launch the checkpoint store server in a separate process:
3838
``` bash
3939
# 'mem_pool_size' is the maximum size of the memory pool in GB. It should be larger than the model size.

docs/deployment/single_machine.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -132,7 +132,7 @@ ray start --address=0.0.0.0:6379 --num-cpus=4 --num-gpus=1 \
132132

133133
### 2. Start the ServerlessLLM Store Server
134134

135-
Next, start the ServerlessLLM Store server. By default, it uses `./models` as the storage path.
135+
Next, start the ServerlessLLM Store server. By default, it uses `~/models` as the storage path.
136136

137137
Open a new terminal and run:
138138

docs/store/quickstart.md

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ We highly recommend using a fast storage device (e.g., NVMe SSD) to store the mo
5050
For example, create a directory `models` on the NVMe SSD and link it to the local path.
5151
```bash
5252
mkdir -p /mnt/nvme/models # Replace '/mnt/nvme' with your NVMe SSD path.
53-
ln -s /mnt/nvme/models ./models
53+
ln -s /mnt/nvme/models ~/models
5454
```
5555
:::
5656

@@ -63,8 +63,8 @@ import torch
6363
from transformers import AutoModelForCausalLM
6464
model = AutoModelForCausalLM.from_pretrained('facebook/opt-1.3b', torch_dtype=torch.float16)
6565

66-
# Replace './models' with your local path.
67-
save_model(model, './models/facebook/opt-1.3b')
66+
# Replace '~/models' with your local path.
67+
save_model(model, '~/models/facebook/opt-1.3b')
6868
```
6969

7070
2. Launch the checkpoint store server in a separate process:
@@ -94,7 +94,7 @@ for i in range(num_gpus):
9494
torch.cuda.synchronize()
9595

9696
start = time.time()
97-
model = load_model("facebook/opt-1.3b", device_map="auto", torch_dtype=torch.float16, storage_path="./models/", fully_parallel=True)
97+
model = load_model("facebook/opt-1.3b", device_map="auto", torch_dtype=torch.float16, storage_path=os.path.expanduser("~/models"), fully_parallel=True)
9898
# Please note the loading time depends on the model size and the hardware bandwidth.
9999
print(f"Model loading time: {time.time() - start:.2f}s")
100100

@@ -164,7 +164,7 @@ from vllm import LLM, SamplingParams
164164

165165
import os
166166

167-
storage_path = os.getenv("STORAGE_PATH", "./models")
167+
storage_path = os.getenv("STORAGE_PATH", os.path.expanduser("~/models"))
168168
model_name = "facebook/opt-1.3b"
169169
model_path = os.path.join(storage_path, model_name)
170170

@@ -211,8 +211,8 @@ from sllm_store.transformers import save_lora
211211
from transformers import AutoModelForCausalLM
212212
model = AutoModelForCausalLM.from_pretrained('facebook/opt-1.3b', torch_dtype=torch.float16) -->
213213
214-
# Replace './models' with your local path.
215-
save_lora(adapter, './models/facebook/opt-1.3b')
214+
# Replace '~/models' with your local path.
215+
save_lora(adapter, '~/models/facebook/opt-1.3b')
216216
```
217217

218218
2. Launch the checkpoint store server in a separate process:
@@ -227,9 +227,9 @@ import time
227227
import torch
228228
from sllm_store.transformers import load_model, load_lora
229229
230-
model = load_model("facebook/opt-1.3b", device_map="auto", torch_dtype=torch.float16, storage_path="./models/", fully_parallel=True)
230+
model = load_model("facebook/opt-1.3b", device_map="auto", torch_dtype=torch.float16, storage_path=os.path.expanduser("~/models"), fully_parallel=True)
231231
232-
model = load_lora("facebook/opt-1.3b", adapter_name="demo_lora", adapter_path="ft_facebook/opt-1.3b_adapter1", device_map="auto", torch_dtype=torch.float16, storage_path="./models/")
232+
model = load_lora("facebook/opt-1.3b", adapter_name="demo_lora", adapter_path="ft_facebook/opt-1.3b_adapter1", device_map="auto", torch_dtype=torch.float16, storage_path=os.path.expanduser("~/models"))
233233
234234
# Please note the loading time depends on the base model size and the hardware bandwidth.
235235
print(f"Model loading time: {time.time() - start:.2f}s")

0 commit comments

Comments (0)