This repository was archived by the owner on Jul 16, 2025. It is now read-only.
forked from meta-llama/llama
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexample_chat_completion.py
More file actions
52 lines (43 loc) · 1.81 KB
/
Copy pathexample_chat_completion.py
File metadata and controls
52 lines (43 loc) · 1.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# Copyright (c) Meta Platforms, Inc. and affiliates.
# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
from typing import List, Optional
import fire
from llama import Llama, Dialog
def main(
    ckpt_dir: str,
    tokenizer_path: str,
    temperature: float = 0.6,
    top_p: float = 0.9,
    max_seq_len: int = 128,
    max_gen_len: Optional[int] = None,
) -> None:
    """
    Build a Llama generator and run one chat completion on a fixed prompt.

    The prompt is a single user message ("Who are you?"), which is echoed to
    stdout before the completion is requested.

    Args:
        ckpt_dir (str): The directory containing checkpoint files for the pretrained model.
        tokenizer_path (str): The path to the tokenizer model used for text encoding/decoding.
        temperature (float, optional): The temperature value for controlling randomness in
            generation. Defaults to 0.6.
        top_p (float, optional): The top-p sampling parameter for controlling diversity in
            generation. Defaults to 0.9.
        max_seq_len (int, optional): The maximum sequence length, forwarded to
            ``Llama.build``. Defaults to 128.
        max_gen_len (int, optional): The maximum length of generated sequences, forwarded
            unchanged to ``chat_completion``. Defaults to None, in which case the limit is
            whatever ``chat_completion`` chooses (presumably the model's max sequence
            length -- confirm against this fork's implementation).
    """
    generator = Llama.build(
        ckpt_dir=ckpt_dir,
        tokenizer_path=tokenizer_path,
        max_seq_len=max_seq_len,
    )

    dialog: Dialog = [{"role": "user", "content": "Who are you?"}]

    # Echo the prompt so the console transcript shows what was asked.
    for msg in dialog:
        print(f"{msg['role'].capitalize()}: {msg['content']}\n")

    # NOTE(review): the return value is discarded here, so the reply only
    # reaches the console if chat_completion emits it itself -- confirm.
    generator.chat_completion(
        dialog,  # type: ignore
        max_gen_len=max_gen_len,
        temperature=temperature,
        top_p=top_p,
    )
# When run as a script (not imported), hand main() to python-fire so it is
# exposed as the command-line entry point.
if __name__ == "__main__":
    fire.Fire(main)