Skip to content

Commit 62c75dc

Browse files
Update ai-model/data/generate_dataset.py
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 156edfa commit 62c75dc

1 file changed

Lines changed: 6 additions & 6 deletions

File tree

ai-model/data/generate_dataset.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -91,12 +91,12 @@ def generate_examples(num_tasks: int = 2000, num_qa: int = 2000) -> List[Dict]:
91 91

92 92
def format_for_training(examples: List[Dict]) -> List[Dict]:
93 93
"""Format examples for TinyLlama chat format."""
94-
# Define tags using chr() to avoid parsing issues
95-
sys_open = chr(60) + "|system|" + chr(62)
96-
user_open = chr(60) + "|user|" + chr(62)
97-
asst_open = chr(60) + "|assistant|" + chr(62)
98-
end_tag = chr(60) + "/s" + chr(62)
99-
newline = chr(10)
94+
# Define tags for TinyLlama chat format
95+
sys_open = "<|system|>"
96+
user_open = "<|user|>"
97+
asst_open = "<|assistant|>"
98+
end_tag = "</s>"
99+
newline = "\n"
100 100

101 101
formatted = []
102 102
for ex in examples:

0 commit comments

Comments
 (0)