jirispilka
diff --git a/‎CHANGELOG.md‎
Lines changed: 4 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎src/main.py‎
Lines changed: 6 additions & 1 deletion b/‎src/main.py‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎src/utils.py‎
Lines changed: 1 addition & 1 deletion b/‎src/utils.py‎
Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,9 @@
 # Change Log
 
+## 0.2.7 (2025-09-18)
+
+- Update tiktoken to support the latest models.
+
 ## 0.2.6 (2025-08-15)
 
 - Fix issue with whitespace in the input parameters.
 
@@ -121,7 +121,12 @@ async def create_files_from_dataset(
         data = [{key: get_nested_value(d, key) for key in actor_input.datasetFields} for d in data]
         data = [d for d in data if d]
 
-    if encoding := assistant and tiktoken.encoding_for_model(assistant.model) or None:
+    if assistant:
+        try:
+            encoding = tiktoken.encoding_for_model(assistant.model)
+        except KeyError:
+            encoding = tiktoken.get_encoding("o200k_base")
+            Actor.log.warning("Model %s not found. Using o200k_base encoding", assistant.model)
         data = await split_data_if_required(data, encoding)
     else:
         data = [data]
 
@@ -125,7 +125,7 @@ def split_data_into_batches(data: list, max_tokens: int, encoding: tiktoken.core
 
     Example:
     >>> d = [{"name": "Alice"}, {"name": "Bob"}, {"name": "Carol"}]
-    >>> enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
+    >>> enc = tiktoken.encoding_for_model("gpt-5-mini")
     >>> batches = split_data_into_batches(d, 15, enc)
     >>> print(batches)
     [[{'name': 'Alice'}, {'name': 'Bob'}], [{'name': 'Carol'}]]