Skip to content

Commit f79b8fa

Browse files
Copilot and stephentoub authored
Add gpt-5.4 to tiktoken tokenizer (#7591)
* Initial plan
* Add gpt-5.4 tokenizer mapping

Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com>
1 parent def7a4a commit f79b8fa

File tree

2 files changed

+6
-1
lines changed

2 files changed

+6
-1
lines changed

src/Microsoft.ML.Tokenizers/Model/TiktokenTokenizer.cs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1044,6 +1044,7 @@ private static readonly (string Prefix, ModelEncoding Encoding)[] _modelPrefixTo
10441044
( "o4-mini-", ModelEncoding.O200kBase ), // e.g. o4-mini
10451045

10461046
// chat
1047+
( "gpt-5.4-", ModelEncoding.O200kBase ),
10471048
( "gpt-5.3-", ModelEncoding.O200kBase ),
10481049
( "gpt-5.2-", ModelEncoding.O200kBase ),
10491050
( "gpt-5.1-", ModelEncoding.O200kBase ),
@@ -1074,6 +1075,7 @@ private static readonly (string Prefix, ModelEncoding Encoding)[] _modelPrefixTo
10741075
{ "o4-mini", ModelEncoding.O200kBase },
10751076

10761077
// chat
1078+
{ "gpt-5.4", ModelEncoding.O200kBase },
10771079
{ "gpt-5.3", ModelEncoding.O200kBase },
10781080
{ "gpt-5.2", ModelEncoding.O200kBase },
10791081
{ "gpt-5.1", ModelEncoding.O200kBase },

test/Microsoft.ML.Tokenizers.Tests/TiktokenTests.cs

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,7 @@ public class TiktokenTests
3939
public static Tokenizer GPT5_1 { get; } = TiktokenTokenizer.CreateForModel("gpt-5.1");
4040
public static Tokenizer GPT5_2 { get; } = TiktokenTokenizer.CreateForModel("gpt-5.2");
4141
public static Tokenizer GPT5_3 { get; } = TiktokenTokenizer.CreateForModel("gpt-5.3");
42+
public static Tokenizer GPT5_4 { get; } = TiktokenTokenizer.CreateForModel("gpt-5.4");
4243
public static Tokenizer Phi4 { get; } = TiktokenTokenizer.CreateForModel("phi-4");
4344
public static TiktokenTokenizer GptOss { get; } = TiktokenTokenizer.CreateForModel("gpt-oss-20b");
4445

@@ -424,6 +425,8 @@ public void TestEncodeR50kBase()
424425
[InlineData("gpt-5.2-mini")]
425426
[InlineData("gpt-5.3")]
426427
[InlineData("gpt-5.3-mini")]
428+
[InlineData("gpt-5.4")]
429+
[InlineData("gpt-5.4-nano")]
427430
[InlineData("chatgpt-4o-")]
428431
[InlineData("gpt-4")]
429432
[InlineData("gpt-4-")]
@@ -545,6 +548,7 @@ public void TestEncodingNamesNegativeCases()
545548
[InlineData("gpt-5.1")]
546549
[InlineData("gpt-5.2")]
547550
[InlineData("gpt-5.3")]
551+
[InlineData("gpt-5.4")]
548552
[InlineData("o1")]
549553
[InlineData("o3")]
550554
[InlineData("o4-mini")]
@@ -924,4 +928,3 @@ public void TestLargeInputConsistency(int length)
924928
}
925929
}
926930
}
927-

0 commit comments

Comments
 (0)