@@ -97,11 +97,17 @@ public static void tearDown() {
9797 * Qwen3 enters thinking mode by default. With {@code reasoning_format=deepseek} set
9898 * at model level, the thinking tokens must appear in {@code reasoning_content} and
9999 * the final answer must appear in {@code content}.
100+ *
101+ * <p>{@code temperature=0} (greedy sampling) is used so the model deterministically
102+ * enters the {@code <think>} block on every platform, including Metal (macOS arm64)
103+ * where GPU floating-point arithmetic can produce slightly different logit
104+ * distributions, so sampling occasionally picks a non-thinking first token.
100105 */
101106 @ Test
102107 public void testThinkingDefault_reasoningContentAndAnswerPresent () {
103108 InferenceParameters params = new InferenceParameters ("" )
104109 .withMessages (null , Collections .singletonList (new Pair <>("user" , "What is 2+2?" )))
110+ .withTemperature (0.0f )
105111 .withNPredict (N_PREDICT );
106112
107113 String json = model .chatComplete (params );
@@ -132,11 +138,17 @@ public void testThinkingDefault_reasoningContentAndAnswerPresent() {
132138 * that is the signal to remove this test and enable
133139 * {@link #testReasoningBudgetZero_expectedBehavior_suppressesThinking}.
134140 * Tracked in <a href="https://github.com/ggml-org/llama.cpp/pull/23116">llama.cpp PR #23116</a>.
141+ *
142+ * <p>{@code temperature=0} (greedy sampling) is used so the model deterministically
143+ * enters the {@code <think>} block on every platform. Without it, Metal (macOS arm64)
144+ * occasionally samples a non-thinking first token even though the bug leaves the
145+ * reasoning budget effectively unlimited, causing a spurious test failure.
135146 */
136147 @ Test
137148 public void testReasoningBudgetZero_parameterAccepted_thinkingNotSuppressed () {
138149 InferenceParameters params = new InferenceParameters ("" )
139150 .withMessages (null , Collections .singletonList (new Pair <>("user" , "What is 2+2?" )))
151+ .withTemperature (0.0f )
140152 .withReasoningBudgetTokens (0 )
141153 .withNPredict (N_PREDICT );
142154
0 commit comments