Skip to content

Commit ea9dc03

Browse files
committed
conf and json file changes for qwen3.6 and gemma-4 models
1 parent 08d8c6f commit ea9dc03

11 files changed

Lines changed: 1022 additions & 133 deletions

CLAUDE.md

Lines changed: 277 additions & 1 deletion
Large diffs are not rendered by default.

scripts-local/gemma-4-26b-a4b.conf

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ BATCH_SIZE=2048
3333
JINJA=true
3434

3535
# --- SPEC-SPECIFIC PARAMS (General & Creative Tasks) ---
36-
# Standard Google Gemma-4 Recommended sampling
37-
EXTRA_ARGS="--prio 2"
36+
# --prio is already set via PRIORITY var, no need to duplicate here
37+
EXTRA_ARGS=""
3838

3939
# Sampling (Official Gemma-4 Specs + PRISM-PRO-DQ Tuning)
4040
TEMP=1.0
@@ -48,11 +48,11 @@ DRY_BASE=1.75
4848
DRY_ALLOWED_LENGTH=2
4949
DRY_PENALTY_LAST_N=4096
5050
# Optimized chain for PRISM creative performance
51-
SAMPLERS="dry;top_p;temperature"
51+
SAMPLERS="top_p;temperature"
5252

5353
# Reasoning Budget (Native Gemma-4 Mode)
5454
REASONING="auto"
5555
REASONING_FORMAT="auto"
5656
REASONING_BUDGET=-1
57-
# Official trigger for Gemma-4 transition
57+
# REASONING_BUDGET_MESSAGE is kept below but is inert — it never fires when REASONING_BUDGET=-1
5858
REASONING_BUDGET_MESSAGE=" [Logic Finalized] "

scripts-local/generate-ui-profiles.py

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -111,22 +111,25 @@ def generate_personas(conf, config_name):
111111
chat_kwargs = { "preserve_thinking": True }
112112
continuity = "Maintain logical continuity by recursively validating your current plan against previous reasoning traces in this conversation history.\n\n"
113113
thinking_budget = 2048.0
114-
samplers = "dry;top_k;top_p;xtc;min_p;temperature"
114+
samplers = "top_k;top_p;min_p;temperature"
115115
elif is_gemma4:
116116
identity = "Gemma-4 PRISM-Optimized Assistant"
117117
chat_kwargs = {} # No preserve_thinking for Gemma-4
118118
continuity = "" # No recursive logic for Gemma-4 (memoryless thoughts)
119119
thinking_budget = 1024.0
120-
samplers = "dry;top_k;top_p;xtc;min_p;temperature"
120+
samplers = "top_k;top_p;min_p;temperature"
121121
else:
122122
identity = "Standard LLM Engine"
123123
chat_kwargs = {}
124124
continuity = ""
125125
thinking_budget = 1024.0
126-
samplers = "dry;top_k;top_p;xtc;min_p;temperature"
126+
samplers = "top_k;top_p;min_p;temperature"
127127

128-
# Strict reasoning extraction directive
129-
reasoning_protocol = "IMPORTANT: You MUST wrap all internal thinking and logical scratchpads inside <think> and </think> tags. This is mandatory for the server to correctly extract and hide your reasoning.\n\n"
128+
# Reasoning extraction is handled natively by each model's chat template
129+
# (qwen uses <think> tags, gemma uses <|channel> markers). Forcing a wrong-tag
130+
# directive in the system prompt corrupts gemma output and over-triggers thinking
131+
# on personas where enable_thinking is False. Trust the template.
132+
reasoning_protocol = ""
130133

131134
persona_templates = [
132135
{
@@ -148,7 +151,7 @@ def generate_personas(conf, config_name):
148151
"suffix": "fast",
149152
"name": "⚡ Fast | Direct Assistant",
150153
"params": {
151-
"system": f"You are a {identity}. Provide immediate, high-density facts.\n\n{reasoning_protocol}Keep internal reasoning extremely brief and skip drafting the final response. If your thinking is interrupted, immediately transition to the final answer format.\n\n### ✅ Response Format\n- Use **bold** for key terms.\n- Use double-newlines and keep paragraphs under 3 lines.\n- Use `---` for dividers.",
154+
"system": f"You are a {identity}. Provide immediate, high-density facts.\n\n### ✅ Response Format\n- Use **bold** for key terms.\n- Use double-newlines and keep paragraphs under 3 lines.\n- Use `---` for dividers.",
152155
"temperature": temp,
153156
"top_p": top_p,
154157
"top_k": top_k,
@@ -158,12 +161,11 @@ def generate_personas(conf, config_name):
158161
"repeat_penalty": repeat_penalty,
159162
"repeat_last_n": 16.0,
160163
"samplers": "top_k;top_p",
161-
"thinking_budget_tokens": 64.0,
162164
"dry_multiplier": dry_multiplier,
163165
"dry_base": dry_base,
164166
"dry_allowed_length": dry_allowed,
165167
"dry_penalty_last_n": dry_last_n,
166-
"chat_template_kwargs": { "enable_thinking": True }
168+
"chat_template_kwargs": { "enable_thinking": False }
167169
},
168170
"meta": { "description": "High-speed direct answers.", "capabilities": { "vision": True, "web_search": True, "file_upload": True, "status_updates": True, "builtin_tools": True } }
169171
},
@@ -183,8 +185,8 @@ def generate_personas(conf, config_name):
183185
"xtc_probability": xtc_prob,
184186
"xtc_threshold": xtc_threshold,
185187
"repeat_penalty": 1.1,
186-
"samplers": "dry;top_k;min_p",
187-
"thinking_budget_tokens": 750.0,
188+
"samplers": "top_k;top_p;min_p;temperature",
189+
"thinking_budget_tokens": 1024.0,
188190
"dry_multiplier": dry_multiplier,
189191
"dry_base": dry_base,
190192
"dry_allowed_length": dry_allowed,
@@ -208,8 +210,8 @@ def generate_personas(conf, config_name):
208210
"xtc_probability": xtc_prob,
209211
"xtc_threshold": xtc_threshold,
210212
"repeat_penalty": 1.05,
211-
"samplers": "dry;top_p;temperature",
212-
"thinking_budget_tokens": 1536.0,
213+
"samplers": "top_k;top_p;min_p;temperature",
214+
"thinking_budget_tokens": 2048.0,
213215
"dry_multiplier": dry_multiplier,
214216
"dry_base": dry_base,
215217
"dry_allowed_length": dry_allowed,
@@ -231,7 +233,7 @@ def generate_personas(conf, config_name):
231233
"xtc_threshold": xtc_threshold,
232234
"repeat_penalty": 1.08,
233235
"samplers": samplers,
234-
"thinking_budget_tokens": 2048.0,
236+
"thinking_budget_tokens": 3072.0,
235237
"dry_multiplier": dry_multiplier,
236238
"dry_base": dry_base,
237239
"dry_allowed_length": dry_allowed,
@@ -244,9 +246,9 @@ def generate_personas(conf, config_name):
244246
"suffix": "creative",
245247
"name": "🎨 Creative | Stylist",
246248
"params": {
247-
"system": f"You are a Master Storyteller using {identity}. " +
248-
("Perform frame-precise temporal analysis for video and image inputs.\n\n" if is_agentic else "") +
249-
f"{reasoning_protocol}" +
249+
"system": f"You are a Master Storyteller using {identity}. " +
250+
("Perform frame-precise temporal analysis for video and image inputs.\n\n" if is_gemma4 else "") +
251+
f"{reasoning_protocol}" +
250252
"### ✨ Narrative Style\n- Use evocative, descriptive language.\n- Use 🎭 for character shifts and ✨ for key moments.\n- Maintain rhythm with short paragraphs and double-newlines.",
251253
"temperature": 0.85,
252254
"top_p": top_p,
@@ -255,7 +257,7 @@ def generate_personas(conf, config_name):
255257
"xtc_probability": xtc_prob,
256258
"xtc_threshold": xtc_threshold,
257259
"repeat_penalty": 1.02,
258-
"samplers": "dry;top_p",
260+
"samplers": "top_p;temperature",
259261
"thinking_budget_tokens": 1024.0,
260262
"dry_multiplier": dry_multiplier,
261263
"dry_base": dry_base,
@@ -276,7 +278,7 @@ def generate_personas(conf, config_name):
276278
"min_p": 0.01,
277279
"repeat_penalty": 1.0,
278280
"samplers": "top_k;temperature",
279-
"thinking_budget_tokens": 1024.0,
281+
"thinking_budget_tokens": 2048.0,
280282
"chat_template_kwargs": chat_kwargs
281283
},
282284
"meta": { "description": "Ultra-precise math.", "capabilities": { "code_interpreter": True } }
@@ -293,7 +295,7 @@ def generate_personas(conf, config_name):
293295
"repeat_penalty": 1.0,
294296
"samplers": "top_k;top_p",
295297
"thinking_budget_tokens": 256.0,
296-
"chat_template_kwargs": chat_kwargs
298+
"chat_template_kwargs": { "enable_thinking": False }
297299
},
298300
"meta": { "description": "Fast data extraction.", "capabilities": { "file_context": True } }
299301
}
@@ -308,6 +310,7 @@ def generate_personas(conf, config_name):
308310
"name": t["name"],
309311
"params": t["params"],
310312
"meta": t["meta"],
313+
"access_control": None, # null = public (visible to all users in OpenWebUI)
311314
"is_active": True,
312315
"write_access": True
313316
}

0 commit comments

Comments
 (0)