Skip to content

Commit d8def44

Browse files
unamedkr and claude
committed
Fix CLI: suppress HuggingFace warnings and pad_token_id message
- Redirect stderr during model loading (removes "Warning: unauthenticated", "fast path not available", "Loading weights" progress bar)
- Pre-set pad_token_id before generate() (removes "Setting pad_token_id" message)
- Set TRANSFORMERS_NO_ADVISORY_WARNINGS, HF_HUB_DISABLE_PROGRESS_BARS env vars
- Clean output: only the question, answer, and KV analysis are shown

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 005de79 commit d8def44

1 file changed

Lines changed: 24 additions & 6 deletions

File tree

tools/tq_chat.py

Lines changed: 24 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -115,8 +115,9 @@ def run_chat(question, model, tokenizer):
115115

116116
print(f" {C.BOLD}{C.GREEN}A:{C.NC} ", end="", flush=True)
117117

118+
import contextlib, io
118119
t0 = time.time()
119-
with torch.no_grad():
120+
with torch.no_grad(), contextlib.redirect_stderr(io.StringIO()):
120121
out = model.generate(
121122
**inputs,
122123
max_new_tokens=300,
@@ -158,17 +159,34 @@ def main():
158159

159160
print_header()
160161

161-
# Load model
162+
# Load model (suppress noisy warnings)
162163
print(f" {C.DIM}Loading Qwen3.5-0.8B...{C.NC}", end="", flush=True)
164+
165+
import warnings
166+
import logging
167+
import contextlib, io
168+
warnings.filterwarnings("ignore")
169+
logging.disable(logging.WARNING)
170+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
171+
os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "1"
172+
os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"
173+
163174
import torch
164175
from transformers import AutoModelForCausalLM, AutoTokenizer
165176

166177
model_name = "Qwen/Qwen3.5-0.8B"
167-
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
168-
model = AutoModelForCausalLM.from_pretrained(
169-
model_name, trust_remote_code=True, dtype=torch.float32
170-
)
178+
with contextlib.redirect_stderr(io.StringIO()):
179+
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
180+
model = AutoModelForCausalLM.from_pretrained(
181+
model_name, trust_remote_code=True, dtype=torch.float32
182+
)
171183
model.eval()
184+
185+
# Pre-set pad_token_id to suppress "Setting pad_token_id" message
186+
if tokenizer.pad_token_id is None:
187+
tokenizer.pad_token_id = tokenizer.eos_token_id
188+
model.generation_config.pad_token_id = tokenizer.eos_token_id
189+
172190
print(f" {C.GREEN}{C.NC}")
173191
print()
174192

0 commit comments

Comments (0)