Skip to content

Commit 9053ffd

Browse files
fix(mac-chat): stop [stream] progress lines interleaving with the answer (#153)
The per-block '[stream] blk=.. t=..s' progress lines were written to stderr while the answer delta was written to stdout; on a shared terminal the two streams interleaved, so the progress lines landed INSIDE the answer text (e.g. 在计算机[stream] blk=1...). Emit the timing line ONLY on the non-interactive (scripted/bridge) path; the interactive CLI now streams ONLY the clean answer delta. Add --chat-stream-stdout so that a non-tty bridge run can stream the clean answer delta to stdout instead of the timing lines, capturing the exact live output format for validation.
Co-authored-by: Cursor Agent <cursoragent@cursor.com>
Co-authored-by: FluffyAIcode <FluffyAIcode@users.noreply.github.com>
1 parent d7bbcaf commit 9053ffd

2 files changed

Lines changed: 25 additions & 7 deletions

File tree

inference_engine/bridge/manifest.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -767,7 +767,8 @@ def _harness_preset(
767767
"--s5-exact-full-attn", "--fused-specdecode", "--force-f-theta",
768768
"--sink-size", "4", "--window-size", "64", "--block-size", "4",
769769
"--max-new-tokens", "{max_new_tokens}", "--ignore-turn-stop",
770-
"--chat", "--chat-scripted", "根据pow的机制,给出完整的c代码实现",
770+
"--chat", "--chat-stream-stdout",
771+
"--chat-scripted", "根据pow的机制,给出完整的c代码实现",
771772
"--output", "results/research/chat_stream_probe_2815.json",
772773
),
773774
),

scripts/research/k3_integrated_niah_eval_mac.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,11 @@ def parse_args() -> argparse.Namespace:
180180
ap.add_argument("--chat-scripted", default=None,
181181
help="Non-interactive chat: '||'-separated user turns "
182182
"(for Mac-bridge verification); writes a transcript.")
183+
ap.add_argument("--chat-stream-stdout", action="store_true",
184+
help="In scripted chat, stream the clean answer delta to "
185+
"stdout (as the interactive CLI does) instead of the "
186+
"per-block [stream] timing lines — lets a non-tty bridge "
187+
"run capture the exact live output format.")
183188
ap.add_argument("--chat-native-ref", action="store_true",
184189
help="DIAGNOSTIC opt-in: before each chat turn, also run a "
185190
"plain NATIVE greedy AR decode of the SAME prompt for "
@@ -870,15 +875,21 @@ def cb(toks: List[int]) -> None:
870875
except TypeError:
871876
txt = tokenizer.decode(toks)
872877
if to_stdout:
878+
# Interactive: emit ONLY the clean answer delta to stdout.
879+
# (No per-block progress line here — stderr would interleave
880+
# with the streamed text in the terminal and mangle it.)
873881
delta = txt[st["chars"]:]
874882
if delta:
875883
sys.stdout.write(delta)
876884
sys.stdout.flush()
877885
st["chars"] = len(txt)
878-
sys.stderr.write(
879-
f"[stream] blk={st['blk']} tok={len(toks)} "
880-
f"t={time.perf_counter() - st['t0']:.1f}s\n")
881-
sys.stderr.flush()
886+
else:
887+
# Non-interactive (bridge/scripted): timing-only progress to
888+
# stderr (proves streaming / liveness in the captured log).
889+
sys.stderr.write(
890+
f"[stream] blk={st['blk']} tok={len(toks)} "
891+
f"t={time.perf_counter() - st['t0']:.1f}s\n")
892+
sys.stderr.flush()
882893
return cb
883894

884895
print(f"[chat] FULL fused engine: verifier={args.verifier_path} "
@@ -893,8 +904,14 @@ def cb(toks: List[int]) -> None:
893904
transcript = []
894905
for u in turns:
895906
history.append({"role": "user", "content": u})
896-
res = _gen_turn(_encode_chat(history),
897-
on_commit=_make_stream_cb(to_stdout=False))
907+
if args.chat_stream_stdout:
908+
sys.stdout.write(f"\ngemma-4 [{u[:24]}]> ")
909+
sys.stdout.flush()
910+
res = _gen_turn(_encode_chat(history), on_commit=_make_stream_cb(
911+
to_stdout=args.chat_stream_stdout))
912+
if args.chat_stream_stdout:
913+
sys.stdout.write("\n")
914+
sys.stdout.flush()
898915
history.append({"role": "assistant", "content": res["text"]})
899916
tps = (res["decode_tokens"] / res["decode_s"]
900917
if res["decode_s"] > 0 else 0.0)

0 commit comments

Comments
 (0)