Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions python/minisgl/message/frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@ class UserReply(BaseFrontendMsg):
uid: int
incremental_output: str
finished: bool
reasoning_output: str = ""
12 changes: 12 additions & 0 deletions python/minisgl/parser/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
"""
minisgl.parser
==============

Top-level package for output-parsing utilities.

Sub-packages
------------
:mod:`minisgl.parser.reasoning`
Identify and split reasoning blocks (e.g. ``<think>…</think>``) from
model-generated text, in both streaming and non-streaming modes.
"""
59 changes: 59 additions & 0 deletions python/minisgl/parser/reasoning/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
"""
minisgl.parser.reasoning
========================

Utilities for identifying and splitting reasoning blocks from model-generated
text. Supports both **non-streaming** (full-text) and **streaming**
(chunk-by-chunk) parsing.

Quick start
-----------
::

from minisgl.parser.reasoning import ReasoningParser

# Non-streaming
parser = ReasoningParser("qwen3")
result = parser.parse_full(full_text)
print(result.reasoning_text) # chain-of-thought
print(result.normal_text) # answer

# Streaming
parser = ReasoningParser("qwen3")
for raw_chunk in token_stream:
sc = parser.parse_stream(raw_chunk)
...
final = parser.flush()

Public API
----------
:class:`ReasoningParser`
Main entry point. Selects a detector by *model_type* and exposes
:meth:`~ReasoningParser.parse_full`, :meth:`~ReasoningParser.parse_stream`,
:meth:`~ReasoningParser.flush`, and :meth:`~ReasoningParser.stream_iter`.

:class:`ParseResult`
Non-streaming result with ``reasoning_text`` and ``normal_text``.

:class:`StreamChunk`
Streaming result with ``reasoning_delta`` and ``normal_delta``.

:class:`BaseDetector`
ABC for custom detectors (subclass to add new reasoning formats).

:class:`ThinkTagDetector`
Concrete detector for the ``<think>…</think>`` format used by
Qwen3-Thinking.
"""

from .base import BaseDetector, ParseResult, StreamChunk
from .parser import ReasoningParser
from .think_tag import ThinkTagDetector

# Names this package re-exports; each one is imported from a sibling module
# directly above.
__all__ = [
    "ReasoningParser",
    "ParseResult",
    "StreamChunk",
    "BaseDetector",
    "ThinkTagDetector",
]
81 changes: 81 additions & 0 deletions python/minisgl/parser/reasoning/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from dataclasses import dataclass


@dataclass
class ParseResult:
    """Outcome of parsing a complete generation in one pass (non-streaming)."""

    reasoning_text: str
    """Text found inside the reasoning block."""

    normal_text: str
    """Text lying outside the reasoning block."""


@dataclass
class StreamChunk:
    """Delta emitted by one call to :meth:`ReasoningParser.parse_stream`."""

    reasoning_delta: str
    """Reasoning text newly decoded in this chunk; empty when there is none."""

    normal_delta: str
    """Normal text newly decoded in this chunk; empty when there is none."""


class BaseDetector(ABC):
    """
    Common interface for reasoning-block detectors.

    A detector supplies the two markers that open and close a reasoning
    block.  :meth:`parse_full` uses them to split a complete generation;
    chunk-by-chunk (streaming) parsing is driven from
    :class:`ReasoningParser`.

    Subclasses have to provide :attr:`start_tag` and :attr:`end_tag`, and
    are free to replace :meth:`parse_full` with model-specific logic.
    """

    @property
    @abstractmethod
    def start_tag(self) -> str:
        """Marker that opens the reasoning block (e.g. ``"<think>"``)."""

    @property
    @abstractmethod
    def end_tag(self) -> str:
        """Marker that closes the reasoning block (e.g. ``"</think>"``)."""

    def parse_full(self, text: str) -> ParseResult:
        """
        Split a complete, fully-decoded generation into its reasoning part
        and its normal part (non-streaming path).

        Only the first :attr:`start_tag` and the first :attr:`end_tag`
        that follows it are considered; any later tags stay verbatim in
        the output.  Three outcomes are possible:

        * no start tag -- the whole of *text* is normal output;
        * start tag without a matching end tag -- everything after the
          start tag is reasoning, everything before it is normal output;
        * both tags present -- the text between them is reasoning and the
          text on either side is joined to form the normal output.

        Subclasses may override this for richer (e.g. multi-block)
        handling.
        """
        open_tag, close_tag = self.start_tag, self.end_tag

        open_at = text.find(open_tag)
        if open_at < 0:
            # No reasoning block at all.
            return ParseResult(reasoning_text="", normal_text=text)

        prefix = text[:open_at]
        body_from = open_at + len(open_tag)
        # Search only past the start tag so an earlier stray end tag is
        # not mistaken for the closing one.
        close_at = text.find(close_tag, body_from)

        if close_at >= 0:
            suffix = text[close_at + len(close_tag):]
            return ParseResult(
                reasoning_text=text[body_from:close_at],
                normal_text=prefix + suffix,
            )

        # The block was opened but never closed: the remainder of the text
        # counts as reasoning, and only the leading part is normal output.
        return ParseResult(reasoning_text=text[body_from:], normal_text=prefix)
Loading