From 7ab221e5a74e012f59ad2a458b01c6c2b3255989 Mon Sep 17 00:00:00 2001
From: Ivan Skriabin <dddccc20000@mail.ru>
Date: Wed, 10 Jun 2026 01:05:27 +0300
Subject: [PATCH 01/14] skeleton

---
 scripts/changelog_tool/changelog-tool         |  28 ++++
 scripts/changelog_tool/changelog.yaml         |   4 +
 .../changelog_tool/changelog_tool/__init__.py |   0
 .../changelog_tool/collect/__init__.py        |   0
 .../changelog_tool/collect/command.py         |   9 ++
 .../changelog_tool/collect/config.py          |   7 +
 .../changelog_tool/common/__init__.py         |   0
 .../changelog_tool/common/git.py              | 149 ++++++++++++++++++
 .../changelog_tool/changelog_tool/config.py   |  13 ++
 scripts/changelog_tool/requirements.txt       |   3 +
 10 files changed, 213 insertions(+)
 create mode 100755 scripts/changelog_tool/changelog-tool
 create mode 100644 scripts/changelog_tool/changelog.yaml
 create mode 100644 scripts/changelog_tool/changelog_tool/__init__.py
 create mode 100644 scripts/changelog_tool/changelog_tool/collect/__init__.py
 create mode 100644 scripts/changelog_tool/changelog_tool/collect/command.py
 create mode 100644 scripts/changelog_tool/changelog_tool/collect/config.py
 create mode 100644 scripts/changelog_tool/changelog_tool/common/__init__.py
 create mode 100644 scripts/changelog_tool/changelog_tool/common/git.py
 create mode 100644 scripts/changelog_tool/changelog_tool/config.py
 create mode 100644 scripts/changelog_tool/requirements.txt

diff --git a/scripts/changelog_tool/changelog-tool b/scripts/changelog_tool/changelog-tool
new file mode 100755
index 000000000000..0a3b27939cd2
--- /dev/null
+++ b/scripts/changelog_tool/changelog-tool
@@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+import pathlib
+import click
+
+import changelog_tool.config as cfg
+import changelog_tool.collect.command as collect_cmd
+
+@click.group()
+@click.option('--config', default='changelog.yaml')
+@click.pass_context
+def cli(ctx: click.Context, config: str):
+    ctx.ensure_object(dict)
+    ctx.obj["CONFIG"] = cfg.parse_config(pathlib.Path(config))
+
+@cli.command()
+@click.option('--from-sha')
+@click.option('--to-sha')
+@click.option('--repo-path', type=pathlib.Path)
+@click.pass_context
+def collect(ctx: click.Context, from_sha: str | None, to_sha: str | None, repo_path: pathlib.Path | None):
+    from_sha = from_sha or ctx.obj["CONFIG"].collect.from_sha
+    to_sha = to_sha or ctx.obj["CONFIG"].collect.to_sha
+    repo_path = repo_path or ctx.obj["CONFIG"].collect.repo_path
+
+    collect_cmd.collect(from_sha, to_sha, repo_path)
+
+if __name__ == '__main__':
+    cli()
\ No newline at end of file
diff --git a/scripts/changelog_tool/changelog.yaml b/scripts/changelog_tool/changelog.yaml
new file mode 100644
index 000000000000..bd9e8920d3fd
--- /dev/null
+++ b/scripts/changelog_tool/changelog.yaml
@@ -0,0 +1,4 @@
+collect:
+  from_sha: c580979b522f43ea1ab9cd55033cd353d52844f6
+  to_sha: HEAD
+  repo_path: ../..
\ No newline at end of file
diff --git a/scripts/changelog_tool/changelog_tool/__init__.py b/scripts/changelog_tool/changelog_tool/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/scripts/changelog_tool/changelog_tool/collect/__init__.py b/scripts/changelog_tool/changelog_tool/collect/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/scripts/changelog_tool/changelog_tool/collect/command.py b/scripts/changelog_tool/changelog_tool/collect/command.py
new file mode 100644
index 000000000000..5d21d57003bf
--- /dev/null
+++ b/scripts/changelog_tool/changelog_tool/collect/command.py
@@ -0,0 +1,9 @@
+import pathlib
+
+import changelog_tool.common.git as git
+
+def collect(from_sha: str, to_sha: str, repo_path: pathlib.Path) -> None:
+    print(f"Collecting commits from {from_sha} to {to_sha}...")
+    commits: list[git.Commit] = git.get_commits(from_sha, to_sha, repo_path)
+    
+    print(f"Found {len(commits)} commits")
\ No newline at end of file
diff --git a/scripts/changelog_tool/changelog_tool/collect/config.py b/scripts/changelog_tool/changelog_tool/collect/config.py
new file mode 100644
index 000000000000..de742e5ca0e8
--- /dev/null
+++ b/scripts/changelog_tool/changelog_tool/collect/config.py
@@ -0,0 +1,7 @@
+import pathlib
+import pydantic
+
+class CollectConfig(pydantic.BaseModel):
+    from_sha: str
+    to_sha: str
+    repo_path: pathlib.Path = pydantic.Field(default_factory=pathlib.Path.cwd)
\ No newline at end of file
diff --git a/scripts/changelog_tool/changelog_tool/common/__init__.py b/scripts/changelog_tool/changelog_tool/common/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/scripts/changelog_tool/changelog_tool/common/git.py b/scripts/changelog_tool/changelog_tool/common/git.py
new file mode 100644
index 000000000000..d9c87b21fc04
--- /dev/null
+++ b/scripts/changelog_tool/changelog_tool/common/git.py
@@ -0,0 +1,149 @@
+from __future__ import annotations
+
+import re
+import subprocess
+import pydantic
+from pathlib import Path
+
+
+class GitError(Exception):
+    """Any error raised while working with git."""
+
+
+class FileChange(pydantic.BaseModel):  # One changed file of a commit, with its line counts.
+    path: str                     # path after the change (the new path for a rename)
+    old_path: str | None = None   # None if the file was not renamed
+    added_lines: int = 0          # stays 0 when numstat reports '-' instead of a number
+    removed_lines: int = 0        # stays 0 when numstat reports '-' instead of a number
+
+
+class Commit(pydantic.BaseModel):
+    sha: str
+    title: str                                            # first line of the message
+    message: str                                          # full message
+    author: str                                           # "Name <email>"
+    co_authors: list[str]                                 # taken from "Co-authored-by:" lines
+    changed_files: list[FileChange]
+    total_added: int = 0
+    total_removed: int = 0
+
+
+def get_commits(
+    from_ref: str | None = None,
+    to_ref: str = "HEAD",
+    repo_path: str | Path | None = None,
+) -> list[Commit]:
+    cwd = _repo(repo_path)
+    rev_range = f"{from_ref}..{to_ref}" if from_ref else to_ref  # no from_ref: to_ref alone is passed to `git log`
+
+    raw_shas = _run_git(["log", "--format=%H", rev_range], cwd)  # one commit hash per output line
+    shas = [s.strip() for s in raw_shas.splitlines() if s.strip()]
+
+    return [_fetch_commit(sha, cwd) for sha in shas]  # same order as the `git log` output; extra git calls per commit
+
+
+def get_commit(
+    sha: str,
+    repo_path: str | Path | None = None,
+) -> Commit:
+    return _fetch_commit(sha, _repo(repo_path))  # repo_path=None means the current working directory
+
+
+def _repo(repo_path: str | Path | None) -> Path:
+    return Path(repo_path) if repo_path is not None else Path.cwd()  # default: current working directory
+
+
+def _run_git(args: list[str], cwd: Path) -> str:
+    """Run ``git <args>`` in *cwd* and return its stdout; raise GitError on any failure."""
+    try:
+        result = subprocess.run(["git", *args], cwd=cwd, capture_output=True, text=True)
+    except FileNotFoundError as err:
+        # A missing working directory raises FileNotFoundError too, so do not
+        # blame the git executable when the repository path is the problem.
+        if not cwd.is_dir():
+            raise GitError(f"repository path does not exist: {cwd}") from err
+        raise GitError("git executable not found") from err
+
+    if result.returncode != 0:
+        raise GitError(result.stderr.strip() or f"git {args[0]} failed")
+
+    return result.stdout
+
+
+def _parse_rename(path_str: str) -> tuple[str, str | None]:
+    """Split a ``--numstat`` path into ``(path, old_path)``; ``old_path`` is None unless renamed."""
+    m = re.match(r'^(.*?)\{(.*?) => (.*?)\}(.*)$', path_str)
+    if m:
+        pre, old_mid, new_mid, suf = m.groups()
+        # An empty side, e.g. "a/{ => b}/c", would leave "a//c" behind: collapse it.
+        old = (pre + old_mid + suf).replace('//', '/').strip('/')
+        new = (pre + new_mid + suf).replace('//', '/').strip('/')
+        return new, old
+    if ' => ' in path_str:
+        old, new = path_str.split(' => ', 1)
+        return new.strip(), old.strip()
+    return path_str, None
+
+
+def _parse_numstat(output: str) -> list[FileChange]:  # Parse `--numstat` output into FileChange records.
+    changes: list[FileChange] = []
+
+    for line in output.splitlines():
+        line = line.strip()
+        if not line:
+            continue
+
+        parts = line.split('\t', 2)  # expected layout: added<TAB>removed<TAB>path
+        if len(parts) != 3:
+            continue  # not a numstat row; silently skipped
+
+        added_str, removed_str, path_str = parts
+
+        added   = 0 if added_str   == '-' else int(added_str)  # '-' counts as 0 (presumably git's marker for binary files; verify)
+        removed = 0 if removed_str == '-' else int(removed_str)
+
+        path, old_path = _parse_rename(path_str)
+        changes.append(FileChange(
+            path=path,
+            old_path=old_path,
+            added_lines=added,
+            removed_lines=removed,
+        ))
+
+    return changes
+
+
+def _parse_co_authors(message: str) -> list[str]:
+    return re.findall(r'(?im)^Co-authored-by:\s*(.+)$', message)  # case-insensitive, line by line; captures the text after the colon
+
+
+def _fetch_commit(sha: str, cwd: Path) -> Commit:  # Build a Commit from `git show` metadata plus `git diff-tree --numstat`.
+    raw_meta = _run_git(
+        ["show", "-s", "--format=%H%x00%an <%ae>%x00%B", sha],  # NUL-separated fields: sha, "name <email>", message
+        cwd,
+    )
+    parts = raw_meta.split('\x00', 2)  # maxsplit=2: any NUL inside the message stays in parts[2]
+    if len(parts) < 3:
+        raise GitError(f"Unexpected git show output for {sha!r}")
+
+    sha_full = parts[0].strip()
+    author   = parts[1].strip()
+    message  = parts[2].strip()
+    title    = message.splitlines()[0] if message else ""  # empty message -> empty title
+
+    raw_numstat = _run_git(
+        ["diff-tree", "--root", "--numstat", "-r", "-M", sha],
+        cwd,
+    )
+    changes = _parse_numstat(raw_numstat)
+
+    return Commit(
+        sha=sha_full,
+        title=title,
+        message=message,
+        author=author,
+        co_authors=_parse_co_authors(message),
+        changed_files=changes,
+        total_added=sum(c.added_lines for c in changes),
+        total_removed=sum(c.removed_lines for c in changes),
+    )
\ No newline at end of file
diff --git a/scripts/changelog_tool/changelog_tool/config.py b/scripts/changelog_tool/changelog_tool/config.py
new file mode 100644
index 000000000000..4fb7d9790575
--- /dev/null
+++ b/scripts/changelog_tool/changelog_tool/config.py
@@ -0,0 +1,13 @@
+from changelog_tool.collect.config import CollectConfig
+
+import pydantic
+import yaml
+import pathlib
+
+class Config(pydantic.BaseModel):
+    collect: CollectConfig
+
+def parse_config(config_path: pathlib.Path) -> Config:
+    with open(config_path, 'r') as f:
+        yaml_data = yaml.safe_load(f)
+        return Config.model_validate(yaml_data)
\ No newline at end of file
diff --git a/scripts/changelog_tool/requirements.txt b/scripts/changelog_tool/requirements.txt
new file mode 100644
index 000000000000..d25ffcfd0fd9
--- /dev/null
+++ b/scripts/changelog_tool/requirements.txt
@@ -0,0 +1,3 @@
+click >= 8.0.0
+PyYAML >= 6.0.1
+pydantic >= 2.5.3
\ No newline at end of file

From 57a2b05c9c91d8047b6875d8ddea0fa7f2c9a050 Mon Sep 17 00:00:00 2001
From: Ivan Skriabin <dddccc20000@mail.ru>
Date: Wed, 10 Jun 2026 01:20:12 +0300
Subject: [PATCH 02/14] collect external

---
 scripts/changelog_tool/AGENTS.md                  | 15 +++++++++++++++
 scripts/changelog_tool/changelog-tool             | 15 ++++++++++-----
 scripts/changelog_tool/changelog.yaml             |  5 ++++-
 .../changelog_tool/collect/command.py             | 14 +++++++++++---
 .../changelog_tool/collect/config.py              |  4 +++-
 .../changelog_tool/changelog_tool/common/git.py   |  1 +
 6 files changed, 44 insertions(+), 10 deletions(-)
 create mode 100644 scripts/changelog_tool/AGENTS.md

diff --git a/scripts/changelog_tool/AGENTS.md b/scripts/changelog_tool/AGENTS.md
new file mode 100644
index 000000000000..cc01cd6d78d0
--- /dev/null
+++ b/scripts/changelog_tool/AGENTS.md
@@ -0,0 +1,15 @@
+# Changelog Tool
+
+This agent is responsible for running the changelog tool, which collects commit information and identifies external contributors.
+
+## Usage
+
+IMPORTANT: The changelog tool must always be run with the virtual environment activated:
+
+```bash
+# Always activate the virtual environment first
+source .vent/bin/activate
+
+# Run the tool
+./changelog-tool [command] [options]
+```
\ No newline at end of file
diff --git a/scripts/changelog_tool/changelog-tool b/scripts/changelog_tool/changelog-tool
index 0a3b27939cd2..6c811c94b3bd 100755
--- a/scripts/changelog_tool/changelog-tool
+++ b/scripts/changelog_tool/changelog-tool
@@ -18,11 +18,16 @@ def cli(ctx: click.Context, config: str):
 @click.option('--repo-path', type=pathlib.Path)
 @click.pass_context
 def collect(ctx: click.Context, from_sha: str | None, to_sha: str | None, repo_path: pathlib.Path | None):
-    from_sha = from_sha or ctx.obj["CONFIG"].collect.from_sha
-    to_sha = to_sha or ctx.obj["CONFIG"].collect.to_sha
-    repo_path = repo_path or ctx.obj["CONFIG"].collect.repo_path
-
-    collect_cmd.collect(from_sha, to_sha, repo_path)
+    # Get the config and override with CLI options if provided
+    config = ctx.obj["CONFIG"].collect
+    if from_sha:
+        config.from_sha = from_sha
+    if to_sha:
+        config.to_sha = to_sha
+    if repo_path:
+        config.repo_path = repo_path
+    
+    collect_cmd.collect(config)
 
 if __name__ == '__main__':
     cli()
\ No newline at end of file
diff --git a/scripts/changelog_tool/changelog.yaml b/scripts/changelog_tool/changelog.yaml
index bd9e8920d3fd..94c53885b2c0 100644
--- a/scripts/changelog_tool/changelog.yaml
+++ b/scripts/changelog_tool/changelog.yaml
@@ -1,4 +1,7 @@
 collect:
   from_sha: c580979b522f43ea1ab9cd55033cd353d52844f6
   to_sha: HEAD
-  repo_path: ../..
\ No newline at end of file
+  repo_path: ../..
+  core_team_patterns:
+    - ".*@userver\\.tech"
+    - ".*@yandex-team\\.com"
diff --git a/scripts/changelog_tool/changelog_tool/collect/command.py b/scripts/changelog_tool/changelog_tool/collect/command.py
index 5d21d57003bf..724735f812b6 100644
--- a/scripts/changelog_tool/changelog_tool/collect/command.py
+++ b/scripts/changelog_tool/changelog_tool/collect/command.py
@@ -1,9 +1,17 @@
 import pathlib
+import re
 
 import changelog_tool.common.git as git
+from changelog_tool.collect.config import CollectConfig
 
-def collect(from_sha: str, to_sha: str, repo_path: pathlib.Path) -> None:
-    print(f"Collecting commits from {from_sha} to {to_sha}...")
-    commits: list[git.Commit] = git.get_commits(from_sha, to_sha, repo_path)
+def collect(config: CollectConfig) -> None:
+    print(f"Collecting commits from {config.from_sha} to {config.to_sha}...")
+    commits: list[git.Commit] = git.get_commits(config.from_sha, config.to_sha, config.repo_path)
+    
+    core_team_regexes = [re.compile(pattern) for pattern in config.core_team_patterns]
+    
+    for commit in commits:
+        is_core_team = any(regex.match(commit.author) for regex in core_team_regexes)
+        commit.is_external = not is_core_team
     
     print(f"Found {len(commits)} commits")
\ No newline at end of file
diff --git a/scripts/changelog_tool/changelog_tool/collect/config.py b/scripts/changelog_tool/changelog_tool/collect/config.py
index de742e5ca0e8..e003e0906447 100644
--- a/scripts/changelog_tool/changelog_tool/collect/config.py
+++ b/scripts/changelog_tool/changelog_tool/collect/config.py
@@ -1,7 +1,9 @@
 import pathlib
 import pydantic
+from typing import List
 
 class CollectConfig(pydantic.BaseModel):
     from_sha: str
     to_sha: str
-    repo_path: pathlib.Path = pydantic.Field(default_factory=pathlib.Path.cwd)
\ No newline at end of file
+    repo_path: pathlib.Path = pydantic.Field(default_factory=pathlib.Path.cwd)
+    core_team_patterns: List[str] = pydantic.Field(default_factory=list)
\ No newline at end of file
diff --git a/scripts/changelog_tool/changelog_tool/common/git.py b/scripts/changelog_tool/changelog_tool/common/git.py
index d9c87b21fc04..c475fcb2d3ea 100644
--- a/scripts/changelog_tool/changelog_tool/common/git.py
+++ b/scripts/changelog_tool/changelog_tool/common/git.py
@@ -26,6 +26,7 @@ class Commit(pydantic.BaseModel):
     changed_files: list[FileChange]
     total_added: int = 0
     total_removed: int = 0
+    is_external: bool = False                             # whether the author is external contributor
 
 
 def get_commits(

From 1e2d76dc1d2558435f4c77eeedd0b1f0e97e3b36 Mon Sep 17 00:00:00 2001
From: Ivan Skriabin <dddccc20000@mail.ru>
Date: Wed, 10 Jun 2026 01:39:00 +0300
Subject: [PATCH 03/14] classification

---
 .../changelog_tool/collect/classification.py  | 25 +++++++++++++++++++
 .../changelog_tool/collect/command.py         | 13 +++++++---
 .../changelog_tool/common/git.py              |  1 -
 3 files changed, 35 insertions(+), 4 deletions(-)
 create mode 100644 scripts/changelog_tool/changelog_tool/collect/classification.py

diff --git a/scripts/changelog_tool/changelog_tool/collect/classification.py b/scripts/changelog_tool/changelog_tool/collect/classification.py
new file mode 100644
index 000000000000..48fb4d262338
--- /dev/null
+++ b/scripts/changelog_tool/changelog_tool/collect/classification.py
@@ -0,0 +1,25 @@
+from enum import Enum
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from changelog_tool.common.git import Commit
+
+
+class Classification(str, Enum):
+    FEATURE = "feature"
+    BUG = "bug"
+    BREAKING_CHANGE = "breaking-change"
+    MINOR_BUG = "minor_bug"
+    REFACTOR = "refactor"
+    DOCS = "docs"
+    UNCLEAR = "unclear"
+
+
+class ClassifiedCommit(Commit):
+    classification: Classification = Classification.UNCLEAR
+    is_external: bool = False
+    to_changelog: bool | None = None
+
+def classify_commit(commit: "Commit") -> Classification:
+    # Default to unclear if no heuristics match
+    return Classification.UNCLEAR
\ No newline at end of file
diff --git a/scripts/changelog_tool/changelog_tool/collect/command.py b/scripts/changelog_tool/changelog_tool/collect/command.py
index 724735f812b6..55dfd5ce37f2 100644
--- a/scripts/changelog_tool/changelog_tool/collect/command.py
+++ b/scripts/changelog_tool/changelog_tool/collect/command.py
@@ -1,8 +1,10 @@
 import pathlib
 import re
+from typing import List
 
 import changelog_tool.common.git as git
 from changelog_tool.collect.config import CollectConfig
+from changelog_tool.collect.classification import classify_commit, ClassifiedCommit
 
 def collect(config: CollectConfig) -> None:
     print(f"Collecting commits from {config.from_sha} to {config.to_sha}...")
@@ -10,8 +12,13 @@ def collect(config: CollectConfig) -> None:
     
     core_team_regexes = [re.compile(pattern) for pattern in config.core_team_patterns]
     
+    classified_commits: List[ClassifiedCommit] = []
     for commit in commits:
         is_core_team = any(regex.match(commit.author) for regex in core_team_regexes)
-        commit.is_external = not is_core_team
-    
-    print(f"Found {len(commits)} commits")
\ No newline at end of file
+        classified_commits.append(ClassifiedCommit(
+            **commit.model_dump(),
+            classification=classify_commit(commit),
+            is_external=not is_core_team,
+        ))
+        
+    print(f"Found {len(classified_commits)} commits")
\ No newline at end of file
diff --git a/scripts/changelog_tool/changelog_tool/common/git.py b/scripts/changelog_tool/changelog_tool/common/git.py
index c475fcb2d3ea..d9c87b21fc04 100644
--- a/scripts/changelog_tool/changelog_tool/common/git.py
+++ b/scripts/changelog_tool/changelog_tool/common/git.py
@@ -26,7 +26,6 @@ class Commit(pydantic.BaseModel):
     changed_files: list[FileChange]
     total_added: int = 0
     total_removed: int = 0
-    is_external: bool = False                             # whether the author is external contributor
 
 
 def get_commits(

From 896927e32ebc30194bee5827bd9f0d5818f207ff Mon Sep 17 00:00:00 2001
From: Ivan Skriabin <dddccc20000@mail.ru>
Date: Wed, 10 Jun 2026 01:53:35 +0300
Subject: [PATCH 04/14] heuristics

---
 scripts/changelog_tool/AGENTS.md              | 14 ++++++++++
 .../changelog_tool/collect/classification.py  | 28 +++++++++++++++++--
 .../changelog_tool/collect/command.py         |  6 ++--
 .../changelog_tool/common/git.py              |  2 ++
 4 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/scripts/changelog_tool/AGENTS.md b/scripts/changelog_tool/AGENTS.md
index cc01cd6d78d0..0a5084d38e4a 100644
--- a/scripts/changelog_tool/AGENTS.md
+++ b/scripts/changelog_tool/AGENTS.md
@@ -2,6 +2,20 @@
 
 This agent is responsible for running the changelog tool, which collects commit information and identifies external contributors.
 
+## Heuristics for LLM Analysis
+
+The tool uses heuristics to determine which commits should be sent to an LLM for changelog analysis:
+
+We calculate a `score_size` metric as `lines_added + lines_deleted` for each commit.
+
+The tool will NOT send commits to the LLM if they meet any of these criteria:
+1. Any file path contains "docs/" or "documentation", OR commit title contains documentation keywords
+2. Commit title contains fix/bug keywords AND the commit is small (score_size <= 200)
+3. All commits with score_size <= 50
+
+Documentation keywords: "doc", "docs", "documentation", "readme"
+Fix/bug keywords: "fix", "bugfix", "bug"
+
 ## Usage
 
 IMPORTANT: The changelog tool must always be run with the virtual environment activated:
diff --git a/scripts/changelog_tool/changelog_tool/collect/classification.py b/scripts/changelog_tool/changelog_tool/collect/classification.py
index 48fb4d262338..e19272481b99 100644
--- a/scripts/changelog_tool/changelog_tool/collect/classification.py
+++ b/scripts/changelog_tool/changelog_tool/collect/classification.py
@@ -13,13 +13,37 @@ class Classification(str, Enum):
     REFACTOR = "refactor"
     DOCS = "docs"
     UNCLEAR = "unclear"
+    MINOR="minor"
 
+MINOR_BUG_SIZE_THRESHOLD = 200
+MINOR_SIZE_THRESHOLD = 50
 
 class ClassifiedCommit(Commit):
     classification: Classification = Classification.UNCLEAR
     is_external: bool = False
     to_changelog: bool | None = None
 
-def classify_commit(commit: "Commit") -> Classification:
-    # Default to unclear if no heuristics match
+def classify_commit(commit: Commit) -> Classification:  # Heuristic pre-classification; checks run in order: docs, small fix, small change.
+    has_docs_in_files = any(
+        "docs/" in file_change.path.lower() or
+        "documentation" in file_change.path.lower()
+        for file_change in commit.changed_files
+    )
+    
+    doc_keywords = ["doc", "docs", "documentation", "readme"]
+    commit_title_lower = commit.title.lower()
+    has_docs_in_title = any(keyword in commit_title_lower for keyword in doc_keywords)  # substring test: "doc" also hits e.g. "docker"
+    
+    fix_keywords = ["fix", "bugfix", "bug"]
+    has_fix = any(keyword in commit_title_lower for keyword in fix_keywords)  # substring test: "fix" also hits e.g. "prefix"
+    
+    if has_docs_in_files or has_docs_in_title:  # a single docs path is enough, whatever else the commit touches
+        return Classification.DOCS
+        
+    if has_fix and commit.score_size <= MINOR_BUG_SIZE_THRESHOLD:
+        return Classification.MINOR_BUG
+        
+    if commit.score_size <= MINOR_SIZE_THRESHOLD:
+        return Classification.MINOR
+        
     return Classification.UNCLEAR
\ No newline at end of file
diff --git a/scripts/changelog_tool/changelog_tool/collect/command.py b/scripts/changelog_tool/changelog_tool/collect/command.py
index 55dfd5ce37f2..27388f5d100a 100644
--- a/scripts/changelog_tool/changelog_tool/collect/command.py
+++ b/scripts/changelog_tool/changelog_tool/collect/command.py
@@ -4,7 +4,7 @@
 
 import changelog_tool.common.git as git
 from changelog_tool.collect.config import CollectConfig
-from changelog_tool.collect.classification import classify_commit, ClassifiedCommit
+from changelog_tool.collect.classification import Classification, classify_commit, ClassifiedCommit, should_send_to_llm
 
 def collect(config: CollectConfig) -> None:
     print(f"Collecting commits from {config.from_sha} to {config.to_sha}...")
@@ -15,10 +15,12 @@ def collect(config: CollectConfig) -> None:
     classified_commits: List[ClassifiedCommit] = []
     for commit in commits:
         is_core_team = any(regex.match(commit.author) for regex in core_team_regexes)
+        classification = classify_commit(commit)
         classified_commits.append(ClassifiedCommit(
             **commit.model_dump(),
-            classification=classify_commit(commit),
+            classification=classification,
             is_external=not is_core_team,
+            to_changelog=classification != Classification.UNCLEAR
         ))
         
     print(f"Found {len(classified_commits)} commits")
\ No newline at end of file
diff --git a/scripts/changelog_tool/changelog_tool/common/git.py b/scripts/changelog_tool/changelog_tool/common/git.py
index d9c87b21fc04..ad85a700579d 100644
--- a/scripts/changelog_tool/changelog_tool/common/git.py
+++ b/scripts/changelog_tool/changelog_tool/common/git.py
@@ -26,6 +26,7 @@ class Commit(pydantic.BaseModel):
     changed_files: list[FileChange]
     total_added: int = 0
     total_removed: int = 0
+    score_size: int = 0
 
 
 def get_commits(
@@ -146,4 +147,5 @@ def _fetch_commit(sha: str, cwd: Path) -> Commit:
         changed_files=changes,
         total_added=sum(c.added_lines for c in changes),
         total_removed=sum(c.removed_lines for c in changes),
+        score_size=sum(c.added_lines + c.removed_lines for c in changes),
     )
\ No newline at end of file

From ebdbc06487b8ac3e7302252b07e2635bef7c2bfb Mon Sep 17 00:00:00 2001
From: Ivan Skriabin <dddccc20000@mail.ru>
Date: Wed, 10 Jun 2026 02:15:50 +0300
Subject: [PATCH 05/14] preclassify

---
 .gitignore                                    |  1 +
 scripts/changelog_tool/AGENTS.md              |  9 +++++
 scripts/changelog_tool/changelog-tool         |  8 +++-
 .../changelog_tool/collect/classification.py  |  4 +-
 .../changelog_tool/collect/command.py         | 23 +++++++++---
 .../changelog_tool/collect/config.py          |  3 +-
 .../changelog_tool/common/git.py              |  8 +++-
 .../changelog_tool/common/io.py               | 37 +++++++++++++++++++
 8 files changed, 80 insertions(+), 13 deletions(-)
 create mode 100644 scripts/changelog_tool/changelog_tool/common/io.py

diff --git a/.gitignore b/.gitignore
index a7d344bca5a7..b19b6b5c2154 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,6 +21,7 @@ static-analyzer-report
 .settings*
 .clangd
 .vscode
+.changelog
 scripts/docs/en/components_schema
 scripts/docs/en/dynamic_configs
 scripts/docs/en/versions.md
diff --git a/scripts/changelog_tool/AGENTS.md b/scripts/changelog_tool/AGENTS.md
index 0a5084d38e4a..e1bf08ab547e 100644
--- a/scripts/changelog_tool/AGENTS.md
+++ b/scripts/changelog_tool/AGENTS.md
@@ -26,4 +26,13 @@ source .vent/bin/activate
 
 # Run the tool
 ./changelog-tool [command] [options]
+```
+
+## Output Directory
+
+By default, the tool outputs classified commits to `.changelog/preclassified.json`. You can customize this with the `--output-dir` global option:
+
+```bash
+# Run with custom output directory
+./changelog-tool --output-dir ./my-output-dir collect
 ```
\ No newline at end of file
diff --git a/scripts/changelog_tool/changelog-tool b/scripts/changelog_tool/changelog-tool
index 6c811c94b3bd..502b314deffe 100755
--- a/scripts/changelog_tool/changelog-tool
+++ b/scripts/changelog_tool/changelog-tool
@@ -7,10 +7,14 @@ import changelog_tool.collect.command as collect_cmd
 
 @click.group()
 @click.option('--config', default='changelog.yaml')
+@click.option('--output-dir', type=pathlib.Path, default=None)
 @click.pass_context
-def cli(ctx: click.Context, config: str):
+def cli(ctx: click.Context, config: str, output_dir: pathlib.Path | None):
     ctx.ensure_object(dict)
-    ctx.obj["CONFIG"] = cfg.parse_config(pathlib.Path(config))
+    parsed_config = cfg.parse_config(pathlib.Path(config))
+    if output_dir:
+        parsed_config.collect.output_dir = output_dir
+    ctx.obj["CONFIG"] = parsed_config
 
 @cli.command()
 @click.option('--from-sha')
diff --git a/scripts/changelog_tool/changelog_tool/collect/classification.py b/scripts/changelog_tool/changelog_tool/collect/classification.py
index e19272481b99..a1325ea6b772 100644
--- a/scripts/changelog_tool/changelog_tool/collect/classification.py
+++ b/scripts/changelog_tool/changelog_tool/collect/classification.py
@@ -1,8 +1,6 @@
 from enum import Enum
-from typing import TYPE_CHECKING
 
-if TYPE_CHECKING:
-    from changelog_tool.common.git import Commit
+from changelog_tool.common.git import Commit
 
 
 class Classification(str, Enum):
diff --git a/scripts/changelog_tool/changelog_tool/collect/command.py b/scripts/changelog_tool/changelog_tool/collect/command.py
index 27388f5d100a..4fdf2429a3ec 100644
--- a/scripts/changelog_tool/changelog_tool/collect/command.py
+++ b/scripts/changelog_tool/changelog_tool/collect/command.py
@@ -1,10 +1,10 @@
-import pathlib
 import re
 from typing import List
 
 import changelog_tool.common.git as git
+import changelog_tool.common.io as io
 from changelog_tool.collect.config import CollectConfig
-from changelog_tool.collect.classification import Classification, classify_commit, ClassifiedCommit, should_send_to_llm
+from changelog_tool.collect.classification import Classification, classify_commit, ClassifiedCommit
 
 def collect(config: CollectConfig) -> None:
     print(f"Collecting commits from {config.from_sha} to {config.to_sha}...")
@@ -16,11 +16,22 @@ def collect(config: CollectConfig) -> None:
     for commit in commits:
         is_core_team = any(regex.match(commit.author) for regex in core_team_regexes)
         classification = classify_commit(commit)
-        classified_commits.append(ClassifiedCommit(
+        classified_commit = ClassifiedCommit(
             **commit.model_dump(),
             classification=classification,
             is_external=not is_core_team,
-            to_changelog=classification != Classification.UNCLEAR
-        ))
+            to_changelog=None
+        )
         
-    print(f"Found {len(classified_commits)} commits")
\ No newline at end of file
+        if classification in [Classification.FEATURE, Classification.BUG, Classification.BREAKING_CHANGE]:
+            classified_commit.to_changelog = True
+        elif classification == Classification.UNCLEAR:
+            classified_commit.to_changelog = None
+        else:
+            classified_commit.to_changelog = False
+
+        classified_commits.append(classified_commit)
+        
+    print(f"Found {len(classified_commits)} commits")
+    
+    io.dump_classified_commits(classified_commits, config.output_dir, 'preclassified.json')
\ No newline at end of file
diff --git a/scripts/changelog_tool/changelog_tool/collect/config.py b/scripts/changelog_tool/changelog_tool/collect/config.py
index e003e0906447..1ed44f849aac 100644
--- a/scripts/changelog_tool/changelog_tool/collect/config.py
+++ b/scripts/changelog_tool/changelog_tool/collect/config.py
@@ -6,4 +6,5 @@ class CollectConfig(pydantic.BaseModel):
     from_sha: str
     to_sha: str
     repo_path: pathlib.Path = pydantic.Field(default_factory=pathlib.Path.cwd)
-    core_team_patterns: List[str] = pydantic.Field(default_factory=list)
\ No newline at end of file
+    core_team_patterns: List[str] = pydantic.Field(default_factory=list)
+    output_dir: pathlib.Path = pydantic.Field(default_factory=lambda: pathlib.Path(".changelog"))
\ No newline at end of file
diff --git a/scripts/changelog_tool/changelog_tool/common/git.py b/scripts/changelog_tool/changelog_tool/common/git.py
index ad85a700579d..f240d2ae38f7 100644
--- a/scripts/changelog_tool/changelog_tool/common/git.py
+++ b/scripts/changelog_tool/changelog_tool/common/git.py
@@ -148,4 +148,10 @@ def _fetch_commit(sha: str, cwd: Path) -> Commit:
         total_added=sum(c.added_lines for c in changes),
         total_removed=sum(c.removed_lines for c in changes),
         score_size=sum(c.added_lines + c.removed_lines for c in changes),
-    )
\ No newline at end of file
+    )
+
+def get_commit_diff(commit: Commit, repo_path: str | Path | None = None) -> str:
+    return get_diff_by_sha(commit.sha, repo_path)  # convenience wrapper keyed on commit.sha
+
+def get_diff_by_sha(sha: str, repo_path: str | Path | None = None) -> str:
+    return _run_git(["diff-tree", "--root", "-p", "-r", "-M", sha], _repo(repo_path))  # raw stdout of `git diff-tree -p`; raises GitError on failure
diff --git a/scripts/changelog_tool/changelog_tool/common/io.py b/scripts/changelog_tool/changelog_tool/common/io.py
new file mode 100644
index 000000000000..47eb987986e6
--- /dev/null
+++ b/scripts/changelog_tool/changelog_tool/common/io.py
@@ -0,0 +1,37 @@
+import json
+import pathlib
+from typing import List
+
+from changelog_tool.collect.classification import ClassifiedCommit
+
+
+def dump_classified_commits(commits: List[ClassifiedCommit], output_dir: pathlib.Path, filename: str) -> None:
+    """Write ``commits`` as a JSON array to ``output_dir / filename``.
+
+    The directory (including missing parents) is created first; an existing
+    file with the same name is overwritten.
+    """
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    # mode="json" makes pydantic emit JSON-native values (enum values, paths as
+    # strings, ...), so json.dumps cannot fail on a non-serializable field type.
+    payload = [commit.model_dump(mode="json") for commit in commits]
+
+    # Explicit encoding keeps the file independent of the platform locale.
+    (output_dir / filename).write_text(json.dumps(payload, indent=2), encoding="utf-8")
+
+
+def load_classified_commits(output_dir: pathlib.Path, filename: str) -> List[ClassifiedCommit]:
+    """Read ``output_dir / filename`` and rebuild the ClassifiedCommit list stored in it.
+
+    A missing file is not an error: an empty list is returned instead.
+    """
+    source = output_dir / filename
+    if not source.exists():
+        return []
+
+    with open(source, 'r') as stream:
+        records = json.load(stream)
+
+    # Each record holds the keyword arguments of one ClassifiedCommit
+    return [ClassifiedCommit(**record) for record in records]
\ No newline at end of file

From 01a1cfbc3007030c2f49bdb5aaa85d4773cf80e4 Mon Sep 17 00:00:00 2001
From: Ivan Skriabin <dddccc20000@mail.ru>
Date: Wed, 10 Jun 2026 02:36:32 +0300
Subject: [PATCH 06/14] llm

---
 scripts/changelog_tool/changelog.yaml         |  4 +
 .../changelog_tool/changelog_tool/config.py   |  2 +
 .../changelog_tool/llm/__init__.py            | 11 +++
 .../changelog_tool/llm/client.py              | 92 +++++++++++++++++++
 .../changelog_tool/llm/config.py              |  5 +
 .../changelog_tool/llm/exceptions.py          |  7 ++
 scripts/changelog_tool/requirements.txt       |  4 +-
 7 files changed, 124 insertions(+), 1 deletion(-)
 create mode 100644 scripts/changelog_tool/changelog_tool/llm/__init__.py
 create mode 100644 scripts/changelog_tool/changelog_tool/llm/client.py
 create mode 100644 scripts/changelog_tool/changelog_tool/llm/config.py
 create mode 100644 scripts/changelog_tool/changelog_tool/llm/exceptions.py

diff --git a/scripts/changelog_tool/changelog.yaml b/scripts/changelog_tool/changelog.yaml
index 94c53885b2c0..e81f60f43588 100644
--- a/scripts/changelog_tool/changelog.yaml
+++ b/scripts/changelog_tool/changelog.yaml
@@ -5,3 +5,7 @@ collect:
   core_team_patterns:
     - ".*@userver\\.tech"
     - ".*@yandex-team\\.com"
+
+llm-config:
+  target_rps: 5.0
+  retries: 3
diff --git a/scripts/changelog_tool/changelog_tool/config.py b/scripts/changelog_tool/changelog_tool/config.py
index 4fb7d9790575..a0ba032814ab 100644
--- a/scripts/changelog_tool/changelog_tool/config.py
+++ b/scripts/changelog_tool/changelog_tool/config.py
@@ -1,4 +1,5 @@
 from changelog_tool.collect.config import CollectConfig
+from changelog_tool.llm.config import LLMConfig
 
 import pydantic
 import yaml
@@ -6,6 +7,7 @@
 
 class Config(pydantic.BaseModel):
     collect: CollectConfig
+    llm_config: LLMConfig = pydantic.Field(alias="llm-config")
 
 def parse_config(config_path: pathlib.Path) -> Config:
     with open(config_path, 'r') as f:
diff --git a/scripts/changelog_tool/changelog_tool/llm/__init__.py b/scripts/changelog_tool/changelog_tool/llm/__init__.py
new file mode 100644
index 000000000000..1a6db4301fb1
--- /dev/null
+++ b/scripts/changelog_tool/changelog_tool/llm/__init__.py
@@ -0,0 +1,11 @@
+from changelog_tool.llm.config import LLMConfig
+from changelog_tool.llm.exceptions import LLMError, LLMTransientError
+from changelog_tool.llm.client import BaseLLMClient, HttpLLMClient
+
+__all__ = [
+    "LLMConfig",
+    "LLMError",
+    "LLMTransientError",
+    "BaseLLMClient",
+    "HttpLLMClient",
+]
diff --git a/scripts/changelog_tool/changelog_tool/llm/client.py b/scripts/changelog_tool/changelog_tool/llm/client.py
new file mode 100644
index 000000000000..e95b1cecffaa
--- /dev/null
+++ b/scripts/changelog_tool/changelog_tool/llm/client.py
@@ -0,0 +1,92 @@
+import os
+import asyncio
+from abc import ABC, abstractmethod
+
+import aiohttp
+from aiolimiter import AsyncLimiter
+
+from changelog_tool.llm.config import LLMConfig
+from changelog_tool.llm.exceptions import LLMError, LLMTransientError
+
+
+class BaseLLMClient(ABC):
+    """Interface that every LLM backend used by the changelog tool implements."""
+
+    @abstractmethod
+    async def generate(self, prompt: str) -> str:
+        """Asynchronously send a text prompt to the LLM and return its text reply.
+
+        May raise LLMError or LLMTransientError.
+        """
+
+    @abstractmethod
+    async def close(self):
+        """Release the resources held by the client."""
+
+
+class HttpLLMClient(BaseLLMClient):
+    def __init__(self, config: LLMConfig):
+        self.url = os.environ.get("CHANGELOG_LLM_URL")
+        api_key = os.environ.get("CHANGELOG_LLM_API_KEY")
+        oauth_key = os.environ.get("CHANGELOG_LLM_OAUTH_KEY")
+        self.retries = config.retries
+        
+        if not self.url:
+            raise RuntimeError("Missing required environment variable: CHANGELOG_LLM_URL")
+            
+        if api_key:
+            auth_header = f"Bearer {api_key}"
+        elif oauth_key:
+            auth_header = f"OAuth {oauth_key}"
+        else:
+            raise RuntimeError("Missing required environment variable: either CHANGELOG_LLM_API_KEY or CHANGELOG_LLM_OAUTH_KEY must be set")
+            
+        self.limiter = AsyncLimiter(config.target_rps, 1)
+        self.session = aiohttp.ClientSession(
+            headers={"Authorization": auth_header}
+        )
+
+    async def generate(self, prompt: str) -> str:
+        """POST the prompt to the LLM endpoint and return the "response" field of the JSON reply.
+
+        HTTP 429, HTTP 5xx, aiohttp client errors and timeouts are retried up to ``self.retries``
+        times with exponential backoff (a numeric Retry-After header wins for 429). Once the
+        attempts are used up LLMTransientError is raised; any other non-200 status raises
+        LLMError immediately.
+        """
+        last_error = None
+        for attempt in range(self.retries + 1):
+            wait_time = 2 ** attempt
+            try:
+                async with self.limiter:
+                    async with self.session.post(self.url, json={"prompt": prompt}) as response:
+                        if response.status == 200:
+                            data = await response.json()
+                            return data.get("response", "")
+
+                        if response.status == 429:
+                            retry_after = response.headers.get("Retry-After")
+                            if retry_after and retry_after.isdigit():
+                                wait_time = float(retry_after)
+                            last_error = f"429 Too Many Requests. Waiting {wait_time}s"
+                        elif response.status >= 500:
+                            last_error = f"Server error {response.status}"
+                        elif response.status in (400, 401, 403, 404):
+                            text = await response.text()
+                            raise LLMError(f"Critical LLM error: {response.status} - {text}")
+                        else:
+                            text = await response.text()
+                            raise LLMError(f"Unexpected status {response.status}: {text}")
+            # aiohttp signals an expired total timeout with asyncio.TimeoutError, which is not a ClientError.
+            except (aiohttp.ClientError, asyncio.TimeoutError) as e:
+                last_error = f"Client error: {e}"
+
+            # Back off only after the limiter slot and the connection have been released, and
+            # never after the final attempt, where sleeping would merely delay the error.
+            if attempt < self.retries:
+                await asyncio.sleep(wait_time)
+
+        raise LLMTransientError(f"Max retries ({self.retries}) exceeded. Last error: {last_error}")
+
+    async def close(self):
+        await self.session.close()
diff --git a/scripts/changelog_tool/changelog_tool/llm/config.py b/scripts/changelog_tool/changelog_tool/llm/config.py
new file mode 100644
index 000000000000..d2223d64d03d
--- /dev/null
+++ b/scripts/changelog_tool/changelog_tool/llm/config.py
@@ -0,0 +1,5 @@
+import pydantic
+
+class LLMConfig(pydantic.BaseModel):
+    target_rps: float = 5.0
+    retries: int = 3
diff --git a/scripts/changelog_tool/changelog_tool/llm/exceptions.py b/scripts/changelog_tool/changelog_tool/llm/exceptions.py
new file mode 100644
index 000000000000..3aa06096ec86
--- /dev/null
+++ b/scripts/changelog_tool/changelog_tool/llm/exceptions.py
@@ -0,0 +1,7 @@
+class LLMError(Exception):
+    """Critical LLM error (for example, a malformed request, 400 Bad Request)."""
+    pass
+
+class LLMTransientError(LLMError):
+    """Transient LLM error (for example, 500, 503, a timeout, or retry attempts exhausted)."""
+    pass
diff --git a/scripts/changelog_tool/requirements.txt b/scripts/changelog_tool/requirements.txt
index d25ffcfd0fd9..30798fcb5dec 100644
--- a/scripts/changelog_tool/requirements.txt
+++ b/scripts/changelog_tool/requirements.txt
@@ -1,3 +1,5 @@
 click >= 8.0.0
 PyYAML >= 6.0.1
-pydantic >= 2.5.3
\ No newline at end of file
+pydantic >= 2.5.3
+aiohttp >= 3.9.0
+aiolimiter >= 1.1.0
\ No newline at end of file

From 298caf1ad7497f3862a2a9102e2c6c79bb142286 Mon Sep 17 00:00:00 2001
From: Ivan Skriabin <dddccc20000@mail.ru>
Date: Wed, 10 Jun 2026 02:42:21 +0300
Subject: [PATCH 07/14] changelog line

---
 .../changelog_tool/changelog_tool/collect/classification.py | 2 ++
 scripts/changelog_tool/changelog_tool/collect/command.py    | 6 ++++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/scripts/changelog_tool/changelog_tool/collect/classification.py b/scripts/changelog_tool/changelog_tool/collect/classification.py
index a1325ea6b772..96e629d241c8 100644
--- a/scripts/changelog_tool/changelog_tool/collect/classification.py
+++ b/scripts/changelog_tool/changelog_tool/collect/classification.py
@@ -20,6 +20,8 @@ class ClassifiedCommit(Commit):
     classification: Classification = Classification.UNCLEAR
     is_external: bool = False
     to_changelog: bool | None = None
+    changelog_line: str | None = None
+    commit_analysis: str | None = None
 
 def classify_commit(commit: Commit) -> Classification:
     has_docs_in_files = any(
diff --git a/scripts/changelog_tool/changelog_tool/collect/command.py b/scripts/changelog_tool/changelog_tool/collect/command.py
index 4fdf2429a3ec..5585d380782f 100644
--- a/scripts/changelog_tool/changelog_tool/collect/command.py
+++ b/scripts/changelog_tool/changelog_tool/collect/command.py
@@ -20,11 +20,13 @@ def collect(config: CollectConfig) -> None:
             **commit.model_dump(),
             classification=classification,
             is_external=not is_core_team,
-            to_changelog=None
+            to_changelog=None,
+            changelog_line=None,
+            commit_analysis=None
         )
         
         if classification in [Classification.FEATURE, Classification.BUG, Classification.BREAKING_CHANGE]:
-            classified_commit.to_changelog = True
+            raise RuntimeError("Unexpected positive changelog preclassification")
         elif classification == Classification.UNCLEAR:
             classified_commit.to_changelog = None
         else:

From 6a6475ad8c540d77419877ba41d099fc23376ccf Mon Sep 17 00:00:00 2001
From: Ivan Skriabin <dddccc20000@mail.ru>
Date: Wed, 10 Jun 2026 03:12:20 +0300
Subject: [PATCH 08/14] llm analysis

---
 scripts/changelog_tool/changelog.yaml         |   4 +
 .../changelog_tool/collect/command.py         |  31 ++-
 .../changelog_tool/llm/__init__.py            |   4 +
 .../changelog_tool/llm/config.py              |   4 +
 .../changelog_tool/llm/processor.py           | 196 ++++++++++++++++++
 .../changelog_tool/llm/state.py               |  82 ++++++++
 6 files changed, 320 insertions(+), 1 deletion(-)
 create mode 100644 scripts/changelog_tool/changelog_tool/llm/processor.py
 create mode 100644 scripts/changelog_tool/changelog_tool/llm/state.py

diff --git a/scripts/changelog_tool/changelog.yaml b/scripts/changelog_tool/changelog.yaml
index e81f60f43588..776567036dd8 100644
--- a/scripts/changelog_tool/changelog.yaml
+++ b/scripts/changelog_tool/changelog.yaml
@@ -9,3 +9,7 @@ collect:
 llm-config:
   target_rps: 5.0
   retries: 3
+  max_commits_per_batch: 10
+  max_user_prompt_length: 8000
+  include_diff: true
+  truncate_diff: true
diff --git a/scripts/changelog_tool/changelog_tool/collect/command.py b/scripts/changelog_tool/changelog_tool/collect/command.py
index 5585d380782f..dfe440539499 100644
--- a/scripts/changelog_tool/changelog_tool/collect/command.py
+++ b/scripts/changelog_tool/changelog_tool/collect/command.py
@@ -1,10 +1,15 @@
 import re
+import asyncio
+import os
 from typing import List
 
 import changelog_tool.common.git as git
 import changelog_tool.common.io as io
 from changelog_tool.collect.config import CollectConfig
 from changelog_tool.collect.classification import Classification, classify_commit, ClassifiedCommit
+from changelog_tool.llm.client import HttpLLMClient
+from changelog_tool.llm.processor import LLMProcessor
+from changelog_tool.llm.exceptions import LLMError
 
 def collect(config: CollectConfig) -> None:
     print(f"Collecting commits from {config.from_sha} to {config.to_sha}...")
@@ -35,5 +40,29 @@ def collect(config: CollectConfig) -> None:
         classified_commits.append(classified_commit)
         
     print(f"Found {len(classified_commits)} commits")
+
+    io.dump_classified_commits(classified_commits, config.output_dir, 'preclassified.json')
     
-    io.dump_classified_commits(classified_commits, config.output_dir, 'preclassified.json')
\ No newline at end of file
+    llm_client = HttpLLMClient(config.root.llm_config)
+    llm_processor = LLMProcessor(config.root.llm_config, llm_client, config.output_dir)
+            
+    unclear_commits = [
+        commit for commit in classified_commits
+        if commit.classification == Classification.UNCLEAR
+    ]
+            
+    llm_results = asyncio.run(llm_processor.process_commits(unclear_commits))
+            
+    for commit in classified_commits:
+        if commit.sha in llm_results:
+            result = llm_results[commit.sha]
+            try:
+                commit.classification = Classification(result.get("classification", "unclear"))
+            except ValueError:
+                # Если LLM вернула неизвестную классификацию, оставляем UNCLEAR
+                pass
+            commit.changelog_line = result.get("changelog_line")
+            commit.commit_analysis = result.get("detailed_commit_analysis")
+                        
+
+    io.dump_classified_commits(classified_commits, config.output_dir, 'classified.json')
\ No newline at end of file
diff --git a/scripts/changelog_tool/changelog_tool/llm/__init__.py b/scripts/changelog_tool/changelog_tool/llm/__init__.py
index 1a6db4301fb1..b64ba976b736 100644
--- a/scripts/changelog_tool/changelog_tool/llm/__init__.py
+++ b/scripts/changelog_tool/changelog_tool/llm/__init__.py
@@ -1,6 +1,8 @@
 from changelog_tool.llm.config import LLMConfig
 from changelog_tool.llm.exceptions import LLMError, LLMTransientError
 from changelog_tool.llm.client import BaseLLMClient, HttpLLMClient
+from changelog_tool.llm.state import LLMState
+from changelog_tool.llm.processor import LLMProcessor
 
 __all__ = [
     "LLMConfig",
@@ -8,4 +10,6 @@
     "LLMTransientError",
     "BaseLLMClient",
     "HttpLLMClient",
+    "LLMState",
+    "LLMProcessor",
 ]
diff --git a/scripts/changelog_tool/changelog_tool/llm/config.py b/scripts/changelog_tool/changelog_tool/llm/config.py
index d2223d64d03d..5327103bd420 100644
--- a/scripts/changelog_tool/changelog_tool/llm/config.py
+++ b/scripts/changelog_tool/changelog_tool/llm/config.py
@@ -3,3 +3,7 @@
 class LLMConfig(pydantic.BaseModel):
     target_rps: float = 5.0
     retries: int = 3
+    max_commits_per_batch: int = 10
+    max_user_prompt_length: int = 8000
+    include_diff: bool = True
+    truncate_diff: bool = True
diff --git a/scripts/changelog_tool/changelog_tool/llm/processor.py b/scripts/changelog_tool/changelog_tool/llm/processor.py
new file mode 100644
index 000000000000..00305cd0987a
--- /dev/null
+++ b/scripts/changelog_tool/changelog_tool/llm/processor.py
@@ -0,0 +1,196 @@
+import asyncio
+import json
+from typing import List, Dict, Any
+from pathlib import Path
+
+from changelog_tool.common.git import Commit, get_commit_diff
+from changelog_tool.llm.client import BaseLLMClient
+from changelog_tool.llm.config import LLMConfig
+from changelog_tool.llm.state import LLMState
+from changelog_tool.llm.exceptions import LLMError, LLMTransientError
+
+class LLMProcessor:
+    def __init__(self, config: LLMConfig, llm_client: BaseLLMClient, output_dir: Path):
+        self.config = config
+        self.llm_client = llm_client
+        self.output_dir = output_dir
+        self.state = LLMState(output_dir / "llm_state.json")
+
+    async def process_commits(self, commits: List[Commit]) -> Dict[str, Dict[str, Any]]:
+        """
+        Run the commits through the LLM, reusing results already cached in the state file.
+        Returns a dict SHA -> dict with the results (classification, changelog_line, detailed_commit_analysis).
+        """
+        # Load the state and drop entries for commits outside the current selection
+        await self.state.load()
+        valid_shas = {commit.sha for commit in commits}
+        await self.state.cleanup(valid_shas)
+
+        # Separate commits with a cached result from those that still need the LLM
+        commits_to_process = []
+        results = {}
+
+        for commit in commits:
+            # Only error-free cached entries are returned by the state
+            result = await self.state.get_result(commit.sha)
+            if result:
+                results[commit.sha] = result
+            else:
+                commits_to_process.append(commit)
+
+        print(f"Found {len(commits)} commits, {len(results)} already processed, {len(commits_to_process)} to process via LLM")
+
+        if not commits_to_process:
+            return results
+
+        # Split into batches
+        batches = [
+            commits_to_process[i:i + self.config.max_commits_per_batch]
+            for i in range(0, len(commits_to_process), self.config.max_commits_per_batch)
+        ]
+
+        # Process the batches concurrently
+        batch_results = await asyncio.gather(
+            *[self._process_batch(batch) for batch in batches],
+            return_exceptions=True
+        )
+
+        # Merge the results
+        for batch_result in batch_results:
+            if isinstance(batch_result, Exception):
+                print(f"Warning: Batch processing failed with exception: {batch_result}")
+                # The failing batch has already recorded its errors in the state, just move on
+                continue
+            results.update(batch_result)
+
+        return results
+
+    async def _process_batch(self, batch: List[Commit]) -> Dict[str, Dict[str, Any]]:
+        """Process one batch; return SHA -> result dict for every commit of the batch.
+
+        Commits without a usable entry in the LLM reply are recorded as errors in the state,
+        so the next run retries them instead of reusing a cached placeholder. An LLMError is
+        recorded for the whole batch and re-raised; any other exception is recorded and
+        converted into "unclear" placeholders.
+        """
+        try:
+            prompt = self._build_prompt(batch)
+
+            if len(prompt) > self.config.max_user_prompt_length:
+                if not self.config.truncate_diff:
+                    error_msg = f"Prompt too long ({len(prompt)} > {self.config.max_user_prompt_length})"
+                    return await self._fail_batch(batch, error_msg)
+                prompt = self._truncate_prompt(prompt)
+
+            # Send to the LLM
+            response_text = await self.llm_client.generate(prompt)
+
+            try:
+                response_data = json.loads(response_text)
+            except json.JSONDecodeError as e:
+                raise LLMError(f"LLM returned invalid JSON: {e}") from e
+
+            if not isinstance(response_data, dict):
+                raise LLMError("LLM returned invalid response format (not a dict)")
+
+            results = {}
+            for commit in batch:
+                commit_data = response_data.get(commit.sha)
+                if isinstance(commit_data, str):
+                    # Fallback if LLM returned just a string
+                    commit_data = {"classification": commit_data}
+                if not isinstance(commit_data, dict):
+                    # Missing or malformed entry: never cache it as a successful result
+                    await self.state.set_error(commit.sha, "LLM response has no usable entry for this commit")
+                    results[commit.sha] = self._placeholder()
+                    continue
+
+                result = {
+                    "classification": commit_data.get("classification", "unclear"),
+                    "changelog_line": commit_data.get("changelog_line", ""),
+                    "detailed_commit_analysis": commit_data.get("detailed_commit_analysis", "")
+                }
+                await self.state.set_result(commit.sha, **result)
+                results[commit.sha] = result
+
+            return results
+
+        except LLMError as e:
+            # Critical error: record it (process_commits relies on that) and propagate
+            for commit in batch:
+                await self.state.set_error(commit.sha, str(e))
+            raise
+        except Exception as e:
+            # Any other problem: mark the commits as failed and keep the run going
+            return await self._fail_batch(batch, str(e))
+
+    @staticmethod
+    def _placeholder() -> Dict[str, Any]:
+        """Return the neutral result used for commits that could not be analysed."""
+        return {
+            "classification": "unclear",
+            "changelog_line": "",
+            "detailed_commit_analysis": ""
+        }
+
+    async def _fail_batch(self, batch: List[Commit], error_msg: str) -> Dict[str, Dict[str, Any]]:
+        """Record ``error_msg`` in the state for every commit and return placeholders for them."""
+        for commit in batch:
+            await self.state.set_error(commit.sha, error_msg)
+        return {commit.sha: self._placeholder() for commit in batch}
+
+    def _build_prompt(self, commits: List[Commit]) -> str:
+        """Build the prompt (instructions followed by the commit descriptions) for a batch."""
+        system_prompt = """You are an expert software engineer analyzing git commits for a changelog.
+Your task is to analyze commits since the last release and highlight important and interesting changes.
+Ignore simple bugfixes, typos, and minor refactoring.
+
+For each commit, you MUST provide a JSON object with the following fields:
+1. "classification": One of ["feature", "breaking-change", "refactor", "minor", "unclear"].
+   - Use "breaking-change" if the commit introduces backward-incompatible changes.
+   - Use "feature" for new functionality.
+   - Use "refactor" for significant architectural changes.
+   - Use "minor" for small improvements.
+   - Use "unclear" if you cannot determine the classification.
+2. "changelog_line": A concise, user-friendly description of the change suitable for a changelog.
+   - IMPORTANT: If the classification is "breaking-change", you MUST include migration or fix instructions in this line if they are present in the commit message.
+3. "detailed_commit_analysis": A detailed analysis of what was added, why it was added, and what impact or benefit it brings to the project.
+
+You MUST return a valid JSON object where keys are commit SHAs and values are the analysis objects.
+Example output format:
+{
+  "commit_sha_1": {
+    "classification": "feature",
+    "changelog_line": "Added support for async LLM processing",
+    "detailed_commit_analysis": "Added a new LLMProcessor class to handle batching and async requests. This improves performance by allowing parallel processing of commits."
+  },
+  "commit_sha_2": {
+    "classification": "breaking-change",
+    "changelog_line": "Changed config format. Migration: rename 'llm_config' to 'llm-config' in your yaml file.",
+    "detailed_commit_analysis": "Updated the configuration schema to use hyphens instead of underscores for consistency. This breaks existing configs but aligns with the project's naming conventions."
+  }
+}
+"""
+
+        user_parts = []
+        for commit in commits:
+            part = f"Commit SHA: {commit.sha}\n"
+            part += f"Title: {commit.title}\n"
+            part += f"Message: {commit.message}\n"
+            part += f"Changed Files: {', '.join(f.path for f in commit.changed_files)}\n"
+
+            if self.config.include_diff:
+                diff = get_commit_diff(commit)
+                part += f"Diff:\n{diff}\n"
+
+            user_parts.append(part)
+
+        user_prompt = "Please analyze the following commits:\n\n" + "\n---\n".join(user_parts)
+        return f"{system_prompt}\n\n{user_prompt}"
+
+    def _truncate_prompt(self, prompt: str) -> str:
+        """Cut the prompt down to the configured maximum length."""
+        # Plain truncation - a smarter strategy may be needed in practice
+        if len(prompt) <= self.config.max_user_prompt_length:
+            return prompt
+        return prompt[:self.config.max_user_prompt_length]
\ No newline at end of file
diff --git a/scripts/changelog_tool/changelog_tool/llm/state.py b/scripts/changelog_tool/changelog_tool/llm/state.py
new file mode 100644
index 000000000000..85ebbe095262
--- /dev/null
+++ b/scripts/changelog_tool/changelog_tool/llm/state.py
@@ -0,0 +1,82 @@
+import json
+import asyncio
+from pathlib import Path
+from typing import Dict, Any, Set, Optional
+
+class LLMState:
+    """JSON-file-backed store of per-commit LLM results, keyed by commit SHA."""
+    def __init__(self, state_file_path: Path):
+        self.state_file_path = state_file_path
+        self.state: Dict[str, Dict[str, Any]] = {}
+        # asyncio.Lock is NOT reentrant: code that holds it must call _write(), never save().
+        self.lock = asyncio.Lock()
+
+    async def load(self) -> None:
+        """Replace the in-memory state with the file contents (empty if missing, unreadable or not a JSON object)."""
+        async with self.lock:
+            self.state = {}
+            if not self.state_file_path.exists():
+                return
+            try:
+                with open(self.state_file_path, 'r', encoding='utf-8') as f:
+                    loaded_state = json.load(f)
+            except (json.JSONDecodeError, IOError) as e:
+                print(f"Warning: Could not load state file {self.state_file_path}: {e}")
+                return
+            if isinstance(loaded_state, dict):
+                self.state = loaded_state
+
+    async def save(self) -> None:
+        """Persist the state to disk; must not be awaited while ``self.lock`` is already held."""
+        async with self.lock:
+            self._write()
+
+    def _write(self) -> None:
+        """Write the state file atomically (temp file + rename); the caller must hold the lock."""
+        self.state_file_path.parent.mkdir(parents=True, exist_ok=True)
+        temp_file = self.state_file_path.with_suffix('.tmp')
+        try:
+            with open(temp_file, 'w', encoding='utf-8') as f:
+                json.dump(self.state, f, ensure_ascii=False, indent=2)
+            temp_file.replace(self.state_file_path)
+        except IOError as e:
+            print(f"Error: Could not save state file {self.state_file_path}: {e}")
+            if temp_file.exists():
+                temp_file.unlink()
+
+    async def cleanup(self, valid_shas: Set[str]) -> None:
+        """Drop entries whose SHA is not in ``valid_shas``; the file is rewritten only if something was removed."""
+        async with self.lock:
+            keys_to_remove = set(self.state.keys()) - valid_shas
+            for key in keys_to_remove:
+                del self.state[key]
+            if keys_to_remove:
+                self._write()
+
+    async def get_result(self, sha: str) -> Optional[Dict[str, Any]]:
+        """Return the stored entry for ``sha``, or None when it is absent or recorded as an error."""
+        async with self.lock:
+            commit_data = self.state.get(sha)
+            if commit_data and commit_data.get("error") is None:
+                return commit_data
+            return None
+
+    async def set_result(self, sha: str, classification: str, changelog_line: str, detailed_commit_analysis: str) -> None:
+        """Store a successful analysis result for ``sha`` and persist the state."""
+        async with self.lock:
+            self.state[sha] = {
+                "classification": classification,
+                "changelog_line": changelog_line,
+                "detailed_commit_analysis": detailed_commit_analysis,
+                "error": None
+            }
+            self._write()
+
+    async def set_error(self, sha: str, error_message: str) -> None:
+        """Store an analysis failure for ``sha`` and persist the state."""
+        async with self.lock:
+            self.state[sha] = {
+                "classification": "unclear",
+                "error": error_message
+            }
+            self._write()
\ No newline at end of file

From 08aa5020fa1472d74c0bbd2f29c4f7d2e85121ae Mon Sep 17 00:00:00 2001
From: Ivan Skriabin <dddccc20000@mail.ru>
Date: Wed, 10 Jun 2026 06:07:50 +0300
Subject: [PATCH 09/14] llm classification 2

---
 scripts/changelog_tool/changelog-tool         |   8 +-
 scripts/changelog_tool/changelog.yaml         |   6 +-
 .../changelog_tool/collect/command.py         |  20 ++--
 .../changelog_tool/llm/client.py              | 111 +++++++++++------
 .../changelog_tool/llm/processor.py           | 113 +++++++++++++++---
 .../changelog_tool/llm/state.py               |  34 +++---
 scripts/changelog_tool/requirements.txt       |   4 +-
 7 files changed, 212 insertions(+), 84 deletions(-)

diff --git a/scripts/changelog_tool/changelog-tool b/scripts/changelog_tool/changelog-tool
index 502b314deffe..858937ac1b2c 100755
--- a/scripts/changelog_tool/changelog-tool
+++ b/scripts/changelog_tool/changelog-tool
@@ -23,13 +23,13 @@ def cli(ctx: click.Context, config: str, output_dir: pathlib.Path | None):
 @click.pass_context
 def collect(ctx: click.Context, from_sha: str | None, to_sha: str | None, repo_path: pathlib.Path | None):
     # Get the config and override with CLI options if provided
-    config = ctx.obj["CONFIG"].collect
+    config = ctx.obj["CONFIG"]
     if from_sha:
-        config.from_sha = from_sha
+        config.collect.from_sha = from_sha
     if to_sha:
-        config.to_sha = to_sha
+        config.collect.to_sha = to_sha
     if repo_path:
-        config.repo_path = repo_path
+        config.collect.repo_path = repo_path
     
     collect_cmd.collect(config)
 
diff --git a/scripts/changelog_tool/changelog.yaml b/scripts/changelog_tool/changelog.yaml
index 776567036dd8..7b6e592a0e8d 100644
--- a/scripts/changelog_tool/changelog.yaml
+++ b/scripts/changelog_tool/changelog.yaml
@@ -7,9 +7,9 @@ collect:
     - ".*@yandex-team\\.com"
 
 llm-config:
-  target_rps: 5.0
+  target_rps: 1
   retries: 3
   max_commits_per_batch: 10
-  max_user_prompt_length: 8000
+  max_user_prompt_length: 50000
   include_diff: true
-  truncate_diff: true
+  truncate_diff: false
diff --git a/scripts/changelog_tool/changelog_tool/collect/command.py b/scripts/changelog_tool/changelog_tool/collect/command.py
index dfe440539499..25eeb1e14b41 100644
--- a/scripts/changelog_tool/changelog_tool/collect/command.py
+++ b/scripts/changelog_tool/changelog_tool/collect/command.py
@@ -5,17 +5,17 @@
 
 import changelog_tool.common.git as git
 import changelog_tool.common.io as io
-from changelog_tool.collect.config import CollectConfig
+from changelog_tool.config import Config
 from changelog_tool.collect.classification import Classification, classify_commit, ClassifiedCommit
 from changelog_tool.llm.client import HttpLLMClient
 from changelog_tool.llm.processor import LLMProcessor
 from changelog_tool.llm.exceptions import LLMError
 
-def collect(config: CollectConfig) -> None:
-    print(f"Collecting commits from {config.from_sha} to {config.to_sha}...")
-    commits: list[git.Commit] = git.get_commits(config.from_sha, config.to_sha, config.repo_path)
+def collect(config: Config) -> None:
+    print(f"Collecting commits from {config.collect.from_sha} to {config.collect.to_sha}...")
+    commits: list[git.Commit] = git.get_commits(config.collect.from_sha, config.collect.to_sha, config.collect.repo_path)
     
-    core_team_regexes = [re.compile(pattern) for pattern in config.core_team_patterns]
+    core_team_regexes = [re.compile(pattern) for pattern in config.collect.core_team_patterns]
     
     classified_commits: List[ClassifiedCommit] = []
     for commit in commits:
@@ -41,10 +41,10 @@ def collect(config: CollectConfig) -> None:
         
     print(f"Found {len(classified_commits)} commits")
 
-    io.dump_classified_commits(classified_commits, config.output_dir, 'preclassified.json')
+    io.dump_classified_commits(classified_commits, config.collect.output_dir, 'preclassified.json')
     
-    llm_client = HttpLLMClient(config.root.llm_config)
-    llm_processor = LLMProcessor(config.root.llm_config, llm_client, config.output_dir)
+    llm_client = HttpLLMClient(config.llm_config)
+    llm_processor = LLMProcessor(config.llm_config, llm_client, config.collect.output_dir)
             
     unclear_commits = [
         commit for commit in classified_commits
@@ -61,8 +61,10 @@ def collect(config: CollectConfig) -> None:
             except ValueError:
                 # Если LLM вернула неизвестную классификацию, оставляем UNCLEAR
                 pass
+            
+            commit.to_changelog = result.get("to_changelog")
             commit.changelog_line = result.get("changelog_line")
             commit.commit_analysis = result.get("detailed_commit_analysis")
                         
 
-    io.dump_classified_commits(classified_commits, config.output_dir, 'classified.json')
\ No newline at end of file
+    io.dump_classified_commits(classified_commits, config.collect.output_dir, 'classified.json')
\ No newline at end of file
diff --git a/scripts/changelog_tool/changelog_tool/llm/client.py b/scripts/changelog_tool/changelog_tool/llm/client.py
index e95b1cecffaa..26880f6e8e9b 100644
--- a/scripts/changelog_tool/changelog_tool/llm/client.py
+++ b/scripts/changelog_tool/changelog_tool/llm/client.py
@@ -2,7 +2,8 @@
 import asyncio
 from abc import ABC, abstractmethod
 
-import aiohttp
+import httpx
+import openai
 from aiolimiter import AsyncLimiter
 
 from changelog_tool.llm.config import LLMConfig
@@ -29,6 +30,7 @@ def __init__(self, config: LLMConfig):
         self.url = os.environ.get("CHANGELOG_LLM_URL")
         api_key = os.environ.get("CHANGELOG_LLM_API_KEY")
         oauth_key = os.environ.get("CHANGELOG_LLM_OAUTH_KEY")
+        self.model = os.environ.get("CHANGELOG_LLM_MODEL")
         self.retries = config.retries
         
         if not self.url:
@@ -37,13 +39,20 @@ def __init__(self, config: LLMConfig):
         if api_key:
             auth_header = f"Bearer {api_key}"
         elif oauth_key:
-            auth_header = f"OAuth {oauth_key}"
+            auth_header = f"Oauth {oauth_key}"
         else:
             raise RuntimeError("Missing required environment variable: either CHANGELOG_LLM_API_KEY or CHANGELOG_LLM_OAUTH_KEY must be set")
             
         self.limiter = AsyncLimiter(config.target_rps, 1)
-        self.session = aiohttp.ClientSession(
-            headers={"Authorization": auth_header}
+        
+        http_client = httpx.AsyncClient(verify=False)
+        
+        self.client = openai.AsyncOpenAI(
+            base_url=self.url,
+            api_key=api_key or oauth_key or "dummy",
+            default_headers={"Authorization": auth_header},
+            max_retries=0,
+            http_client=http_client,
         )
 
     async def generate(self, prompt: str) -> str:
@@ -52,41 +61,73 @@ async def generate(self, prompt: str) -> str:
         for attempt in range(self.retries + 1):
             try:
                 async with self.limiter:
-                    async with self.session.post(self.url, json={"prompt": prompt}) as response:
-                        if response.status == 200:
-                            data = await response.json()
-                            return data.get("response", "")
-                            
-                        if response.status in (400, 401, 403, 404):
-                            text = await response.text()
-                            raise LLMError(f"Critical LLM error: {response.status} - {text}")
-                            
-                        if response.status == 429:
-                            retry_after = response.headers.get("Retry-After")
-                            if retry_after and retry_after.isdigit():
-                                wait_time = float(retry_after)
-                            else:
-                                wait_time = 2 ** attempt
-                            
-                            last_error = f"429 Too Many Requests. Waiting {wait_time}s"
-                            await asyncio.sleep(wait_time)
-                            continue
-                            
-                        if response.status >= 500:
-                            last_error = f"Server error {response.status}"
-                            await asyncio.sleep(2 ** attempt)
-                            continue
-                            
-                        # Неизвестный статус
-                        text = await response.text()
-                        raise LLMError(f"Unexpected status {response.status}: {text}")
-                        
-            except aiohttp.ClientError as e:
+                    if attempt > 0:
+                        print(f"  Retrying ({attempt}/{self.retries})...")
+                    
+                    response = await self.client.chat.completions.create(
+                        model=self.model,
+                        messages=[{"role": "user", "content": prompt}],
+                    )
+                    
+                    # Handle non-standard API response format
+                    # Some APIs return the actual data in a 'response' dict attribute
+                    if hasattr(response, 'response') and response.response:
+                        response_data = response.response
+                        if isinstance(response_data, dict) and 'choices' in response_data:
+                            choices = response_data['choices']
+                            if choices and len(choices) > 0:
+                                first_choice = choices[0]
+                                if 'message' in first_choice and 'content' in first_choice['message']:
+                                    content = first_choice['message']['content']
+                                    return content or ""
+                    
+                    # Standard OpenAI response format
+                    if not response:
+                        raise ValueError("LLM returned None response")
+                    
+                    if not hasattr(response, 'choices') or not response.choices:
+                        raise ValueError(f"LLM response has no choices. Response: {response}")
+                    
+                    if len(response.choices) == 0:
+                        raise ValueError(f"LLM returned empty choices list. Response: {response}")
+                    
+                    first_choice = response.choices[0]
+                    if not hasattr(first_choice, 'message'):
+                        raise ValueError(f"First choice has no message attribute. Choice: {first_choice}")
+                    
+                    message = first_choice.message
+                    if not hasattr(message, 'content'):
+                        raise ValueError(f"Message has no content attribute. Message: {message}")
+                    
+                    content = message.content
+                    return content or ""
+                    
+            except openai.RateLimitError as e:
+                last_error = f"Rate limit: {e}"
+                print(f"  Rate limit hit, waiting...")
+                await asyncio.sleep(2 ** attempt)
+                continue
+            except openai.APIStatusError as e:
+                if e.status_code in (400, 401, 403, 404):
+                    raise LLMError(f"Critical LLM error: {e.status_code} - {e.message}")
+                if e.status_code >= 500:
+                    last_error = f"Server error {e.status_code}"
+                    print(f"  Server error, retrying...")
+                    await asyncio.sleep(2 ** attempt)
+                    continue
+                raise LLMError(f"Unexpected status {e.status_code}: {e.message}")
+            except openai.APIError as e:
+                last_error = f"API error: {e}"
+                print(f"  API error, retrying...")
+                await asyncio.sleep(2 ** attempt)
+                continue
+            except Exception as e:
                 last_error = f"Client error: {e}"
+                print(f"  Error, retrying...")
                 await asyncio.sleep(2 ** attempt)
                 continue
                 
         raise LLMTransientError(f"Max retries ({self.retries}) exceeded. Last error: {last_error}")
 
     async def close(self):
-        await self.session.close()
+        await self.client.close()
diff --git a/scripts/changelog_tool/changelog_tool/llm/processor.py b/scripts/changelog_tool/changelog_tool/llm/processor.py
index 00305cd0987a..11511aa8d2a0 100644
--- a/scripts/changelog_tool/changelog_tool/llm/processor.py
+++ b/scripts/changelog_tool/changelog_tool/llm/processor.py
@@ -43,31 +43,35 @@ async def process_commits(self, commits: List[Commit]) -> Dict[str, Dict[str, An
         if not commits_to_process:
             return results
             
-        # Разбиваем на батчи
-        batches = [
-            commits_to_process[i:i + self.config.max_commits_per_batch]
-            for i in range(0, len(commits_to_process), self.config.max_commits_per_batch)
-        ]
+        # Разбиваем на батчи с учетом размера промпта
+        batches = self._create_smart_batches(commits_to_process)
+        
+        total_commits = sum(len(batch) for batch in batches)
+        print(f"Processing {total_commits} commits in {len(batches)} batches...")
         
         # Обрабатываем батчи параллельно
         batch_results = await asyncio.gather(
-            *[self._process_batch(batch) for batch in batches],
+            *[self._process_batch(batch, i, len(batches), total_commits) for i, batch in enumerate(batches)],
             return_exceptions=True
         )
         
         # Собираем результаты
-        for batch_result in batch_results:
+        completed_batches = 0
+        for batch_idx, batch_result in enumerate(batch_results):
             if isinstance(batch_result, Exception):
-                print(f"Warning: Batch processing failed with exception: {batch_result}")
                 # Ошибки в батчах уже записаны в стейт, просто продолжаем
                 continue
+            completed_batches += 1
             results.update(batch_result)
+        
+        print(f"Completed {completed_batches}/{len(batches)} batches")
             
         return results
         
-    async def _process_batch(self, batch: List[Commit]) -> Dict[str, Dict[str, Any]]:
+    async def _process_batch(self, batch: List[Commit], batch_idx: int = 0, total_batches: int = 0, total_commits: int = 0) -> Dict[str, Dict[str, Any]]:
         """Обрабатывает один батч коммитов."""
         try:
+            print(f"[{batch_idx + 1}/{total_batches}] Processing {len(batch)} commits...")
             prompt = self._build_prompt(batch)
             
             # Проверяем длину промпта
@@ -82,6 +86,7 @@ async def _process_batch(self, batch: List[Commit]) -> Dict[str, Dict[str, Any]]
                     return {
                         commit.sha: {
                             "classification": "unclear",
+                            "to_changelog": False,
                             "changelog_line": "",
                             "detailed_commit_analysis": ""
                         } for commit in batch
@@ -90,6 +95,15 @@ async def _process_batch(self, batch: List[Commit]) -> Dict[str, Dict[str, Any]]
             # Отправляем в LLM
             response_text = await self.llm_client.generate(prompt)
             
+            # Remove markdown code blocks if present
+            if response_text.strip().startswith('```json'):
+                response_text = response_text.strip()[7:]  # Remove ```json
+            if response_text.strip().startswith('```'):
+                response_text = response_text.strip()[3:]  # Remove ```
+            if response_text.strip().endswith('```'):
+                response_text = response_text.strip()[:-3]  # Remove trailing ```
+            response_text = response_text.strip()
+            
             # Парсим ответ
             try:
                 response_data = json.loads(response_text)
@@ -107,20 +121,28 @@ async def _process_batch(self, batch: List[Commit]) -> Dict[str, Dict[str, Any]]
                 if isinstance(commit_data, str):
                     # Fallback if LLM returned just a string
                     classification = commit_data
+                    to_changelog = classification in ["feature", "breaking-change"]
                     changelog_line = ""
                     detailed_commit_analysis = ""
                 else:
                     classification = commit_data.get("classification", "unclear")
+                    to_changelog = commit_data.get("to_changelog", None)
                     changelog_line = commit_data.get("changelog_line", "")
                     detailed_commit_analysis = commit_data.get("detailed_commit_analysis", "")
-                    
-                await self.state.set_result(commit.sha, classification, changelog_line, detailed_commit_analysis)
+                
+                completed = await self.state.set_result(commit.sha, classification, changelog_line, detailed_commit_analysis, to_changelog)
+                if total_commits > 0:
+                    remaining = total_commits - completed
+                    print(f"  Progress: {completed}/{total_commits} commits, {remaining} remaining")
+                
                 results[commit.sha] = {
                     "classification": classification,
+                    "to_changelog": to_changelog,
                     "changelog_line": changelog_line,
                     "detailed_commit_analysis": detailed_commit_analysis
                 }
                 
+            print(f"[{batch_idx + 1}/{total_batches}] ✓ Completed")
             return results
             
         except LLMError:
@@ -128,46 +150,107 @@ async def _process_batch(self, batch: List[Commit]) -> Dict[str, Dict[str, Any]]
             raise
         except Exception as e:
             # Временная ошибка или другая проблема - помечаем коммиты как ошибочные
-            error_msg = str(e)
+            error_msg = f"{type(e).__name__}: {str(e)}"
+            print(f"✗ Batch {batch_idx + 1}/{total_batches} failed: {error_msg}")
             for commit in batch:
                 await self.state.set_error(commit.sha, error_msg)
             return {
                 commit.sha: {
                     "classification": "unclear",
+                    "to_changelog": None,
                     "changelog_line": "",
                     "detailed_commit_analysis": ""
                 } for commit in batch
             }
             
+    def _create_smart_batches(self, commits: List[Commit]) -> List[List[Commit]]:
+        """Greedily split commits into batches bounded by count and prompt size.
+
+        A batch is closed as soon as the next commit would exceed either
+        max_commits_per_batch or max_user_prompt_length (the length of an empty
+        prompt is counted as fixed overhead). A commit that does not fit on its
+        own still gets a batch of its own.
+        """
+        if not commits:
+            return []
+
+        overhead = len(self._build_prompt([]))
+        max_count = self.config.max_commits_per_batch
+        max_length = self.config.max_user_prompt_length
+
+        batches: List[List[Commit]] = []
+        pending: List[Commit] = []
+        pending_size = 0
+        for commit in commits:
+            commit_size = self._estimate_commit_size(commit)
+            fits = len(pending) < max_count and pending_size + commit_size + overhead <= max_length
+            if not fits:
+                # Close the current batch (if any) and start a new one with this commit.
+                if pending:
+                    batches.append(pending)
+                pending, pending_size = [], 0
+            pending.append(commit)
+            pending_size += commit_size
+
+        if pending:
+            batches.append(pending)
+        return batches
+    
+    def _estimate_commit_size(self, commit: Commit) -> int:
+        """Estimate how many prompt characters a single commit will occupy."""
+        text_fields = (commit.sha, commit.title, commit.message)
+        size = sum(len(field) for field in text_fields)
+        size += len(', '.join(change.path for change in commit.changed_files))
+        if self.config.include_diff:
+            size += len(get_commit_diff(commit))
+
+        # Fixed allowance for JSON formatting and separators.
+        return size + 200
+    
     def _build_prompt(self, commits: List[Commit]) -> str:
         """Формирует промпт для батча коммитов."""
         system_prompt = """You are an expert software engineer analyzing git commits for a changelog.
 Your task is to analyze commits since the last release and highlight important and interesting changes.
 Ignore simple bugfixes, typos, and minor refactoring.
 
+IMPORTANT: This is for the USERVER project - a C++ asynchronous framework. Focus on changes that are significant for users of this framework.
+
 For each commit, you MUST provide a JSON object with the following fields:
 1. "classification": One of ["feature", "breaking-change", "refactor", "minor", "unclear"].
    - Use "breaking-change" if the commit introduces backward-incompatible changes.
-   - Use "feature" for new functionality.
+   - Use "feature" for new functionality that is important for USERVER users.
    - Use "refactor" for significant architectural changes.
    - Use "minor" for small improvements.
    - Use "unclear" if you cannot determine the classification.
-2. "changelog_line": A concise, user-friendly description of the change suitable for a changelog.
+2. "to_changelog": Boolean - MUST be true for:
+   - ALL breaking-change commits (these are critical for users)
+   - Features that are significant for USERVER users (new components, major APIs, important functionality)
+   - MUST be false for: minor refactoring, bugfixes, typos, internal changes, test updates
+3. "changelog_line": A concise, user-friendly description of the change suitable for a changelog.
    - IMPORTANT: If the classification is "breaking-change", you MUST include migration or fix instructions in this line if they are present in the commit message.
-3. "detailed_commit_analysis": A detailed analysis of what was added, why it was added, and what impact or benefit it brings to the project.
+   - Only include this if to_changelog is true.
+4. "detailed_commit_analysis": A detailed analysis of what was added, why it was added, and what impact or benefit it brings to the project.
 
 You MUST return a valid JSON object where keys are commit SHAs and values are the analysis objects.
 Example output format:
 {
   "commit_sha_1": {
     "classification": "feature",
+    "to_changelog": true,
     "changelog_line": "Added support for async LLM processing",
     "detailed_commit_analysis": "Added a new LLMProcessor class to handle batching and async requests. This improves performance by allowing parallel processing of commits."
   },
   "commit_sha_2": {
     "classification": "breaking-change",
+    "to_changelog": true,
     "changelog_line": "Changed config format. Migration: rename 'llm_config' to 'llm-config' in your yaml file.",
     "detailed_commit_analysis": "Updated the configuration schema to use hyphens instead of underscores for consistency. This breaks existing configs but aligns with the project's naming conventions."
+  },
+  "commit_sha_3": {
+    "classification": "minor",
+    "to_changelog": false,
+    "changelog_line": "",
+    "detailed_commit_analysis": "Fixed typo in documentation."
   }
 }
 """
diff --git a/scripts/changelog_tool/changelog_tool/llm/state.py b/scripts/changelog_tool/changelog_tool/llm/state.py
index 85ebbe095262..2295c2e6e35c 100644
--- a/scripts/changelog_tool/changelog_tool/llm/state.py
+++ b/scripts/changelog_tool/changelog_tool/llm/state.py
@@ -29,20 +29,19 @@ async def load(self) -> None:
                 
     async def save(self) -> None:
         """Асинхронно сохраняет состояние в файл."""
-        async with self.lock:
-            # Создаем директорию если её нет
-            self.state_file_path.parent.mkdir(parents=True, exist_ok=True)
-            
-            # Атомарная запись через временный файл
-            temp_file = self.state_file_path.with_suffix('.tmp')
-            try:
-                with open(temp_file, 'w', encoding='utf-8') as f:
-                    json.dump(self.state, f, ensure_ascii=False, indent=2)
-                temp_file.replace(self.state_file_path)
-            except IOError as e:
-                print(f"Error: Could not save state file {self.state_file_path}: {e}")
-                if temp_file.exists():
-                    temp_file.unlink()
+        # Create the directory if it does not exist (no lock is taken here: set_result awaits save() while holding self.lock)
+        self.state_file_path.parent.mkdir(parents=True, exist_ok=True)
+        
+        # Write to a temporary file first, then replace the state file with it
+        temp_file = self.state_file_path.with_suffix('.tmp')
+        try:
+            with open(temp_file, 'w', encoding='utf-8') as f:
+                json.dump(self.state, f, ensure_ascii=False, indent=2)
+            temp_file.replace(self.state_file_path)
+        except IOError as e:
+            print(f"Error: Could not save state file {self.state_file_path}: {e}")
+            if temp_file.exists():
+                temp_file.unlink()
                     
     async def cleanup(self, valid_shas: Set[str]) -> None:
         """Удаляет из стейта коммиты, не попавшие в текущую выборку."""
@@ -61,16 +60,19 @@ async def get_result(self, sha: str) -> Optional[Dict[str, Any]]:
                 return commit_data
             return None
             
-    async def set_result(self, sha: str, classification: str, changelog_line: str, detailed_commit_analysis: str) -> None:
-        """Сохраняет успешный результат классификации."""
+    async def set_result(self, sha: str, classification: str, changelog_line: str, detailed_commit_analysis: str, to_changelog: Optional[bool] = False) -> int:
+        """Store a successful classification result; return the number of state entries whose "error" is None."""
         async with self.lock:
             self.state[sha] = {
                 "classification": classification,
+                "to_changelog": to_changelog,
                 "changelog_line": changelog_line,
                 "detailed_commit_analysis": detailed_commit_analysis,
                 "error": None
             }
+            completed = len([k for k, v in self.state.items() if v.get("error") is None])
             await self.save()
+            return completed
             
     async def set_error(self, sha: str, error_message: str) -> None:
         """Сохраняет ошибку классификации."""
diff --git a/scripts/changelog_tool/requirements.txt b/scripts/changelog_tool/requirements.txt
index 30798fcb5dec..085d00e208db 100644
--- a/scripts/changelog_tool/requirements.txt
+++ b/scripts/changelog_tool/requirements.txt
@@ -1,5 +1,5 @@
 click >= 8.0.0
 PyYAML >= 6.0.1
 pydantic >= 2.5.3
-aiohttp >= 3.9.0
-aiolimiter >= 1.1.0
\ No newline at end of file
+openai >= 1.0.0
+aiolimiter >= 1.1.0

From 16c5adc0032ebe6c0fc664b2bf6c8afce9f63b00 Mon Sep 17 00:00:00 2001
From: Ivan Skriabin <dddccc20000@mail.ru>
Date: Wed, 10 Jun 2026 06:31:41 +0300
Subject: [PATCH 10/14] review

---
 scripts/changelog_tool/AGENTS.md              |  40 +++++
 scripts/changelog_tool/changelog-tool         |   8 +
 scripts/changelog_tool/changelog.yaml         |   5 +-
 .../changelog_tool/collect/classification.py  |   8 +-
 .../changelog_tool/changelog_tool/config.py   |   2 +
 .../changelog_tool/review/command.py          | 157 ++++++++++++++++++
 .../changelog_tool/review/config.py           |   6 +
 7 files changed, 218 insertions(+), 8 deletions(-)
 create mode 100644 scripts/changelog_tool/changelog_tool/review/command.py
 create mode 100644 scripts/changelog_tool/changelog_tool/review/config.py

diff --git a/scripts/changelog_tool/AGENTS.md b/scripts/changelog_tool/AGENTS.md
index e1bf08ab547e..c0922631badb 100644
--- a/scripts/changelog_tool/AGENTS.md
+++ b/scripts/changelog_tool/AGENTS.md
@@ -28,6 +28,45 @@ source .vent/bin/activate
 ./changelog-tool [command] [options]
 ```
 
+## Commands
+
+### collect
+
+Collects commits from the specified range and classifies them using heuristics and LLM analysis.
+
+```bash
+./changelog-tool collect [options]
+```
+
+Options:
+- `--from-sha`: Starting commit SHA (overrides config)
+- `--to-sha`: Ending commit SHA (overrides config)
+- `--repo-path`: Path to the repository (overrides config)
+
+### review
+
+Generates a markdown report and an override YAML file for reviewing classified commits.
+
+```bash
+./changelog-tool review
+```
+
+The review command generates two files in the output directory:
+- `review_report.md`: A markdown report showing all commits, sorted by size, with their classification status, changelog lines, and analysis
+- `override.yaml`: A commented YAML file containing all commits that can be uncommented and modified to override classifications
+
+The report is divided into two sections:
+1. **Not in Changelog**: Commits that are not included in the changelog (either filtered by heuristics or marked as unclear)
+2. **In Changelog**: Commits that are included in the changelog
+
+Each commit in the report shows:
+- Commit hash with link to GitHub
+- Commit title
+- Status (✅ In Changelog, ❌ Not in Changelog, or ❓ Unclear)
+- Size (number of lines changed)
+- Changelog line (if available)
+- Analysis (if available)
+
 ## Output Directory
 
 By default, the tool outputs classified commits to `.changelog/preclassified.json`. You can customize this with the `--output-dir` global option:
@@ -35,4 +74,5 @@ By default, the tool outputs classified commits to `.changelog/preclassified.jso
 ```bash
 # Run with custom output directory
 ./changelog-tool --output-dir ./my-output-dir collect
+./changelog-tool --output-dir ./my-output-dir review
 ```
\ No newline at end of file
diff --git a/scripts/changelog_tool/changelog-tool b/scripts/changelog_tool/changelog-tool
index 858937ac1b2c..702ce86fc036 100755
--- a/scripts/changelog_tool/changelog-tool
+++ b/scripts/changelog_tool/changelog-tool
@@ -4,6 +4,7 @@ import click
 
 import changelog_tool.config as cfg
 import changelog_tool.collect.command as collect_cmd
+import changelog_tool.review.command as review_cmd
 
 @click.group()
 @click.option('--config', default='changelog.yaml')
@@ -14,6 +15,7 @@ def cli(ctx: click.Context, config: str, output_dir: pathlib.Path | None):
     parsed_config = cfg.parse_config(pathlib.Path(config))
     if output_dir:
         parsed_config.collect.output_dir = output_dir
+        parsed_config.review.output_dir = output_dir
     ctx.obj["CONFIG"] = parsed_config
 
 @cli.command()
@@ -33,5 +35,11 @@ def collect(ctx: click.Context, from_sha: str | None, to_sha: str | None, repo_p
     
     collect_cmd.collect(config)
 
+@cli.command()
+@click.pass_context
+def review(ctx: click.Context):
+    # Hand the config stored by the group callback to the review command.
+    review_cmd.review(ctx.obj["CONFIG"])
+
 if __name__ == '__main__':
     cli()
\ No newline at end of file
diff --git a/scripts/changelog_tool/changelog.yaml b/scripts/changelog_tool/changelog.yaml
index 7b6e592a0e8d..e570506f28f0 100644
--- a/scripts/changelog_tool/changelog.yaml
+++ b/scripts/changelog_tool/changelog.yaml
@@ -10,6 +10,9 @@ llm-config:
   target_rps: 1
   retries: 3
   max_commits_per_batch: 10
-  max_user_prompt_length: 50000
+  max_user_prompt_length: 100000
   include_diff: true
   truncate_diff: false
+
+review:
+  github_url: "https://github.com/userver-framework/userver"
diff --git a/scripts/changelog_tool/changelog_tool/collect/classification.py b/scripts/changelog_tool/changelog_tool/collect/classification.py
index 96e629d241c8..0dbe6125a0ab 100644
--- a/scripts/changelog_tool/changelog_tool/collect/classification.py
+++ b/scripts/changelog_tool/changelog_tool/collect/classification.py
@@ -24,12 +24,6 @@ class ClassifiedCommit(Commit):
     commit_analysis: str | None = None
 
 def classify_commit(commit: Commit) -> Classification:
-    has_docs_in_files = any(
-        "docs/" in file_change.path.lower() or
-        "documentation" in file_change.path.lower()
-        for file_change in commit.changed_files
-    )
-    
     doc_keywords = ["doc", "docs", "documentation", "readme"]
     commit_title_lower = commit.title.lower()
     has_docs_in_title = any(keyword in commit_title_lower for keyword in doc_keywords)
@@ -37,7 +31,7 @@ def classify_commit(commit: Commit) -> Classification:
     fix_keywords = ["fix", "bugfix", "bug"]
     has_fix = any(keyword in commit_title_lower for keyword in fix_keywords)
     
-    if has_docs_in_files or has_docs_in_title:
+    if has_docs_in_title:
         return Classification.DOCS
         
     if has_fix and commit.score_size <= MINOR_BUG_SIZE_THRESHOLD:
diff --git a/scripts/changelog_tool/changelog_tool/config.py b/scripts/changelog_tool/changelog_tool/config.py
index a0ba032814ab..fc0ada2f8ab3 100644
--- a/scripts/changelog_tool/changelog_tool/config.py
+++ b/scripts/changelog_tool/changelog_tool/config.py
@@ -1,5 +1,6 @@
 from changelog_tool.collect.config import CollectConfig
 from changelog_tool.llm.config import LLMConfig
+from changelog_tool.review.config import ReviewConfig
 
 import pydantic
 import yaml
@@ -8,6 +9,7 @@
 class Config(pydantic.BaseModel):
     collect: CollectConfig
     llm_config: LLMConfig = pydantic.Field(alias="llm-config")
+    review: ReviewConfig
 
 def parse_config(config_path: pathlib.Path) -> Config:
     with open(config_path, 'r') as f:
diff --git a/scripts/changelog_tool/changelog_tool/review/command.py b/scripts/changelog_tool/changelog_tool/review/command.py
new file mode 100644
index 000000000000..8249499d71bd
--- /dev/null
+++ b/scripts/changelog_tool/changelog_tool/review/command.py
@@ -0,0 +1,157 @@
+import pathlib
+from typing import List
+
+import changelog_tool.common.io as io
+from changelog_tool.config import Config
+from changelog_tool.collect.classification import ClassifiedCommit, Classification
+
+
+def review(config: Config) -> None:
+    """Write ``review_report.md`` and ``override.yaml`` for manual review.
+
+    Commits that are excluded from the changelog or classified as unclear are
+    listed separately from the included ones, each list sorted by ``score_size``
+    in descending order. A commit whose ``to_changelog`` is ``None`` and whose
+    classification is not unclear lands in neither list. Both output files are
+    written to ``config.review.output_dir`` and overwrite any existing copy.
+    """
+    output_dir = config.review.output_dir
+    print(f"Loading classified commits from {output_dir}...")
+    classified_commits: List[ClassifiedCommit] = io.load_classified_commits(
+        output_dir, 'classified.json'
+    )
+    if not classified_commits:
+        print("No classified commits found. Please run 'collect' command first.")
+        return
+    print(f"Found {len(classified_commits)} classified commits")
+
+    not_in_changelog: List[ClassifiedCommit] = []
+    in_changelog: List[ClassifiedCommit] = []
+    for commit in classified_commits:
+        if commit.to_changelog is False or commit.classification == Classification.UNCLEAR:
+            not_in_changelog.append(commit)
+        elif commit.to_changelog is True:
+            in_changelog.append(commit)
+
+    not_in_changelog.sort(key=lambda c: c.score_size, reverse=True)
+    in_changelog.sort(key=lambda c: c.score_size, reverse=True)
+
+    markdown_content = _generate_markdown_report(
+        not_in_changelog, in_changelog, config.review.github_url
+    )
+    override_yaml_content = _generate_override_yaml(not_in_changelog, in_changelog)
+
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    # The report contains emoji status markers and both files embed commit
+    # titles, so encode as UTF-8 explicitly; the locale default (e.g. cp1252)
+    # would raise UnicodeEncodeError on such text.
+    markdown_file = output_dir / 'review_report.md'
+    with open(markdown_file, 'w', encoding='utf-8') as f:
+        f.write(markdown_content)
+    print(f"Generated markdown report: {markdown_file}")
+
+    override_file = output_dir / 'override.yaml'
+    with open(override_file, 'w', encoding='utf-8') as f:
+        f.write(override_yaml_content)
+    print(f"Generated override YAML: {override_file}")
+
+
+def _generate_markdown_report(
+    not_in_changelog: List[ClassifiedCommit],
+    in_changelog: List[ClassifiedCommit],
+    github_url: str
+) -> str:
+    """Render the complete review report as Markdown text.
+
+    The document is a title followed by a "Not in Changelog" and an
+    "In Changelog" section. Each section states how many commits it holds and
+    then lists them, in the order given, using ``_format_commit_markdown``.
+    """
+    sections = (
+        ("## Not in Changelog\n", not_in_changelog),
+        ("## In Changelog\n", in_changelog),
+    )
+
+    # Headings keep their trailing "\n" so the join below leaves a blank line
+    # after each of them.
+    parts = ["# Changelog Review Report\n"]
+    for heading, section_commits in sections:
+        parts.append(heading)
+        parts.append(f"Total: {len(section_commits)} commits\n")
+        for entry in section_commits:
+            parts.append(_format_commit_markdown(entry, github_url))
+            parts.append("")
+
+    return "\n".join(parts)
+
+
+def _format_commit_markdown(commit: ClassifiedCommit, github_url: str) -> str:
+    """Render one commit as a Markdown subsection of the review report.
+
+    The heading links the 8-character short SHA to ``{github_url}/commit/{sha}``
+    and is followed by the status and size lines, then by the changelog line
+    and the analysis text when the commit has them.
+    """
+    # Interpolating a str-mixin Enum member directly prints "Classification.X"
+    # on Python 3.11+ but the bare value on older versions; reading .value keeps
+    # the report identical everywhere. Plain strings are passed through as-is.
+    label = getattr(commit.classification, "value", commit.classification)
+
+    if commit.to_changelog is True:
+        status = "✅ In Changelog"
+    elif commit.to_changelog is False:
+        status = f"❌ Not in Changelog (Classification: {label})"
+    else:
+        status = f"❓ Unclear (Classification: {label})"
+
+    lines = [
+        f"### [{commit.sha[:8]}]({github_url}/commit/{commit.sha}) {commit.title}",
+        "",
+        f"**Status:** {status}",
+        f"**Size:** {commit.score_size} lines changed",
+        "",
+    ]
+    if commit.changelog_line:
+        lines += [f"**Changelog Line:** {commit.changelog_line}", ""]
+    if commit.commit_analysis:
+        lines += ["**Analysis:**", commit.commit_analysis, ""]
+
+    return "\n".join(lines)
+
+
+def _generate_override_yaml(
+    not_in_changelog: List[ClassifiedCommit],
+    in_changelog: List[ClassifiedCommit]
+) -> str:
+    """Build the commented-out ``override.yaml`` template.
+
+    Every commit gets one commented entry keyed by its SHA, excluded commits
+    first and included ones after them, showing the commit title and the
+    current ``to_changelog`` and ``changelog_line`` values.
+    """
+    import json
+
+    def quoted(text: str) -> str:
+        # A JSON string literal is also a valid YAML double-quoted scalar, so
+        # quotes, backslashes and newlines in the text cannot corrupt the entry.
+        return json.dumps(text, ensure_ascii=False)
+
+    lines = [
+        "# Override file for changelog classification",
+        "# Uncomment and modify entries to override classification",
+        "",
+    ]
+
+    for commit in not_in_changelog + in_changelog:
+        to_changelog = {True: "true", False: "false"}.get(commit.to_changelog, "null")
+        changelog_line = quoted(commit.changelog_line) if commit.changelog_line else "null"
+        lines += [
+            f"# {commit.sha}:",
+            f"#   commit_title: {quoted(commit.title)}",
+            f"#   to_changelog: {to_changelog}",
+            f"#   changelog_line: {changelog_line}",
+            "",
+        ]
+
+    return "\n".join(lines)
diff --git a/scripts/changelog_tool/changelog_tool/review/config.py b/scripts/changelog_tool/changelog_tool/review/config.py
new file mode 100644
index 000000000000..9a40fbf578ec
--- /dev/null
+++ b/scripts/changelog_tool/changelog_tool/review/config.py
@@ -0,0 +1,6 @@
+import pathlib
+import pydantic
+
+class ReviewConfig(pydantic.BaseModel):
+    github_url: str  # repository base URL; the review report appends "/commit/<sha>" to it
+    output_dir: pathlib.Path = pydantic.Field(default_factory=lambda: pathlib.Path(".changelog"))  # where classified.json is read and the review files are written

From dbdb6acfca89d6669a60ff8f6fa9d765e3a2a43f Mon Sep 17 00:00:00 2001
From: Ivan Skriabin <dddccc20000@mail.ru>
Date: Wed, 10 Jun 2026 10:21:38 +0300
Subject: [PATCH 11/14] report generation

---
 scripts/changelog_tool/AGENTS.md              |  25 +++
 scripts/changelog_tool/changelog-tool         |   8 +
 scripts/changelog_tool/changelog.yaml         |   7 +-
 .../changelog_tool/collect/classification.py  |   1 +
 .../changelog_tool/changelog_tool/config.py   |   2 +
 .../changelog_tool/llm/processor.py           |   3 +-
 .../changelog_tool/report/command.py          | 175 ++++++++++++++++++
 .../changelog_tool/report/config.py           |   6 +
 8 files changed, 224 insertions(+), 3 deletions(-)
 create mode 100644 scripts/changelog_tool/changelog_tool/report/command.py
 create mode 100644 scripts/changelog_tool/changelog_tool/report/config.py

diff --git a/scripts/changelog_tool/AGENTS.md b/scripts/changelog_tool/AGENTS.md
index c0922631badb..c53eb45e36be 100644
--- a/scripts/changelog_tool/AGENTS.md
+++ b/scripts/changelog_tool/AGENTS.md
@@ -67,6 +67,31 @@ Each commit in the report shows:
 - Changelog line (if available)
 - Analysis (if available)
 
+### report
+
+Generates a formatted Markdown changelog based on the review output and applies user overrides.
+
+```bash
+./changelog-tool report
+```
+
+The report command performs the following steps:
+1. Loads classified commits from `classified.json`
+2. Applies overrides from `override.yaml` (if present)
+3. Identifies commits marked for the changelog that lack changelog lines or analysis
+4. Runs these commits through the LLM with 1.5x increased prompt size and diff truncation enabled
+5. Generates a formatted Markdown changelog grouped by classification:
+   - Breaking Changes
+   - Features
+   - Optimizations
+   - Bug Fixes
+   - Refactoring
+   - Minor Changes
+   - Documentation
+6. Appends "Many thanks to [Name] for the PR!" for external contributors in the changelog
+7. Appends a section at the end for external contributors not included in the changelog
+8. Saves the generated changelog to `changelog.md` in the output directory
+
 ## Output Directory
 
 By default, the tool outputs classified commits to `.changelog/preclassified.json`. You can customize this with the `--output-dir` global option:
diff --git a/scripts/changelog_tool/changelog-tool b/scripts/changelog_tool/changelog-tool
index 702ce86fc036..801aca07f57b 100755
--- a/scripts/changelog_tool/changelog-tool
+++ b/scripts/changelog_tool/changelog-tool
@@ -5,6 +5,7 @@ import click
 import changelog_tool.config as cfg
 import changelog_tool.collect.command as collect_cmd
 import changelog_tool.review.command as review_cmd
+import changelog_tool.report.command as report_cmd
 
 @click.group()
 @click.option('--config', default='changelog.yaml')
@@ -16,6 +17,7 @@ def cli(ctx: click.Context, config: str, output_dir: pathlib.Path | None):
     if output_dir:
         parsed_config.collect.output_dir = output_dir
         parsed_config.review.output_dir = output_dir
+        parsed_config.report.output_dir = output_dir
     ctx.obj["CONFIG"] = parsed_config
 
 @cli.command()
@@ -41,5 +43,11 @@ def review(ctx: click.Context):
     config = ctx.obj["CONFIG"]
     review_cmd.review(config)
 
+@cli.command()
+@click.pass_context
+def report(ctx: click.Context):
+    config = ctx.obj["CONFIG"]  # parsed Config stored on the context by the cli() group callback
+    report_cmd.report(config)
+
 if __name__ == '__main__':
     cli()
\ No newline at end of file
diff --git a/scripts/changelog_tool/changelog.yaml b/scripts/changelog_tool/changelog.yaml
index e570506f28f0..b76d6e3bf37a 100644
--- a/scripts/changelog_tool/changelog.yaml
+++ b/scripts/changelog_tool/changelog.yaml
@@ -1,5 +1,5 @@
 collect:
-  from_sha: c580979b522f43ea1ab9cd55033cd353d52844f6
+  from_sha: da8642900398c33333e29e2bd3e91ca4e181f602
   to_sha: HEAD
   repo_path: ../..
   core_team_patterns:
@@ -9,10 +9,13 @@ collect:
 llm-config:
   target_rps: 1
   retries: 3
-  max_commits_per_batch: 10
+  max_commits_per_batch: 4
   max_user_prompt_length: 100000
   include_diff: true
   truncate_diff: false
 
 review:
   github_url: "https://github.com/userver-framework/userver"
+
+report:
+  github_url: "https://github.com/userver-framework/userver"
diff --git a/scripts/changelog_tool/changelog_tool/collect/classification.py b/scripts/changelog_tool/changelog_tool/collect/classification.py
index 0dbe6125a0ab..9f0a9c0abc2c 100644
--- a/scripts/changelog_tool/changelog_tool/collect/classification.py
+++ b/scripts/changelog_tool/changelog_tool/collect/classification.py
@@ -12,6 +12,7 @@ class Classification(str, Enum):
     DOCS = "docs"
     UNCLEAR = "unclear"
     MINOR="minor"
+    OPTIMIZATION = "optimization"
 
 MINOR_BUG_SIZE_THRESHOLD = 200
 MINOR_SIZE_THRESHOLD = 50
diff --git a/scripts/changelog_tool/changelog_tool/config.py b/scripts/changelog_tool/changelog_tool/config.py
index fc0ada2f8ab3..57b38b878d01 100644
--- a/scripts/changelog_tool/changelog_tool/config.py
+++ b/scripts/changelog_tool/changelog_tool/config.py
@@ -1,6 +1,7 @@
 from changelog_tool.collect.config import CollectConfig
 from changelog_tool.llm.config import LLMConfig
 from changelog_tool.review.config import ReviewConfig
+from changelog_tool.report.config import ReportConfig
 
 import pydantic
 import yaml
@@ -10,6 +11,7 @@ class Config(pydantic.BaseModel):
     collect: CollectConfig
     llm_config: LLMConfig = pydantic.Field(alias="llm-config")
     review: ReviewConfig
+    report: ReportConfig
 
 def parse_config(config_path: pathlib.Path) -> Config:
     with open(config_path, 'r') as f:
diff --git a/scripts/changelog_tool/changelog_tool/llm/processor.py b/scripts/changelog_tool/changelog_tool/llm/processor.py
index 11511aa8d2a0..ac3bde54142b 100644
--- a/scripts/changelog_tool/changelog_tool/llm/processor.py
+++ b/scripts/changelog_tool/changelog_tool/llm/processor.py
@@ -216,11 +216,12 @@ def _build_prompt(self, commits: List[Commit]) -> str:
 IMPORTANT: This is for the USERVER project - a C++ asynchronous framework. Focus on changes that are significant for users of this framework.
 
 For each commit, you MUST provide a JSON object with the following fields:
-1. "classification": One of ["feature", "breaking-change", "refactor", "minor", "unclear"].
+1. "classification": One of ["feature", "breaking-change", "refactor", "minor", "optimization", "unclear"].
    - Use "breaking-change" if the commit introduces backward-incompatible changes.
    - Use "feature" for new functionality that is important for USERVER users.
    - Use "refactor" for significant architectural changes.
    - Use "minor" for small improvements.
+   - Use "optimization" for performance improvements, optimizations, and efficiency gains.
    - Use "unclear" if you cannot determine the classification.
 2. "to_changelog": Boolean - MUST be true for:
    - ALL breaking-change commits (these are critical for users)
diff --git a/scripts/changelog_tool/changelog_tool/report/command.py b/scripts/changelog_tool/changelog_tool/report/command.py
new file mode 100644
index 000000000000..073736d7b0d4
--- /dev/null
+++ b/scripts/changelog_tool/changelog_tool/report/command.py
@@ -0,0 +1,175 @@
+import asyncio
+import pathlib
+import re
+from typing import List, Dict, Any
+
+import changelog_tool.common.git as git
+import changelog_tool.common.io as io
+from changelog_tool.config import Config
+from changelog_tool.collect.classification import ClassifiedCommit, Classification
+from changelog_tool.llm.client import HttpLLMClient
+from changelog_tool.llm.processor import LLMProcessor
+from changelog_tool.llm.config import LLMConfig
+
+
+def report(config: Config) -> None:
+    """Generate ``changelog.md`` from the classified commits.
+
+    Loads ``classified.json`` from ``config.report.output_dir``, applies
+    ``override.yaml`` when that file exists, sends commits that are marked for
+    the changelog but lack a changelog line or an analysis to the LLM, and
+    writes the rendered changelog into the same directory.
+    """
+    output_dir = config.report.output_dir
+    print(f"Loading classified commits from {output_dir}...")
+    classified_commits: List[ClassifiedCommit] = io.load_classified_commits(
+        output_dir, 'classified.json'
+    )
+    if not classified_commits:
+        print("No classified commits found. Please run 'collect' command first.")
+        return
+    print(f"Found {len(classified_commits)} classified commits")
+
+    override_file = output_dir / 'override.yaml'
+    if override_file.exists():
+        print(f"Applying overrides from {override_file}...")
+        _apply_overrides(classified_commits, override_file)
+
+    commits_needing_analysis = [
+        commit for commit in classified_commits
+        if commit.to_changelog is True and (not commit.changelog_line or not commit.commit_analysis)
+    ]
+    if commits_needing_analysis:
+        print(f"Found {len(commits_needing_analysis)} commits needing LLM analysis")
+
+        # Copy the configured settings rather than re-listing each field, so options
+        # added to LLMConfig later keep their configured values; only these two change.
+        modified_llm_config = config.llm_config.model_copy(update={
+            "max_user_prompt_length": int(config.llm_config.max_user_prompt_length * 1.5),
+            "truncate_diff": True,
+        })
+        llm_client = HttpLLMClient(modified_llm_config)
+        llm_processor = LLMProcessor(modified_llm_config, llm_client, output_dir)
+        llm_results = asyncio.run(llm_processor.process_commits(commits_needing_analysis))
+
+        for commit in classified_commits:
+            if commit.sha not in llm_results:
+                continue
+            result = llm_results[commit.sha]
+            # Fill gaps only: a changelog line that is already set (for example
+            # through override.yaml) must not be replaced by the LLM's wording.
+            if not commit.changelog_line:
+                commit.changelog_line = result.get("changelog_line", "")
+            commit.commit_analysis = result.get("detailed_commit_analysis", "")
+            try:
+                commit.classification = Classification(result["classification"])
+            except (KeyError, ValueError):
+                pass  # missing or unknown label: keep the current classification
+            print(f"Updated commit {commit.sha} with LLM results")
+
+    print("Generating changelog...")
+    changelog_content = _generate_changelog(classified_commits, config.report.github_url)
+
+    # Explicit UTF-8: changelog lines and author names may be non-ASCII.
+    changelog_file = output_dir / 'changelog.md'
+    with open(changelog_file, 'w', encoding='utf-8') as f:
+        f.write(changelog_content)
+    print(f"Generated changelog: {changelog_file}")
+
+
+def _apply_overrides(commits: List[ClassifiedCommit], override_file: pathlib.Path) -> None:
+    """Apply per-SHA overrides from ``override_file`` to ``commits`` in place.
+
+    SHAs that match no commit and entries that are not mappings are ignored.
+    """
+    import yaml
+    # Explicit UTF-8: changelog lines in the file may contain non-ASCII text.
+    with open(override_file, 'r', encoding='utf-8') as f:
+        override_data = yaml.safe_load(f)
+    # A file whose entries are all still commented out loads as None.
+    if not override_data:
+        return
+    commit_map = {commit.sha: commit for commit in commits}
+    for sha, override in override_data.items():
+        if sha in commit_map and isinstance(override, dict):
+            commit = commit_map[sha]
+            if 'to_changelog' in override:
+                commit.to_changelog = override['to_changelog']
+            if 'changelog_line' in override:
+                commit.changelog_line = override['changelog_line']
+
+
+def _generate_changelog(commits: List[ClassifiedCommit], github_url: str) -> str:
+    """Render the changelog as nested Markdown bullets: one section per classification, then a closing thanks list."""
+    lines = []
+    
+    # Bucket commits by classification value; only to_changelog=True commits with a non-empty changelog_line are kept
+    groups: Dict[str, List[ClassifiedCommit]] = {}
+    for commit in commits:
+        if commit.to_changelog is True and commit.changelog_line:
+            classification = commit.classification.value
+            if classification not in groups:
+                groups[classification] = []
+            groups[classification].append(commit)
+    
+    # Fixed section order; a classification value missing from this list is never rendered
+    classification_order = [
+        "breaking-change",
+        "feature",
+        "optimization",
+        "bug",
+        "refactor",
+        "minor",
+        "docs",
+        "unclear"
+    ]
+    
+    # Emit the sections in the order above, skipping classifications without commits
+    for classification in classification_order:
+        if classification not in groups:
+            continue
+        
+        section_commits = groups[classification]
+        if not section_commits:
+            continue
+        
+        # Header text is derived from the value, e.g. "breaking-change" -> "Breaking Change"
+        section_title = classification.replace("-", " ").title()
+        lines.append(f"* {section_title}")
+        lines.append("")
+        
+        # Commit entries
+        for commit in section_commits:
+            line = f"  * {commit.changelog_line}"
+            
+            # Add external contributor thanks
+            if commit.is_external:
+                author_name = _extract_author_name(commit.author)
+                line += f" Many thanks to {author_name} for the PR!"
+            
+            lines.append(line)
+        
+        lines.append("")
+    
+    # Collect external contributors not in changelog
+    external_contributors_not_in_changelog = set()
+    for commit in commits:
+        if commit.is_external and (commit.to_changelog is False or commit.to_changelog is None):
+            author_name = _extract_author_name(commit.author)
+            external_contributors_not_in_changelog.add(author_name)
+    
+    if external_contributors_not_in_changelog:
+        lines.append("* Many thanks to:")
+        for contributor in sorted(external_contributors_not_in_changelog):
+            lines.append(f"  * {contributor} for the contribution!")
+        lines.append("")
+    
+    return "\n".join(lines)
+
+
+def _extract_author_name(author: str) -> str:
+    """Return the name from a 'Name <email>' string, or the input unchanged when it has no such shape."""
+    found = re.match(r'^(.+?)\s*<', author)
+    # No "<" after a non-empty prefix means there is no e-mail part to cut off.
+    name = found.group(1).strip() if found else author
+    return name
diff --git a/scripts/changelog_tool/changelog_tool/report/config.py b/scripts/changelog_tool/changelog_tool/report/config.py
new file mode 100644
index 000000000000..74d84c30fc98
--- /dev/null
+++ b/scripts/changelog_tool/changelog_tool/report/config.py
@@ -0,0 +1,6 @@
+import pathlib
+import pydantic
+
+class ReportConfig(pydantic.BaseModel):
+    github_url: str  # repository base URL; report() passes it to the changelog generator
+    output_dir: pathlib.Path = pydantic.Field(default_factory=lambda: pathlib.Path(".changelog"))  # where classified.json / override.yaml are read and changelog.md is written

From c4de078d7f700c17e75cdbf04aa9d7f281f5218f Mon Sep 17 00:00:00 2001
From: Ivan Skriabin <dddccc20000@mail.ru>
Date: Wed, 10 Jun 2026 11:43:13 +0300
Subject: [PATCH 12/14] changelog-tool complete

---
 scripts/changelog_tool/changelog.yaml         |  3 +-
 .../changelog_tool/collect/classification.py  |  1 +
 .../changelog_tool/collect/command.py         | 38 +++++++++-
 .../changelog_tool/llm/client.py              | 66 ++++++++---------
 .../changelog_tool/llm/config.py              |  1 +
 .../changelog_tool/report/command.py          | 71 +++++++++++++++----
 6 files changed, 134 insertions(+), 46 deletions(-)

diff --git a/scripts/changelog_tool/changelog.yaml b/scripts/changelog_tool/changelog.yaml
index b76d6e3bf37a..e38c1c2ee8e7 100644
--- a/scripts/changelog_tool/changelog.yaml
+++ b/scripts/changelog_tool/changelog.yaml
@@ -8,11 +8,12 @@ collect:
 
 llm-config:
   target_rps: 1
-  retries: 3
+  retries: 7
   max_commits_per_batch: 4
   max_user_prompt_length: 100000
   include_diff: true
   truncate_diff: false
+  max_concurrent_requests: 2
 
 review:
   github_url: "https://github.com/userver-framework/userver"
diff --git a/scripts/changelog_tool/changelog_tool/collect/classification.py b/scripts/changelog_tool/changelog_tool/collect/classification.py
index 9f0a9c0abc2c..cd83f7c870ff 100644
--- a/scripts/changelog_tool/changelog_tool/collect/classification.py
+++ b/scripts/changelog_tool/changelog_tool/collect/classification.py
@@ -23,6 +23,7 @@ class ClassifiedCommit(Commit):
     to_changelog: bool | None = None
     changelog_line: str | None = None
     commit_analysis: str | None = None
+    component: str | None = None
 
 def classify_commit(commit: Commit) -> Classification:
     doc_keywords = ["doc", "docs", "documentation", "readme"]
diff --git a/scripts/changelog_tool/changelog_tool/collect/command.py b/scripts/changelog_tool/changelog_tool/collect/command.py
index 25eeb1e14b41..9e02ccb07adc 100644
--- a/scripts/changelog_tool/changelog_tool/collect/command.py
+++ b/scripts/changelog_tool/changelog_tool/collect/command.py
@@ -11,6 +11,38 @@
 from changelog_tool.llm.processor import LLMProcessor
 from changelog_tool.llm.exceptions import LLMError
 
+
+def _extract_component_from_title(title: str) -> str | None:
+    """Extract the lowercased component name from a commit title.
+
+    Examples:
+    - "feat odbc: improve driver" -> "odbc"
+    - "fix(redis): connection leak" -> "redis"
+    - "feat chaotic: deal with..." -> "chaotic"
+    - "docs: update README" -> None
+    - "feat : something" -> None (a bare colon names no component)
+    """
+    # Shapes: "type(component): text", "type component: text", "type: text".
+    match = re.match(r'^(\w+)(?:\(([^)]+)\))?:?\s*(.+)', title)
+    if not match:
+        return None
+
+    scoped, description = match.group(2), match.group(3)
+    if scoped:
+        # An explicit "(component)" wins over anything in the description.
+        return scoped.lower()
+
+    words = description.split()
+    if not words:
+        return None
+    if words[0].endswith(':'):
+        # "odbc: improve" -> "odbc"; a lone ":" would give "", so map that to None.
+        return words[0][:-1].lower() or None
+    if len(words) > 1 and words[1].startswith(':'):
+        # Colon detached from the name, as in "chaotic : deal with...".
+        return words[0].lower()
+    return None
+
 def collect(config: Config) -> None:
     print(f"Collecting commits from {config.collect.from_sha} to {config.collect.to_sha}...")
     commits: list[git.Commit] = git.get_commits(config.collect.from_sha, config.collect.to_sha, config.collect.repo_path)
@@ -21,13 +53,17 @@ def collect(config: Config) -> None:
     for commit in commits:
         is_core_team = any(regex.match(commit.author) for regex in core_team_regexes)
         classification = classify_commit(commit)
+        # Extract component from title (e.g., "feat odbc: improve driver" -> "odbc")
+        component = _extract_component_from_title(commit.title)
+        
         classified_commit = ClassifiedCommit(
             **commit.model_dump(),
             classification=classification,
             is_external=not is_core_team,
             to_changelog=None,
             changelog_line=None,
-            commit_analysis=None
+            commit_analysis=None,
+            component=component
         )
         
         if classification in [Classification.FEATURE, Classification.BUG, Classification.BREAKING_CHANGE]:
diff --git a/scripts/changelog_tool/changelog_tool/llm/client.py b/scripts/changelog_tool/changelog_tool/llm/client.py
index 26880f6e8e9b..76525cba7d05 100644
--- a/scripts/changelog_tool/changelog_tool/llm/client.py
+++ b/scripts/changelog_tool/changelog_tool/llm/client.py
@@ -44,6 +44,7 @@ def __init__(self, config: LLMConfig):
             raise RuntimeError("Missing required environment variable: either CHANGELOG_LLM_API_KEY or CHANGELOG_LLM_OAUTH_KEY must be set")
             
         self.limiter = AsyncLimiter(config.target_rps, 1)
+        self.semaphore = asyncio.Semaphore(config.max_concurrent_requests)
         
         http_client = httpx.AsyncClient(verify=False)
         
@@ -58,16 +59,17 @@ def __init__(self, config: LLMConfig):
     async def generate(self, prompt: str) -> str:
         last_error = None
         
-        for attempt in range(self.retries + 1):
-            try:
-                async with self.limiter:
-                    if attempt > 0:
-                        print(f"  Retrying ({attempt}/{self.retries})...")
-                    
-                    response = await self.client.chat.completions.create(
-                        model=self.model,
-                        messages=[{"role": "user", "content": prompt}],
-                    )
+        async with self.semaphore:
+            for attempt in range(self.retries + 1):
+                try:
+                    async with self.limiter:
+                        if attempt > 0:
+                            print(f"  Retrying ({attempt}/{self.retries})...")
+                        
+                        response = await self.client.chat.completions.create(
+                            model=self.model,
+                            messages=[{"role": "user", "content": prompt}],
+                        )
                     
                     # Handle non-standard API response format
                     # Some APIs return the actual data in a 'response' dict attribute
@@ -102,30 +104,30 @@ async def generate(self, prompt: str) -> str:
                     content = message.content
                     return content or ""
                     
-            except openai.RateLimitError as e:
-                last_error = f"Rate limit: {e}"
-                print(f"  Rate limit hit, waiting...")
-                await asyncio.sleep(2 ** attempt)
-                continue
-            except openai.APIStatusError as e:
-                if e.status_code in (400, 401, 403, 404):
-                    raise LLMError(f"Critical LLM error: {e.status_code} - {e.message}")
-                if e.status_code >= 500:
-                    last_error = f"Server error {e.status_code}"
-                    print(f"  Server error, retrying...")
+                except openai.RateLimitError as e:
+                    last_error = f"Rate limit: {e}"
+                    print(f"  Rate limit hit, waiting...")
+                    await asyncio.sleep(2 ** attempt)
+                    continue
+                except openai.APIStatusError as e:
+                    if e.status_code in (400, 401, 403, 404):
+                        raise LLMError(f"Critical LLM error: {e.status_code} - {e.message}")
+                    if e.status_code >= 500:
+                        last_error = f"Server error {e.status_code}"
+                        print(f"  Server error, retrying...")
+                        await asyncio.sleep(2 ** attempt)
+                        continue
+                    raise LLMError(f"Unexpected status {e.status_code}: {e.message}")
+                except openai.APIError as e:
+                    last_error = f"API error: {e}"
+                    print(f"  API error, retrying...")
+                    await asyncio.sleep(2 ** attempt)
+                    continue
+                except Exception as e:
+                    last_error = f"Client error: {e}"
+                    print(f"  Error, retrying...")
                     await asyncio.sleep(2 ** attempt)
                     continue
-                raise LLMError(f"Unexpected status {e.status_code}: {e.message}")
-            except openai.APIError as e:
-                last_error = f"API error: {e}"
-                print(f"  API error, retrying...")
-                await asyncio.sleep(2 ** attempt)
-                continue
-            except Exception as e:
-                last_error = f"Client error: {e}"
-                print(f"  Error, retrying...")
-                await asyncio.sleep(2 ** attempt)
-                continue
                 
         raise LLMTransientError(f"Max retries ({self.retries}) exceeded. Last error: {last_error}")
 
diff --git a/scripts/changelog_tool/changelog_tool/llm/config.py b/scripts/changelog_tool/changelog_tool/llm/config.py
index 5327103bd420..571683c63736 100644
--- a/scripts/changelog_tool/changelog_tool/llm/config.py
+++ b/scripts/changelog_tool/changelog_tool/llm/config.py
@@ -7,3 +7,4 @@ class LLMConfig(pydantic.BaseModel):
     max_user_prompt_length: int = 8000
     include_diff: bool = True
     truncate_diff: bool = True
+    max_concurrent_requests: int = 5
diff --git a/scripts/changelog_tool/changelog_tool/report/command.py b/scripts/changelog_tool/changelog_tool/report/command.py
index 073736d7b0d4..cf8da1034873 100644
--- a/scripts/changelog_tool/changelog_tool/report/command.py
+++ b/scripts/changelog_tool/changelog_tool/report/command.py
@@ -97,6 +97,11 @@ def _apply_overrides(commits: List[ClassifiedCommit], override_file: pathlib.Pat
                 commit.to_changelog = override['to_changelog']
             if 'changelog_line' in override:
                 commit.changelog_line = override['changelog_line']
+            if 'classification' in override:
+                try:
+                    commit.classification = Classification(override['classification'])
+                except ValueError:
+                    pass
 
 
 def _generate_changelog(commits: List[ClassifiedCommit], github_url: str) -> str:
@@ -138,30 +143,72 @@ def _generate_changelog(commits: List[ClassifiedCommit], github_url: str) -> str
         lines.append(f"* {section_title}")
         lines.append("")
         
-        # Commit entries
+        # Group commits by component within each classification
+        component_groups: Dict[str, List[ClassifiedCommit]] = {}
+        commits_without_component = []
+        
         for commit in section_commits:
-            line = f"  * {commit.changelog_line}"
+            if commit.component:
+                if commit.component not in component_groups:
+                    component_groups[commit.component] = []
+                component_groups[commit.component].append(commit)
+            else:
+                commits_without_component.append(commit)
+        
+        # Generate entries for each component
+        for component in sorted(component_groups.keys()):
+            component_commits = component_groups[component]
+            lines.append(f"  * {component}")
+            lines.append("")
             
-            # Add external contributor thanks
-            if commit.is_external:
-                author_name = _extract_author_name(commit.author)
-                line += f" Many thanks to {author_name} for the PR!"
+            for commit in component_commits:
+                short_sha = commit.sha[:8]
+                line = f"    * {commit.changelog_line} <!-- {short_sha} -->"
+                
+                # Add external contributor thanks
+                if commit.is_external:
+                    author_name = _extract_author_name(commit.author)
+                    line += f" Many thanks to {author_name} for the PR!"
+                
+                lines.append(line)
             
-            lines.append(line)
+            lines.append("")
         
-        lines.append("")
+        # Generate entries for commits without component
+        if commits_without_component:
+            for commit in commits_without_component:
+                short_sha = commit.sha[:8]
+                line = f"  * {commit.changelog_line} <!-- {short_sha} -->"
+                
+                # Add external contributor thanks
+                if commit.is_external:
+                    author_name = _extract_author_name(commit.author)
+                    line += f" Many thanks to {author_name} for the PR!"
+                
+                lines.append(line)
+            
+            lines.append("")
     
     # Collect external contributors not in changelog
-    external_contributors_not_in_changelog = set()
+    # Group by author and collect their commit titles
+    external_contributors_not_in_changelog: Dict[str, List[str]] = {}
     for commit in commits:
         if commit.is_external and (commit.to_changelog is False or commit.to_changelog is None):
             author_name = _extract_author_name(commit.author)
-            external_contributors_not_in_changelog.add(author_name)
+            if author_name not in external_contributors_not_in_changelog:
+                external_contributors_not_in_changelog[author_name] = []
+            external_contributors_not_in_changelog[author_name].append(commit.title)
     
     if external_contributors_not_in_changelog:
         lines.append("* Many thanks to:")
-        for contributor in sorted(external_contributors_not_in_changelog):
-            lines.append(f"  * {contributor} for the contribution!")
+        for contributor in sorted(external_contributors_not_in_changelog.keys()):
+            titles = external_contributors_not_in_changelog[contributor]
+            if len(titles) == 1:
+                lines.append(f"  * {contributor} for {titles[0]}!")
+            else:
+                lines.append(f"  * {contributor} for:")
+                for title in titles:
+                    lines.append(f"    * {title}")
         lines.append("")
     
     return "\n".join(lines)

From 1bd78a29299fe12728af7529e0ebfe9eb2968971 Mon Sep 17 00:00:00 2001
From: Ivan Skriabin <dddccc20000@mail.ru>
Date: Wed, 10 Jun 2026 11:50:10 +0300
Subject: [PATCH 13/14] changelog-tool readme

---
 scripts/changelog_tool/README.md | 174 +++++++++++++++++++++++++++++++
 1 file changed, 174 insertions(+)
 create mode 100644 scripts/changelog_tool/README.md

diff --git a/scripts/changelog_tool/README.md b/scripts/changelog_tool/README.md
new file mode 100644
index 000000000000..3edc19efe209
--- /dev/null
+++ b/scripts/changelog_tool/README.md
@@ -0,0 +1,174 @@
+# Changelog Tool
+
+A tool for automatically generating changelogs from git commits using LLM analysis.
+
+## Features
+
+- **Automatic commit classification**: Classifies commits into categories (feature, bug, optimization, refactor, minor, docs, unclear)
+- **LLM-powered analysis**: Uses LLM to analyze commits and generate changelog entries
+- **External contributor detection**: Identifies external contributors and generates acknowledgments
+- **Component extraction**: Extracts component names from commit titles for better organization
+- **Override support**: Allows manual override of classifications and changelog entries
+- **State persistence**: Saves LLM analysis results to avoid reprocessing
+- **Rate limiting**: Configurable rate limiting and concurrent request limits
+
+## Installation
+
+1. Ensure you have Python 3.10+ installed (the code uses `X | None` type annotations)
+2. Install dependencies:
+```bash
+cd scripts/changelog_tool
+python3 -m venv .venv
+source .venv/bin/activate
+python3 -m pip install -r requirements.txt
+```
+
+3. Set up environment variables:
+```bash
+export CHANGELOG_LLM_URL="https://your-llm-api.com/v1"
+export CHANGELOG_LLM_API_KEY="your-api-key"
+export CHANGELOG_LLM_MODEL="your-model-name"
+```
+
+## Configuration
+
+The tool is configured via `changelog.yaml`:
+
+```yaml
+collect:
+  from_sha: <commit-sha>  # Starting commit SHA
+  to_sha: HEAD           # Ending commit SHA (default: HEAD)
+  repo_path: ../..       # Path to the repository (default: ../..)
+  core_team_patterns:    # Patterns to identify core team members
+    - ".*@userver\\.tech"
+    - ".*@yandex-team\\.com"
+
+llm-config:
+  target_rps: 1                    # Target requests per second
+  retries: 3                       # Number of retry attempts
+  max_commits_per_batch: 10        # Maximum commits per LLM batch
+  max_user_prompt_length: 100000   # Maximum prompt length in characters
+  include_diff: true               # Include diff in LLM prompt
+  truncate_diff: false             # Truncate diff if too long
+  max_concurrent_requests: 5       # Maximum concurrent requests
+
+review:
+  github_url: "https://github.com/userver-framework/userver"
+
+report:
+  github_url: "https://github.com/userver-framework/userver"
+```
+
+## Usage
+
+### Step 1: Collect Commits
+
+Run the `collect` command to gather commits and analyze them:
+
+```bash
+source .venv/bin/activate
+./changelog-tool collect
+```
+
+The tool will:
+1. Fetch commits from the specified range
+2. Classify commits using heuristics
+3. Send unclear commits to LLM for analysis
+4. Save results to `.changelog/classified.json`
+
+**Important**: Run the `collect` command repeatedly until you see a message like:
+```
+Found 10 commits, 10 already processed, 0 to process via LLM
+```
+
+This ensures all commits have been processed by the LLM. The tool uses state persistence to avoid reprocessing commits, so running it multiple times is safe and recommended for reliability.
+
+### Step 2: Review and Override
+
+Run the `review` command to generate a review report:
+
+```bash
+./changelog-tool review
+```
+
+This generates two files in `.changelog/`:
+- `review_report.md`: A markdown report showing all commits with their classification status
+- `override.yaml`: A commented YAML file for overriding classifications
+
+Review the report and uncomment/modify entries in `override.yaml` to override classifications:
+
+```yaml
+# Example override.yaml
+commit_sha_1:
+  to_changelog: true
+  changelog_line: "Added support for async LLM processing"
+
+commit_sha_2:
+  to_changelog: false
+  classification: "minor"
+```
+
+Feel free to leave `classification` or `changelog_line` empty; the LLM will handle it in the next step.
+
+### Step 3: Generate Changelog
+
+Run the `report` command to generate the final changelog:
+
+```bash
+./changelog-tool report
+```
+
+This will:
+1. Load classified commits from `classified.json`
+2. Apply overrides from `override.yaml`
+3. Process commits needing LLM analysis with increased prompt size (1.5x) and diff truncation
+4. Generate a formatted Markdown changelog grouped by classification and component
+5. Save the changelog to `.changelog/changelog.md`
+
+**Important**: Run the `report` command repeatedly until you see a message like:
+```
+Found 10 commits, 10 already processed, 0 to process via LLM
+```
+
+This ensures all commits that need LLM analysis have been processed.
+
+## Output Format
+
+The generated changelog has the following structure:
+
+```markdown
+* Breaking Change
+  * component1
+    * changelog line 1 <!-- abc12345 -->
+    * changelog line 2 <!-- def67890 -->
+  * changelog line without component <!-- ghi12345 -->
+
+* Feature
+  * component1
+    * changelog line 3 <!-- jkl67890 -->
+  * changelog line without component <!-- mno12345 -->
+
+* Optimization
+  * component2
+    * changelog line 4 <!-- pqr12345 -->
+
+* Bug
+  * component1
+    * changelog line 5 <!-- stu67890 -->
+
+* Refactor
+  * component3
+    * changelog line 6 <!-- vwx12345 -->
+
+* Minor
+  * changelog line 7 <!-- yza67890 -->
+
+* Documentation
+  * changelog line 8 <!-- bcd12345 -->
+
+* Many thanks to:
+  * External Contributor 1 for commit title 1!
+  * External Contributor 2 for:
+    * commit title 2
+    * commit title 3
+```
\ No newline at end of file

From ca9f2963ea247c0f04d2e4bf0a6b55db2a8ec621 Mon Sep 17 00:00:00 2001
From: Ivan Skriabin <dddccc20000@mail.ru>
Date: Wed, 10 Jun 2026 18:01:36 +0300
Subject: [PATCH 14/14] cleanup russian

---
 .../changelog_tool/collect/command.py         |  2 +-
 .../changelog_tool/common/git.py              | 10 ++--
 .../changelog_tool/llm/client.py              |  6 +--
 .../changelog_tool/llm/exceptions.py          |  4 +-
 .../changelog_tool/llm/processor.py           | 46 +++++++++----------
 .../changelog_tool/llm/state.py               | 18 ++++----
 6 files changed, 43 insertions(+), 43 deletions(-)

diff --git a/scripts/changelog_tool/changelog_tool/collect/command.py b/scripts/changelog_tool/changelog_tool/collect/command.py
index 9e02ccb07adc..8275b1dc7d3a 100644
--- a/scripts/changelog_tool/changelog_tool/collect/command.py
+++ b/scripts/changelog_tool/changelog_tool/collect/command.py
@@ -95,7 +95,7 @@ def collect(config: Config) -> None:
             try:
                 commit.classification = Classification(result.get("classification", "unclear"))
             except ValueError:
-                # Если LLM вернула неизвестную классификацию, оставляем UNCLEAR
+                # If LLM returned an unknown classification, keep UNCLEAR
                 pass
             
             commit.to_changelog = result.get("to_changelog")
diff --git a/scripts/changelog_tool/changelog_tool/common/git.py b/scripts/changelog_tool/changelog_tool/common/git.py
index f240d2ae38f7..4c47c0605274 100644
--- a/scripts/changelog_tool/changelog_tool/common/git.py
+++ b/scripts/changelog_tool/changelog_tool/common/git.py
@@ -7,22 +7,22 @@
 
 
 class GitError(Exception):
-    """Любая ошибка при работе с git."""
+    """Any error when working with git."""
 
 
 class FileChange(pydantic.BaseModel):
     path: str
-    old_path: str | None = None   # None если файл не переименован
+    old_path: str | None = None   # None if the file was not renamed
     added_lines: int = 0
     removed_lines: int = 0
 
 
 class Commit(pydantic.BaseModel):
     sha: str
-    title: str                                            # первая строка message
-    message: str                                          # полный message
+    title: str                                            # first line of message
+    message: str                                          # full message
     author: str                                           # "Name <email>"
-    co_authors: list[str]                                 # из "Co-authored-by:"
+    co_authors: list[str]                                 # from "Co-authored-by:"
     changed_files: list[FileChange]
     total_added: int = 0
     total_removed: int = 0
diff --git a/scripts/changelog_tool/changelog_tool/llm/client.py b/scripts/changelog_tool/changelog_tool/llm/client.py
index 76525cba7d05..1f1a7610025a 100644
--- a/scripts/changelog_tool/changelog_tool/llm/client.py
+++ b/scripts/changelog_tool/changelog_tool/llm/client.py
@@ -14,14 +14,14 @@ class BaseLLMClient(ABC):
     @abstractmethod
     async def generate(self, prompt: str) -> str:
         """
-        Асинхронно отправляет текстовый промпт в LLM и возвращает текстовый ответ.
-        Может выбрасывать LLMError или LLMTransientError.
+        Asynchronously sends a text prompt to the LLM and returns a text response.
+        May raise LLMError or LLMTransientError.
         """
         pass
 
     @abstractmethod
     async def close(self):
-        """Закрывает ресурсы клиента."""
+        """Closes client resources."""
         pass
 
 
diff --git a/scripts/changelog_tool/changelog_tool/llm/exceptions.py b/scripts/changelog_tool/changelog_tool/llm/exceptions.py
index 3aa06096ec86..54bdf1a53562 100644
--- a/scripts/changelog_tool/changelog_tool/llm/exceptions.py
+++ b/scripts/changelog_tool/changelog_tool/llm/exceptions.py
@@ -1,7 +1,7 @@
 class LLMError(Exception):
-    """Критическая ошибка LLM (например, неверный формат запроса, 400 Bad Request)."""
+    """Critical LLM error (e.g., invalid request format, 400 Bad Request)."""
     pass
 
 class LLMTransientError(LLMError):
-    """Временная ошибка LLM (например, 500, 503, таймаут или исчерпаны попытки ретраев)."""
+    """Transient LLM error (e.g., 500, 503, timeout or retries exhausted)."""
     pass
diff --git a/scripts/changelog_tool/changelog_tool/llm/processor.py b/scripts/changelog_tool/changelog_tool/llm/processor.py
index ac3bde54142b..f46bc65745ec 100644
--- a/scripts/changelog_tool/changelog_tool/llm/processor.py
+++ b/scripts/changelog_tool/changelog_tool/llm/processor.py
@@ -18,20 +18,20 @@ def __init__(self, config: LLMConfig, llm_client: BaseLLMClient, output_dir: Pat
         
     async def process_commits(self, commits: List[Commit]) -> Dict[str, Dict[str, Any]]:
         """
-        Асинхронно обрабатывает список коммитов через LLM.
-        Возвращает словарь SHA -> dict с результатами (classification, changelog_line, detailed_commit_analysis).
+        Asynchronously processes a list of commits through the LLM.
+        Returns a dictionary SHA -> dict with results (classification, changelog_line, detailed_commit_analysis).
         """
-        # Загружаем и очищаем стейт
+        # Load and clean state
         await self.state.load()
         valid_shas = {commit.sha for commit in commits}
         await self.state.cleanup(valid_shas)
         
-        # Фильтруем коммиты для обработки
+        # Filter commits for processing
         commits_to_process = []
         results = {}
         
         for commit in commits:
-            # Проверяем стейт
+            # Check state
             result = await self.state.get_result(commit.sha)
             if result:
                 results[commit.sha] = result
@@ -43,23 +43,23 @@ async def process_commits(self, commits: List[Commit]) -> Dict[str, Dict[str, An
         if not commits_to_process:
             return results
             
-        # Разбиваем на батчи с учетом размера промпта
+        # Split into batches considering prompt size
         batches = self._create_smart_batches(commits_to_process)
         
         total_commits = sum(len(batch) for batch in batches)
         print(f"Processing {total_commits} commits in {len(batches)} batches...")
         
-        # Обрабатываем батчи параллельно
+        # Process batches in parallel
         batch_results = await asyncio.gather(
             *[self._process_batch(batch, i, len(batches), total_commits) for i, batch in enumerate(batches)],
             return_exceptions=True
         )
         
-        # Собираем результаты
+        # Collect results
         completed_batches = 0
         for batch_idx, batch_result in enumerate(batch_results):
             if isinstance(batch_result, Exception):
-                # Ошибки в батчах уже записаны в стейт, просто продолжаем
+                # Batch errors are already written to state, just continue
                 continue
             completed_batches += 1
             results.update(batch_result)
@@ -69,17 +69,17 @@ async def process_commits(self, commits: List[Commit]) -> Dict[str, Dict[str, An
         return results
         
     async def _process_batch(self, batch: List[Commit], batch_idx: int = 0, total_batches: int = 0, total_commits: int = 0) -> Dict[str, Dict[str, Any]]:
-        """Обрабатывает один батч коммитов."""
+        """Processes one batch of commits."""
         try:
             print(f"[{batch_idx + 1}/{total_batches}] Processing {len(batch)} commits...")
             prompt = self._build_prompt(batch)
             
-            # Проверяем длину промпта
+            # Check prompt length
             if len(prompt) > self.config.max_user_prompt_length:
                 if self.config.truncate_diff:
                     prompt = self._truncate_prompt(prompt)
                 else:
-                    # Помечаем все коммиты батча как ошибочные
+                    # Mark all batch commits as erroneous
                     error_msg = f"Prompt too long ({len(prompt)} > {self.config.max_user_prompt_length})"
                     for commit in batch:
                         await self.state.set_error(commit.sha, error_msg)
@@ -92,7 +92,7 @@ async def _process_batch(self, batch: List[Commit], batch_idx: int = 0, total_ba
                         } for commit in batch
                     }
                     
-            # Отправляем в LLM
+            # Send to LLM
             response_text = await self.llm_client.generate(prompt)
             
             # Remove markdown code blocks if present
@@ -104,17 +104,17 @@ async def _process_batch(self, batch: List[Commit], batch_idx: int = 0, total_ba
                 response_text = response_text.strip()[:-3]  # Remove trailing ```
             response_text = response_text.strip()
             
-            # Парсим ответ
+            # Parse response
             try:
                 response_data = json.loads(response_text)
             except json.JSONDecodeError as e:
                 raise LLMError(f"LLM returned invalid JSON: {e}")
                 
-            # Проверяем формат ответа
+            # Check response format
             if not isinstance(response_data, dict):
                 raise LLMError("LLM returned invalid response format (not a dict)")
                 
-            # Сохраняем результаты и возвращаем
+            # Save results and return
             results = {}
             for commit in batch:
                 commit_data = response_data.get(commit.sha, {})
@@ -146,10 +146,10 @@ async def _process_batch(self, batch: List[Commit], batch_idx: int = 0, total_ba
             return results
             
         except LLMError:
-            # Критическая ошибка - пробрасываем дальше
+            # Critical error - re-raise
             raise
         except Exception as e:
-            # Временная ошибка или другая проблема - помечаем коммиты как ошибочные
+            # Temporary error or other problem - mark commits as erroneous
             error_msg = f"{type(e).__name__}: {str(e)}"
             print(f"✗ Batch {batch_idx + 1}/{total_batches} failed: {error_msg}")
             for commit in batch:
@@ -197,7 +197,7 @@ def _create_smart_batches(self, commits: List[Commit]) -> List[List[Commit]]:
         return batches
     
     def _estimate_commit_size(self, commit: Commit) -> int:
-        """Оценивает размер промпта для одного коммита."""
+        """Estimates the prompt size for one commit."""
         size = len(commit.sha) + len(commit.title) + len(commit.message)
         size += len(', '.join(f.path for f in commit.changed_files))
         
@@ -205,10 +205,10 @@ def _estimate_commit_size(self, commit: Commit) -> int:
             diff = get_commit_diff(commit)
             size += len(diff)
         
-        return size + 200  # запас на JSON форматирование и разделители
+        return size + 200  # reserve for JSON formatting and separators
     
     def _build_prompt(self, commits: List[Commit]) -> str:
-        """Формирует промпт для батча коммитов."""
+        """Forms a prompt for a batch of commits."""
         system_prompt = """You are an expert software engineer analyzing git commits for a changelog.
 Your task is to analyze commits since the last release and highlight important and interesting changes.
 Ignore simple bugfixes, typos, and minor refactoring.
@@ -273,8 +273,8 @@ def _build_prompt(self, commits: List[Commit]) -> str:
         return f"{system_prompt}\n\n{user_prompt}"
         
     def _truncate_prompt(self, prompt: str) -> str:
-        """Обрезает промпт до допустимой длины."""
-        # Простая обрезка - в реальности может потребоваться более умная логика
+        """Truncates the prompt to the allowed length."""
+        # Simple truncation - in reality, smarter logic may be needed
         if len(prompt) <= self.config.max_user_prompt_length:
             return prompt
         return prompt[:self.config.max_user_prompt_length]
\ No newline at end of file
diff --git a/scripts/changelog_tool/changelog_tool/llm/state.py b/scripts/changelog_tool/changelog_tool/llm/state.py
index 2295c2e6e35c..3bdb702a0279 100644
--- a/scripts/changelog_tool/changelog_tool/llm/state.py
+++ b/scripts/changelog_tool/changelog_tool/llm/state.py
@@ -10,13 +10,13 @@ def __init__(self, state_file_path: Path):
         self.lock = asyncio.Lock()
         
     async def load(self) -> None:
-        """Асинхронно загружает состояние из файла."""
+        """Asynchronously loads state from file."""
         async with self.lock:
             if self.state_file_path.exists():
                 try:
                     with open(self.state_file_path, 'r', encoding='utf-8') as f:
                         loaded_state = json.load(f)
-                        # Убедимся, что состояние имеет правильный формат
+                        # Ensure the state has the correct format
                         if isinstance(loaded_state, dict):
                             self.state = loaded_state
                         else:
@@ -28,11 +28,11 @@ async def load(self) -> None:
                 self.state = {}
                 
     async def save(self) -> None:
-        """Асинхронно сохраняет состояние в файл."""
-        # Создаем директорию если её нет
+        """Asynchronously saves state to file."""
+        # Create directory if it doesn't exist
         self.state_file_path.parent.mkdir(parents=True, exist_ok=True)
         
-        # Атомарная запись через временный файл
+        # Atomic write through temporary file
         temp_file = self.state_file_path.with_suffix('.tmp')
         try:
             with open(temp_file, 'w', encoding='utf-8') as f:
@@ -44,7 +44,7 @@ async def save(self) -> None:
                 temp_file.unlink()
                     
     async def cleanup(self, valid_shas: Set[str]) -> None:
-        """Удаляет из стейта коммиты, не попавшие в текущую выборку."""
+        """Removes commits that are not in the current selection from the state."""
         async with self.lock:
             keys_to_remove = set(self.state.keys()) - valid_shas
             for key in keys_to_remove:
@@ -53,7 +53,7 @@ async def cleanup(self, valid_shas: Set[str]) -> None:
                 await self.save()
                 
     async def get_result(self, sha: str) -> Optional[Dict[str, Any]]:
-        """Возвращает результат анализа коммита, если он есть и не содержит ошибки."""
+        """Returns the commit analysis result if it exists and contains no errors."""
         async with self.lock:
             commit_data = self.state.get(sha)
             if commit_data and commit_data.get("error") is None:
@@ -61,7 +61,7 @@ async def get_result(self, sha: str) -> Optional[Dict[str, Any]]:
             return None
             
     async def set_result(self, sha: str, classification: str, changelog_line: str, detailed_commit_analysis: str, to_changelog: bool = False) -> int:
-        """Сохраняет успешный результат классификации. Возвращает количество готовых коммитов."""
+        """Saves successful classification result. Returns the number of completed commits."""
         async with self.lock:
             self.state[sha] = {
                 "classification": classification,
@@ -75,7 +75,7 @@ async def set_result(self, sha: str, classification: str, changelog_line: str, d
             return completed
             
     async def set_error(self, sha: str, error_message: str) -> None:
-        """Сохраняет ошибку классификации."""
+        """Saves classification error."""
         async with self.lock:
             self.state[sha] = {
                 "classification": "unclear",