Skip to content

Commit 242f379

Browse files
Add Python type stub (.pyi) generation for tesseract_decoder (#189)
Addresses #154.

1. Creates a Python stub-file generation script using pybind11-stubgen. The script is called via a genrule when building the wheel.
2. Adds tests to ensure the stub files exist and are valid Python (maybe we can drop the valid-Python check and the symbol expectations, WDYT?). Note that I think there is an issue with the visualizer stubs since [the rest of this sentence is missing from the original commit message].

Smaller changes:
a. Adds pybind11-stubgen to the requirements file.

Verification: unit tests, and checking that autocomplete now works.

Autocomplete without stub files: <img width="396" height="161" alt="Screenshot 2026-02-20 at 9 12 48 AM" src="https://github.com/user-attachments/assets/c2697630-e9f0-4041-9ddb-fe4f60558b0a" />

Autocomplete with stub files: <img width="740" height="355" alt="Screenshot 2026-02-20 at 9 09 29 AM" src="https://github.com/user-attachments/assets/48843008-2558-4379-ab65-d6605fc4c3af" />
1 parent 273acf1 commit 242f379

8 files changed

Lines changed: 340 additions & 2 deletions

File tree

BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ py_wheel(
2020
distribution = "tesseract_decoder",
2121
deps=[
2222
"//src:tesseract_decoder",
23+
"//src/py:generated_stubs",
2324
"//src/py/_tesseract_py_util:_tesseract_py_util",
2425
":package_data",
2526
],

src/BUILD

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ pybind_library(
7272
"tesseract.pybind.h",
7373
"tesseract_sinter_compat.pybind.h",
7474
],
75+
copts = OPT_COPTS,
7576
deps = [
7677
":libcommon",
7778
":libutils",
@@ -85,6 +86,7 @@ pybind_extension(
8586
srcs = [
8687
"tesseract.pybind.cc",
8788
],
89+
copts = OPT_COPTS,
8890
deps = [
8991
":tesseract_decoder_pybind",
9092
],

src/py/BUILD

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
load("@rules_python//python:py_test.bzl", "py_test")
1616
load("@rules_python//python:pip.bzl", "compile_pip_requirements")
1717
load("@rules_python//python:py_library.bzl", "py_library")
18+
load("@rules_python//python:py_binary.bzl", "py_binary")
1819

1920
py_library(
2021
name = "shared_decoding_tests",
@@ -94,6 +95,46 @@ py_test(
9495

9596

9697

98+
py_test(
99+
name = "stub_test",
100+
srcs = ["stub_test.py"],
101+
data = [":generated_stubs"],
102+
visibility = ["//:__subpackages__"],
103+
deps = [
104+
"@pypi//pytest",
105+
],
106+
imports = ["..", "."],
107+
)
108+
109+
py_binary(
110+
name = "generate_stubs",
111+
srcs = ["generate_stubs.py"],
112+
deps = [
113+
"//src:lib_tesseract_decoder",
114+
"@pypi//pybind11_stubgen",
115+
"@pypi//stim",
116+
],
117+
imports = ["..", "."],
118+
)
119+
120+
STUB_FILES = [
121+
"__init__.pyi",
122+
"common.pyi",
123+
"simplex.pyi",
124+
"tesseract.pyi",
125+
"tesseract_sinter_compat.pyi",
126+
"utils.pyi",
127+
"viz.pyi",
128+
]
129+
130+
genrule(
131+
name = "generated_stubs",
132+
tools = [":generate_stubs"],
133+
outs = ["tesseract_decoder-stubs/" + f for f in STUB_FILES],
134+
cmd = "$(location :generate_stubs) --output-dir $(@D)",
135+
visibility = ["//visibility:public"],
136+
)
137+
97138
compile_pip_requirements(
98139
name = "requirements",
99140
src = "requirements.in",

src/py/generate_stubs.py

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
#!/usr/bin/env python3
2+
# Copyright 2025 Google LLC
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
"""Generate Python type stub (.pyi) files for the tesseract_decoder module.
17+
18+
This script uses pybind11-stubgen to produce .pyi stub files for the tesseract decoder
19+
module's C++ API. The .pyi stub files provide information for IDEs.
20+
21+
Built from //src/py:generated_stubs which uses a genrule to call this script.
22+
"""
23+
24+
import argparse
25+
import os
26+
import sys
27+
28+
29+
def _ensure_module_importable():
    """Import and return the ``tesseract_decoder`` module.

    No search-path adjustment is attempted: the compiled module must already
    be reachable through ``sys.path`` / ``PYTHONPATH``.

    Returns:
        The imported ``tesseract_decoder`` module object.

    Raises:
        SystemExit: With exit code 1 if the import fails for any reason.
            Diagnostics (exception, ``sys.path``, script location, working
            directory and the traceback) are written to stderr first.
    """
    try:
        import tesseract_decoder
    except Exception as e:
        # Broad on purpose: whatever goes wrong while importing is reported
        # the same way and turned into a non-zero exit status.
        import traceback

        print("====== DEBUG INFO ======", file=sys.stderr)
        print(f"Exception: {type(e).__name__}: {e}", file=sys.stderr)
        print(f"sys.path: {sys.path}", file=sys.stderr)
        print(f"__file__: {__file__}", file=sys.stderr)
        print(f"cwd: {os.getcwd()}", file=sys.stderr)
        print("========================", file=sys.stderr)
        print(
            "ERROR: Cannot import tesseract_decoder.\n"
            "Ensure the compiled module is on sys.path or PYTHONPATH.",
            file=sys.stderr,
        )
        traceback.print_exc()
        sys.exit(1)
    return tesseract_decoder
50+
51+
52+
def main():
    """Generate ``.pyi`` stubs for ``tesseract_decoder`` with pybind11-stubgen.

    Command-line options:
        --output-dir: Directory that receives the stubs. When omitted, the
            directory containing the imported ``tesseract_decoder`` module is
            used.

    pybind11-stubgen is driven through its ``main`` entry point with a
    temporarily replaced ``sys.argv``. If an ``<output-dir>/tesseract_decoder``
    directory exists afterwards it is renamed to
    ``<output-dir>/tesseract_decoder-stubs`` (replacing any existing directory
    of that name). The result is then reported on stdout; an unverifiable
    output location only produces a warning on stderr.

    Raises:
        SystemExit: With exit code 1 if ``tesseract_decoder`` or
            pybind11-stubgen cannot be imported, or if pybind11-stubgen exits
            with a failure status.
    """
    import shutil

    parser = argparse.ArgumentParser(
        description="Generate .pyi stubs for tesseract_decoder"
    )
    parser.add_argument(
        "--output-dir",
        default=None,
        help="Directory to place the generated .pyi file. "
        "Defaults to the directory containing the tesseract_decoder module.",
    )
    args = parser.parse_args()

    module = _ensure_module_importable()
    module_file = os.path.abspath(module.__file__)
    module_dir = os.path.dirname(module_file)

    output_dir = os.path.abspath(args.output_dir) if args.output_dir else module_dir

    print("Generating stubs for tesseract_decoder...")
    print(f"  Module location: {module_file}")
    print(f"  Output directory: {output_dir}")

    os.makedirs(output_dir, exist_ok=True)

    # Use pybind11_stubgen programmatically.
    try:
        from pybind11_stubgen import main as stubgen_main
    except ImportError as e:
        print(f"ImportError: {e}", file=sys.stderr)
        print(f"sys.path: {sys.path}", file=sys.stderr)
        print(
            "ERROR: pybind11-stubgen is not installed. "
            "Install with: pip install pybind11-stubgen",
            file=sys.stderr,
        )
        sys.exit(1)

    # Build argv for pybind11-stubgen CLI.
    # --enum-class-locations maps enum names to their fully-qualified module path
    # so pybind11-stubgen can resolve default values like <DetOrder.DetBFS: 0>.
    stubgen_argv = [
        "pybind11-stubgen",
        "tesseract_decoder",
        "--output-dir",
        output_dir,
        "--enum-class-locations",
        "DetOrder:tesseract_decoder.utils",
    ]

    # Save and restore sys.argv since pybind11-stubgen uses argparse.
    old_argv = sys.argv
    sys.argv = stubgen_argv
    try:
        stubgen_main()
    except SystemExit as e:
        # A bare sys.exit() carries code None, which also means success; only
        # a real failure status (non-zero int or a message) aborts the run.
        if e.code not in (0, None):
            print(f"ERROR: pybind11-stubgen exited with code {e.code}", file=sys.stderr)
            sys.exit(1)
    finally:
        sys.argv = old_argv

    # Verify the output exists, and append -stubs to the directory name.
    original_stub_pkg_dir = os.path.join(output_dir, "tesseract_decoder")
    stub_pkg_dir = os.path.join(output_dir, "tesseract_decoder-stubs")
    if os.path.exists(original_stub_pkg_dir):
        # os.rename cannot replace a non-empty directory, so clear any
        # previous result first.
        if os.path.exists(stub_pkg_dir):
            shutil.rmtree(stub_pkg_dir)
        os.rename(original_stub_pkg_dir, stub_pkg_dir)

    stub_init = os.path.join(stub_pkg_dir, "__init__.pyi")

    if os.path.isfile(stub_init):
        print(f"Stubs generated successfully at: {stub_pkg_dir}/")
        for root, _, files in os.walk(stub_pkg_dir):
            for f in sorted(files):
                if f.endswith(".pyi"):
                    rel = os.path.relpath(os.path.join(root, f), output_dir)
                    print(f"  {rel}")
    else:
        # Fall back to a single flat stub file next to the output directory.
        flat_stub = os.path.join(output_dir, "tesseract_decoder.pyi")
        if os.path.isfile(flat_stub):
            print(f"Stubs generated successfully: {flat_stub}")
        else:
            print("WARNING: Could not verify stub output location.", file=sys.stderr)

    print("Done.")
139+
140+
141+
if __name__ == "__main__":
142+
main()

src/py/requirements.in

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
stim
22
pytest
3-
sinter
3+
sinter
4+
pybind11-stubgen

src/py/requirements_lock.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,10 @@ pluggy==1.6.0 \
455455
--hash=sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3 \
456456
--hash=sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746
457457
# via pytest
458+
pybind11-stubgen==2.5.5 \
459+
--hash=sha256:10824cd2fc5cbbee032b8fb39e6f6c08de232deb309bc66d786a6c6e8a4601bd \
460+
--hash=sha256:758d6d6bbeefc62ad7f78d5e5bbf357ccf6af83cd4504f5f549403f452942708
461+
# via -r src/py/requirements.in
458462
pygments==2.19.1 \
459463
--hash=sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f \
460464
--hash=sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c

src/py/stub_test.py

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
"""Tests that validate the generated .pyi stub files for tesseract_decoder.

The stubs are produced by the `//src/py:generated_stubs` genrule, which runs
`//src/py:generate_stubs`, and reach this test through its `data` runfiles
under `src/py/tesseract_decoder-stubs/`. The generator can also be run by hand
with `bazel run //src/py:generate_stubs -- --output-dir src`. This test
validates that the stubs exist, are syntactically valid Python, and contain
the expected symbols.
"""
22+
23+
import ast
24+
import glob
25+
import os
26+
27+
import pytest
28+
29+
30+
def _find_stub_files():
    """Return the generated ``.pyi`` stub files from the Bazel runfiles tree.

    The files are looked up as ``src/py/tesseract_decoder-stubs/*.pyi`` below
    ``$TEST_SRCDIR/$TEST_WORKSPACE``.

    Returns:
        The matching paths, sorted so the result is deterministic.

    Raises:
        RuntimeError: If ``TEST_SRCDIR`` or ``TEST_WORKSPACE`` is not set in
            the environment.
        AssertionError: If no stub file matches the pattern.
    """
    missing = [
        var for var in ("TEST_SRCDIR", "TEST_WORKSPACE") if var not in os.environ
    ]
    if missing:
        raise RuntimeError(
            "Environment variable(s) not set: "
            + ", ".join(missing)
            + ". This lookup relies on the TEST_SRCDIR/TEST_WORKSPACE test environment."
        )

    # Find the src/py/tesseract_decoder-stubs/*.pyi files in the Bazel tree.
    pattern_genrule = os.path.join(
        os.environ["TEST_SRCDIR"],
        os.environ["TEST_WORKSPACE"],
        "src",
        "py",
        "tesseract_decoder-stubs",
        "*.pyi",
    )
    files = sorted(glob.glob(pattern_genrule))
    # Raised explicitly (not via ``assert``) so the check survives ``python -O``.
    if not files:
        raise AssertionError(f"No stub files found in {pattern_genrule}")
    return files
44+
45+
46+
def _collect_all_names(pyi_files):
    """Collect the names defined anywhere in the given ``.pyi`` files.

    Every node of each parsed file is visited (nested scopes included), and
    the following contribute a name:

    * class, function and async function definitions;
    * plain assignments to a simple name (``X = ...``);
    * annotated assignments to a simple name (``X: T`` / ``X: T = ...``);
    * ``from ... import`` aliases (the ``as`` name when present).

    Args:
        pyi_files: Iterable of paths to stub files.

    Returns:
        A set with the union of all collected names.

    Raises:
        SyntaxError: If a file cannot be parsed as Python.
    """
    all_names = set()
    for stub_path in pyi_files:
        # Read explicitly as UTF-8 rather than relying on the locale encoding.
        with open(stub_path, "r", encoding="utf-8") as f:
            content = f.read()
        tree = ast.parse(content, filename=stub_path)
        for node in ast.walk(tree):
            if isinstance(node, (ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)):
                all_names.add(node.name)
            elif isinstance(node, ast.Assign):
                for target in node.targets:
                    if isinstance(target, ast.Name):
                        all_names.add(target.id)
            elif isinstance(node, ast.AnnAssign):
                # Declarations such as ``X: int`` carry the name in ``target``.
                if isinstance(node.target, ast.Name):
                    all_names.add(node.target.id)
            elif isinstance(node, ast.ImportFrom):
                for alias in node.names:
                    all_names.add(
                        alias.name if alias.asname is None else alias.asname
                    )
    return all_names
67+
68+
69+
@pytest.fixture(scope="session")
def stub_files():
    """Return the generated ``.pyi`` stub files, looked up once per session.

    The lookup is delegated to ``_find_stub_files``, which raises when no
    stub file is present. Missing stubs therefore surface as an error instead
    of a skip, and no separate empty-result branch is needed here.
    """
    return _find_stub_files()
79+
80+
81+
class TestStubFilesExist:
    """Tests that stub files exist and are valid Python."""

    def test_stubs_generated(self, stub_files):
        """At least one .pyi stub file should exist."""
        assert len(stub_files) > 0

    # File names every run of the stub generator is expected to produce.
    EXPECTED_STUBS = [
        "__init__.pyi",
        "common.pyi",
        "simplex.pyi",
        "tesseract.pyi",
        "tesseract_sinter_compat.pyi",
        "utils.pyi",
        "viz.pyi",
    ]

    @pytest.mark.parametrize("filename", EXPECTED_STUBS)
    def test_expected_stub_exists(self, stub_files, filename):
        """Each expected submodule stub file should be generated."""
        basenames = [os.path.basename(f) for f in stub_files]
        assert filename in basenames, (
            f"Missing expected stub file: {filename}. "
            f"Found: {basenames}"
        )

    def test_stubs_are_valid_python(self, stub_files):
        """All .pyi files should be parseable as valid Python."""
        for stub_path in stub_files:
            # Read explicitly as UTF-8 rather than relying on the locale
            # encoding of the machine running the test.
            with open(stub_path, "r", encoding="utf-8") as f:
                content = f.read()
            try:
                ast.parse(content, filename=stub_path)
            except SyntaxError as e:
                basename = os.path.basename(stub_path)
                pytest.fail(f"Stub file {basename} has invalid syntax: {e}")
117+
118+
119+
class TestStubContents:
    """Tests that the generated stubs contain the expected symbols."""

    # Symbols that must be defined somewhere in the generated stub files.
    EXPECTED_SYMBOLS = [
        "Symptom",
        "Error",
        "TesseractConfig",
        "TesseractDecoder",
        "TesseractSinterCompiledDecoder",
        "TesseractSinterDecoder",
        "SimplexConfig",
        "SimplexDecoder",
        "DetOrder",
        "Visualizer",
        "make_tesseract_sinter_decoders_dict",
    ]

    @pytest.fixture(scope="class")
    def all_names(self, stub_files):
        """Names defined across all stub files, parsed once for the class.

        Computing this here avoids re-reading and re-parsing every stub file
        for each parametrized symbol check.
        """
        return _collect_all_names(stub_files)

    @pytest.mark.parametrize("symbol", EXPECTED_SYMBOLS)
    def test_expected_symbol_in_stubs(self, all_names, symbol):
        """Key symbols from the pybind11 module should appear in stubs."""
        assert symbol in all_names, (
            f"Expected symbol '{symbol}' not found in stub files. "
            f"Found names: {sorted(all_names)}"
        )
144+
145+
146+
if __name__ == "__main__":
147+
raise SystemExit(pytest.main([__file__]))

0 commit comments

Comments
 (0)