Skip to content

Commit 8118fa7

Browse files
authored
Merge pull request #5 from SnoopJ/feature/pure-python-impl
Switch to pure Python implementation
2 parents 5462ce0 + f077443 commit 8118fa7

11 files changed

Lines changed: 83 additions & 1931 deletions

File tree

.github/workflows/build_wheels.yml

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,11 @@ jobs:
1414
- uses: actions/checkout@v3
1515

1616
- name: Build wheels
17-
uses: pypa/cibuildwheel@v2.22.0
18-
env:
19-
CIBW_TEST_REQUIRES: pytest
20-
CIBW_TEST_COMMAND: pytest -v {project}/tests
21-
# default values:
22-
# with:
23-
# package-dir: .
24-
# output-dir: wheelhouse
25-
# config-file: "{package}/pyproject.toml"
17+
run: |
18+
python3 -m pip install .[dev]
19+
python3 -m pytest
20+
python3 -m build .
2621
2722
- uses: actions/upload-artifact@v4
2823
with:
29-
path: ./wheelhouse/*.whl
24+
path: ./dist/*.whl

MANIFEST.in

Lines changed: 0 additions & 1 deletion
This file was deleted.

README.md

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ less memory (~30 KB), and it was kinda fun to write besides :)
2727

2828
The script `makeunicode_age.py` consumes
2929
[`DerivedAge.txt`](https://www.unicode.org/reports/tr44/#DerivedAge.txt) and
30-
produces the header file that holds the backing data for this module and fills
31-
in the number of spans in the Cython template. To make a build for another
32-
version of the Unicode Character Database, you should be able to replace
33-
`DerivedAge.txt` and re-run this script.
30+
produces the `unicode_age_db.py` file that holds the backing data for this
31+
library. To make a build for another version of the Unicode Character Database,
32+
you should be able to replace `DerivedAge.txt` and re-run this script.

makeunicode_age.py

Lines changed: 45 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,49 @@
1-
from pathlib import Path
2-
import pprint
1+
from __future__ import annotations
32
import re
4-
import string
3+
import struct
54
import sys
5+
from pathlib import Path
66
from textwrap import dedent
77

88

99
HERE = Path(__file__).parent.resolve()
1010
DERIVEDAGES = HERE.joinpath("DerivedAge.txt")
1111

12-
CYTHON_INFILE = HERE.joinpath("unicode_age.pyx.in")
13-
CYTHON_TEMPLATE = string.Template(CYTHON_INFILE.read_text())
1412

13+
def _write_spans(spans: list, ucd_version: tuple, outfile: Path):
14+
span_fmt = "iibb"
15+
VersionSpan = struct.Struct(span_fmt)
1516

16-
def _write_spans(spans: list, c_out: Path, cython_out: Path):
17-
c_src = dedent("""
18-
// 8 + 8 + 2 = 18 bytes per span
19-
// 1283 'real' spans, 435 singleton spans as of Unicode 15.0
20-
// ~31 KB of storage required (in practice the actual consumed memory is ~21 KB? not sure why that is...)
21-
typedef struct {
22-
int start;
23-
int stop;
24-
char major;
25-
char minor;
26-
} versionSpan_t;
17+
Nbytes = len(spans) * VersionSpan.size
18+
buf = bytearray(Nbytes)
2719

20+
for n, s in enumerate(spans):
21+
VersionSpan.pack_into(buf, n*VersionSpan.size, *s)
2822

29-
static const versionSpan_t versionSpans[] = {
30-
""")
31-
32-
c_src += "\t"
23+
py_src = dedent(f"""
24+
from __future__ import annotations
25+
import struct
3326
34-
for (start, stop, major, minor) in spans:
35-
line = f"{{0x{start:06x}, 0x{stop:06x}, {major}, {minor}}}"
36-
c_src += f"{line},\n\t"
27+
UCD_VERSION = {ucd_version}
3728
38-
c_src += "\n};"
29+
VersionSpan = struct.Struct({span_fmt!r})
3930
40-
pyx_src = CYTHON_TEMPLATE.substitute({"numSpans": len(spans)})
31+
def iter_spans():
32+
yield from VersionSpan.iter_unpack(VERSION_SPANS)
4133
34+
VERSION_SPANS = {repr(buf)}
35+
""")
4236

43-
c_out.write_text(c_src)
44-
print(f"Wrote to {c_out}")
4537

46-
cython_out.write_text(pyx_src)
47-
print(f"Wrote to {cython_out}")
38+
outfile.write_text(py_src)
39+
print(f"Wrote to {outfile}")
4840

4941

50-
def _derivedage_spans():
42+
def _derivedage_spans(fn):
5143
CODEPT = r"[0-9A-Fa-f]+"
5244
PATT = rf"^({CODEPT})(?:\.\.({CODEPT}))?\s*;\s*([\d.]+)\s*#.*"
5345

54-
with open(DERIVEDAGES, "r") as f:
46+
with open(fn, "r") as f:
5547
for line in f:
5648
if line.strip() and line.startswith("#"):
5749
continue
@@ -69,14 +61,31 @@ def _derivedage_spans():
6961
yield start, stop, major, minor
7062

7163

64+
def parse_ucdversion(fn: Path) -> tuple[int, int, int]:
    """Return the UCD version named on the first line of a DerivedAge file.

    The first line is searched for ``DerivedAge-X.Y.Z.txt``; the three
    dotted components are returned as a tuple of ints.

    Raises:
        ValueError: if the first line does not contain such a file name.
    """
    patt = r"DerivedAge-(?P<version>\d+\.\d+\.\d+)\.txt"

    # Only the first line is inspected, so there is no need to read more.
    with open(fn, "r", encoding="utf-8") as f:
        m = re.search(patt, f.readline())

    if not m:
        # f-string so the offending path actually appears in the message.
        raise ValueError(f"Cannot determine UCD version of {str(fn)!r}")

    ver = tuple(int(val) for val in m.group("version").split("."))
    return ver
7573

76-
C_OUTFILE = HERE.joinpath("src", "unicode_age.h")
77-
CYTHON_OUTFILE = HERE.joinpath("src", "unicode_age.pyx")
7874

79-
_write_spans(spans, c_out=C_OUTFILE, cython_out=CYTHON_OUTFILE)
75+
def main():
    """Regenerate ``unicode_age_db.py`` from ``DerivedAge.txt``.

    Reads the UCD version and the versioned spans from ``DERIVEDAGES`` and
    writes them to ``src/unicode_age/unicode_age_db.py`` under this
    script's directory, reporting progress on stdout.
    """
    ucd_version = parse_ucdversion(DERIVEDAGES)
    print(f"Scanning for version spans for UCD {ucd_version}: {str(DERIVEDAGES)}")

    # Materialize the generator: the span count is reported before writing.
    spans = [*_derivedage_spans(DERIVEDAGES)]
    print(f"Found {len(spans)} versioned spans")

    package_dir = HERE / "src" / "unicode_age"
    db_path = package_dir / "unicode_age_db.py"

    _write_spans(spans, ucd_version=ucd_version, outfile=db_path)
8089

8190

8291
if __name__ == "__main__":

pyproject.toml

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,12 @@ readme = "README.md"
99
[project.optional-dependencies]
1010
dev = [
1111
"pytest",
12-
"scriv[toml]",
1312
"twine",
14-
"cibuildwheel",
15-
"Cython",
1613
"build",
1714
]
1815

1916
[build-system]
20-
requires = ["setuptools", "wheel", "Cython"]
17+
requires = ["setuptools"]
2118
build-backend = "setuptools.build_meta"
2219

2320
[tool.tox]
@@ -29,11 +26,3 @@ envlist = py{38,39,310}, pypy{38,39}
2926
deps = pytest
3027
commands = pytest
3128
"""
32-
33-
[tool.cibuildwheel]
34-
test-requires = "pytest"
35-
test-command = "pytest -v {project}/tests"
36-
37-
[tool.scriv]
38-
version = "literal: pyproject.toml: project.version"
39-
fragment_directory = "news"

setup.py

Lines changed: 0 additions & 7 deletions
This file was deleted.

0 commit comments

Comments
 (0)