diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2a7e62b0d0..9b020a59a8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,12 +35,25 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} + - name: Set up Rust + uses: dtolnay/rust-toolchain@stable - name: Install dependencies run: | pip install --upgrade pip wheel pip install --editable . - - name: Test with bikeshed + - name: Build Rust extension + run: | + pip install maturin + maturin build --release + pip install --force-reinstall --find-links rust/target/wheels bikeshed_rust + - name: Test with bikeshed (Python mode) + run: bikeshed --no-update test + env: + BIKESHED_USE_RUST: '0' + - name: Test with bikeshed (Rust mode) run: bikeshed --no-update test + env: + BIKESHED_USE_RUST: '1' lint: diff --git a/.gitignore b/.gitignore index 538c8eaf90..b1270c7cde 100644 --- a/.gitignore +++ b/.gitignore @@ -18,4 +18,5 @@ node_modules/ /playwright/.cache/ /env /docs/*.html -/html-perf-test \ No newline at end of file +/rust/target/ +/html-perf-test diff --git a/README.md b/README.md index 1207b48305..e0474fa0c0 100644 --- a/README.md +++ b/README.md @@ -67,6 +67,13 @@ though most such specs have switched their source file extensions to `.bs` now. Using `.src.html` in most text editors will display the file with HTML source formatting, which isn't generally what you want.) +Rust +----------- + +Bikeshed includes optional Rust extensions, in an effort to port some or all of the code into Rust. + +To enable: `export BIKESHED_USE_RUST=1` + License ------- diff --git a/bikeshed/h/parser/parser.py b/bikeshed/h/parser/parser.py index 23376e0884..fc95ca53b8 100644 --- a/bikeshed/h/parser/parser.py +++ b/bikeshed/h/parser/parser.py @@ -6,7 +6,7 @@ from ... import config, constants, t from ... import messages as m -from . import preds +from . import preds_wrapper as preds from .nodes import ( Comment, Doctype, diff --git a/bikeshed/h/parser/preds_wrapper.py b/bikeshed/h/parser/preds_wrapper.py new file mode 100644 index 0000000000..6623d30efa --- /dev/null +++ b/bikeshed/h/parser/preds_wrapper.py @@ -0,0 +1,82 @@ +from __future__ import annotations + +import os + +_USE_RUST = os.environ.get("BIKESHED_USE_RUST", "").lower() in ("1", "true") + +if _USE_RUST: + try: + import bikeshed_rust + from . import preds as _preds + + isASCII = bikeshed_rust.is_ascii + isASCIIAlpha = bikeshed_rust.is_ascii_alpha + isASCIIAlphanum = bikeshed_rust.is_ascii_alphanum + isASCIILowerAlpha = bikeshed_rust.is_ascii_lower_alpha + isASCIIUpperAlpha = bikeshed_rust.is_ascii_upper_alpha + isAttrNameChar = bikeshed_rust.is_attr_name_char + isControl = bikeshed_rust.is_control + isDigit = bikeshed_rust.is_digit + isHexDigit = bikeshed_rust.is_hex_digit + isNoncharacter = bikeshed_rust.is_noncharacter + isTagnameChar = bikeshed_rust.is_tagname_char + isWhitespace = bikeshed_rust.is_whitespace + + charRefs = _preds.charRefs + xmlishTagnames = _preds.xmlishTagnames + isXMLishTagname = _preds.isXMLishTagname + except ImportError: + from . import preds as _preds + + charRefs = _preds.charRefs + xmlishTagnames = _preds.xmlishTagnames + isASCII = _preds.isASCII + isASCIIAlpha = _preds.isASCIIAlpha + isASCIIAlphanum = _preds.isASCIIAlphanum + isASCIILowerAlpha = _preds.isASCIILowerAlpha + isASCIIUpperAlpha = _preds.isASCIIUpperAlpha + isAttrNameChar = _preds.isAttrNameChar + isControl = _preds.isControl + isDigit = _preds.isDigit + isHexDigit = _preds.isHexDigit + isNoncharacter = _preds.isNoncharacter + isTagnameChar = _preds.isTagnameChar + isWhitespace = _preds.isWhitespace + isXMLishTagname = _preds.isXMLishTagname + +else: + from . import preds as _preds + + charRefs = _preds.charRefs + xmlishTagnames = _preds.xmlishTagnames + isASCII = _preds.isASCII + isASCIIAlpha = _preds.isASCIIAlpha + isASCIIAlphanum = _preds.isASCIIAlphanum + isASCIILowerAlpha = _preds.isASCIILowerAlpha + isASCIIUpperAlpha = _preds.isASCIIUpperAlpha + isAttrNameChar = _preds.isAttrNameChar + isControl = _preds.isControl + isDigit = _preds.isDigit + isHexDigit = _preds.isHexDigit + isNoncharacter = _preds.isNoncharacter + isTagnameChar = _preds.isTagnameChar + isWhitespace = _preds.isWhitespace + isXMLishTagname = _preds.isXMLishTagname + +__all__ = [ + "charRefs", + "xmlishTagnames", + "isASCII", + "isASCIIAlpha", + "isASCIIAlphanum", + "isASCIILowerAlpha", + "isASCIIUpperAlpha", + "isAttrNameChar", + "isControl", + "isDigit", + "isHexDigit", + "isNoncharacter", + "isTagnameChar", + "isWhitespace", + "isXMLishTagname", +] diff --git a/bikeshed/test.py b/bikeshed/test.py index 9f3099fdca..66bbe39b5f 100644 --- a/bikeshed/test.py +++ b/bikeshed/test.py @@ -12,6 +12,19 @@ from . import messages as m from .Spec import Spec + +def _getTestTitle() -> str: + try: + from .h.parser import preds_wrapper + # Check if we're using the Rust implementation + if hasattr(preds_wrapper, 'isASCII'): + module = getattr(preds_wrapper.isASCII, '__module__', '') + if 'bikeshed_rust' in module: + return "Running tests [R]" + except Exception: + pass + return "Running tests" + if t.TYPE_CHECKING: import argparse @@ -101,7 +114,7 @@ def run( numPassed = 0 total = 0 fails = [] - pathProgress = alive_it(paths, dual_line=True, length=20) + pathProgress = alive_it(paths, dual_line=True, length=20, title=_getTestTitle()) try: for path in pathProgress: testName = testNameForPath(path) @@ -149,7 +162,7 @@ def rebase( if len(paths) == 0: m.p("No tests were found.") return True - pathProgress = alive_it(paths, dual_line=True, length=20) + pathProgress = alive_it(paths, dual_line=True, length=20, title=_getTestTitle()) try: for path in pathProgress: testName = testNameForPath(path) diff --git a/pyproject.toml b/pyproject.toml index d8e0d525a9..a5e7f80db8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,17 @@ +[build-system] +requires = ["maturin>=1.0,<2.0"] +build-backend = "maturin" + +[project] +name = "bikeshed" +version = "0.1.0" +requires-python = ">=3.9" + +[tool.maturin] +manifest-path = "rust/Cargo.toml" +module-name = "bikeshed_rust" +python-source = "bikeshed" + [tool.black] line-length = 120 diff --git a/rust/Cargo.lock b/rust/Cargo.lock new file mode 100644 index 0000000000..831c6f2fe7 --- /dev/null +++ b/rust/Cargo.lock @@ -0,0 +1,163 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "bikeshed_rust" +version = "0.1.0" +dependencies = [ + "pyo3", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "indoc" +version = "2.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" + +[[package]] +name = "libc" +version = "0.2.176" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174" + +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "portable-atomic" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" + +[[package]] +name = "proc-macro2" +version = "1.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pyo3" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ba0117f4212101ee6544044dae45abe1083d30ce7b29c4b5cbdfa2354e07383" +dependencies = [ + "indoc", + "libc", + "memoffset", + "once_cell", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fc6ddaf24947d12a9aa31ac65431fb1b851b8f4365426e182901eabfb87df5f" +dependencies = [ + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "025474d3928738efb38ac36d4744a74a400c901c7596199e20e45d98eb194105" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e64eb489f22fe1c95911b77c44cc41e7c19f3082fc81cce90f657cdc42ffded" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "100246c0ecf400b475341b8455a9213344569af29a3c841d29270e53102e0fcf" +dependencies = [ + "heck", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn", +] + +[[package]] +name = "quote" +version = "1.0.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "syn" +version = "2.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "target-lexicon" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df7f62577c25e07834649fc3b39fafdc597c0a3527dc1c60129201ccfcbaa50c" + +[[package]] +name = "unicode-ident" +version = "1.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" + +[[package]] +name = "unindent" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" diff --git a/rust/Cargo.toml b/rust/Cargo.toml new file mode 100644 index 0000000000..d811381053 --- /dev/null +++ b/rust/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "bikeshed_rust" +version = "0.1.0" +edition = "2024" + +[lib] +name = "bikeshed_rust" +crate-type = ["cdylib"] + +[dependencies] +pyo3 = { version = "0.26", features = ["extension-module"] } + +[profile.release] +lto = true +codegen-units = 1 +strip = true diff --git a/rust/README.md b/rust/README.md new file mode 100644 index 0000000000..66ac4b9548 --- /dev/null +++ b/rust/README.md @@ -0,0 +1,28 @@ +# Bikeshed Rust Extensions + +This directory contains Rust implementations of Bikeshed modules, compiled to Python extensions using PyO3. + +## Building + +```bash +# Install maturin (build tool) +cargo install maturin + +# Build release wheel +cd rust +maturin build --release +``` + +## Testing + +## Using Rust Extensions + +```bash +# Use Rust implementation +export BIKESHED_USE_RUST=1 +bikeshed spec input.bs output.html + +# Use Python implementation (default) +export BIKESHED_USE_RUST=0 +bikeshed spec input.bs output.html +``` diff --git a/rust/src/lib.rs b/rust/src/lib.rs new file mode 100644 index 0000000000..0a6847259a --- /dev/null +++ b/rust/src/lib.rs @@ -0,0 +1,20 @@ +use pyo3::prelude::*; + +mod preds; + +#[pymodule] +fn bikeshed_rust(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_function(wrap_pyfunction!(preds::is_whitespace, m)?)?; + m.add_function(wrap_pyfunction!(preds::is_digit, m)?)?; + m.add_function(wrap_pyfunction!(preds::is_hex_digit, m)?)?; + m.add_function(wrap_pyfunction!(preds::is_ascii_lower_alpha, m)?)?; + m.add_function(wrap_pyfunction!(preds::is_ascii_upper_alpha, m)?)?; + m.add_function(wrap_pyfunction!(preds::is_ascii_alpha, m)?)?; + m.add_function(wrap_pyfunction!(preds::is_ascii_alphanum, m)?)?; + m.add_function(wrap_pyfunction!(preds::is_ascii, m)?)?; + m.add_function(wrap_pyfunction!(preds::is_control, m)?)?; + m.add_function(wrap_pyfunction!(preds::is_noncharacter, m)?)?; + m.add_function(wrap_pyfunction!(preds::is_attr_name_char, m)?)?; + m.add_function(wrap_pyfunction!(preds::is_tagname_char, m)?)?; + Ok(()) +} diff --git a/rust/src/preds.rs b/rust/src/preds.rs new file mode 100644 index 0000000000..606859faf3 --- /dev/null +++ b/rust/src/preds.rs @@ -0,0 +1,131 @@ +use pyo3::prelude::*; +use pyo3::types::PyString; + +/// Helper to extract a character from either str or int +fn get_codepoint(obj: &Bound<'_, PyAny>) -> PyResult> { + if let Ok(s) = obj.downcast::() { + let s = s.to_str()?; + Ok(get_char(s)) + } else if let Ok(i) = obj.extract::() { + if i < 0 || i > 0x10FFFF { + return Ok(None); + } + Ok(char::from_u32(i as u32)) + } else { + Ok(None) + } +} + +/// Convers a 1 character str to the char, returns None if the string is > 1 +fn get_char(s: &str) -> Option { + if s.chars().count() != 1 { + None + } else { + s.chars().next() + } +} + +fn is_whitespace_char(ch: char) -> bool { + matches!(ch as u32, 0x9 | 0xA | 0xC | 0x20) +} + +/// Check if a character is whitespace (tab, newline, form feed, or space) +#[pyfunction] +pub fn is_whitespace(ch: &Bound<'_, PyAny>) -> PyResult> { + Ok(get_codepoint(ch)?.map(is_whitespace_char)) +} + +/// Check if a character is a digit (0-9) +#[pyfunction] +pub fn is_digit(ch: &str) -> Option { + get_char(ch).map(|c| c.is_ascii_digit()) +} + +/// Check if a character is a hexadecimal digit +#[pyfunction] +pub fn is_hex_digit(ch: &str) -> Option { + get_char(ch).map(|c| c.is_ascii_hexdigit()) +} + +/// Check if a character is ASCII lowercase alpha +#[pyfunction] +pub fn is_ascii_lower_alpha(ch: &str) -> Option { + get_char(ch).map(|c| c.is_ascii_lowercase()) +} + +/// Check if a character is ASCII uppercase alpha +#[pyfunction] +pub fn is_ascii_upper_alpha(ch: &str) -> Option { + get_char(ch).map(|c| c.is_ascii_uppercase()) +} + +/// Check if a character is ASCII alpha (upper or lower) +#[pyfunction] +pub fn is_ascii_alpha(ch: &str) -> Option { + get_char(ch).map(|c| c.is_ascii_alphabetic()) +} + +/// Check if a character is ASCII alphanumeric +#[pyfunction] +pub fn is_ascii_alphanum(ch: &str) -> Option { + get_char(ch).map(|c| c.is_ascii_alphanumeric()) +} + +/// Check if a character is ASCII (code point <= 127) +#[pyfunction] +pub fn is_ascii(ch: &str) -> Option { + get_char(ch).map(|c| c.is_ascii()) +} + +/// Check if a character is a control character +#[pyfunction] +pub fn is_control(ch: &Bound<'_, PyAny>) -> PyResult> { + Ok(get_codepoint(ch)?.map(|c| { + let cp = c as u32; + (cp <= 0x08) || (cp == 0x0B) || (0x0D..=0x1F).contains(&cp) || (0x7F..=0x9F).contains(&cp) + })) +} + +/// Check if a character is a noncharacter +#[pyfunction] +pub fn is_noncharacter(ch: &Bound<'_, PyAny>) -> PyResult> { + Ok(get_codepoint(ch)?.map(|c| { + let cp = c as u32; + (0xFDD0..=0xFDEF).contains(&cp) || (cp & 0xFFFE == 0xFFFE && cp <= 0x10FFFF) + })) +} + +/// Check if a character is valid for an attribute name +#[pyfunction] +pub fn is_attr_name_char(ch: &str) -> Option { + get_char(ch).map(|c| { + if is_whitespace_char(c) { + return false; + } + !matches!(c, '/' | '<' | '>' | '=' | '"' | '\'' | '\0') + }) +} + +/// Check if a character is valid for a tag name +#[pyfunction] +pub fn is_tagname_char(ch: &str) -> Option { + get_char(ch).map(|c| { + if matches!(c, '-' | '.' | '_') || c.is_ascii_alphanumeric() { + return true; + } + + let cp = c as u32; + match cp { + 0xB7 => true, + 0xC0..=0x1FFF => !matches!(cp, 0xD7 | 0xF7 | 0x37E), + 0x200C | 0x200D | 0x203F | 0x2040 => true, + 0x2070..=0x218F => true, + 0x2C00..=0x2FEF => true, + 0x3001..=0xD7FF => true, + 0xF900..=0xFDCF => true, + 0xFDF0..=0xFFFD => true, + 0x10000..=0xEFFFF => true, + _ => false, + } + }) +}