Skip to content

Commit b375f3f

Browse files
lvcatae898
authored and committed
fix: arcadedb-embedded PyPI wheel will not install on manylinux_2_34_x86_64 (RHEL 9, Rocky 9, AlmaLinux 9, Amazon Linux 2023)
Fixed issue ArcadeData#4037
1 parent af75193 commit b375f3f

3 files changed

Lines changed: 264 additions & 3 deletions

File tree

bindings/python/scripts/Dockerfile.build

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ COPY bindings/python/setup.py .
169169
COPY bindings/python/scripts/setup_jars.py ./scripts/
170170
COPY bindings/python/scripts/extract_version.py ./scripts/
171171
COPY bindings/python/scripts/write_version.py ./scripts/
172+
COPY bindings/python/scripts/verify_wheel_platform_tag.py ./scripts/
172173
COPY bindings/python/scripts/jar_exclusions.txt .
173174
COPY bindings/python/pyproject.toml ./
174175
COPY bindings/python/README.md ./
@@ -195,7 +196,17 @@ RUN echo "📌 Package name: ${PACKAGE_NAME}" && \
195196
echo "📦 JAR files and JRE copied"
196197

197198
# Build the wheel
198-
# Extract PEP 440 version from pom.xml or use BUILD_VERSION if provided
199+
# Extract PEP 440 version from pom.xml or use BUILD_VERSION if provided.
200+
#
201+
# Wheel platform tag must match the highest glibc symbol version required by the
202+
# bundled JRE. amazoncorretto:25 (Amazon Linux 2023, glibc 2.34) ships a JRE
203+
# whose binaries require symbol versions up to GLIBC_2.34, so the wheel cannot
204+
# be tagged any lower. Issue #4037: tagging as manylinux_2_35 prevents
205+
# installation on systems with glibc 2.34 (RHEL 9, Rocky 9, AlmaLinux 9, Amazon
206+
# Linux 2023, ...) even though the bundled JRE runs there fine. Keep the tag
207+
# aligned with the JRE's actual requirement; bump only when the JRE base image
208+
# moves to a newer glibc. The verify_wheel_platform_tag.py step below enforces
209+
# this invariant at build time.
199210
ARG BUILD_VERSION=""
200211
RUN if [ -n "${BUILD_VERSION}" ]; then \
201212
export ARCADEDB_VERSION="${BUILD_VERSION}"; \
@@ -212,9 +223,9 @@ RUN if [ -n "${BUILD_VERSION}" ]; then \
212223
sed -i 's|^description = .*|description = "'"${PACKAGE_DESCRIPTION}"'"|' pyproject.toml && \
213224
if echo "${TARGET_PLATFORM}" | grep -q '^linux-'; then \
214225
if [ "${TARGET_PLATFORM}" = "linux-x64" ]; then \
215-
WHEEL_PLAT="manylinux_2_35_x86_64"; \
226+
WHEEL_PLAT="manylinux_2_34_x86_64"; \
216227
elif [ "${TARGET_PLATFORM}" = "linux-arm64" ]; then \
217-
WHEEL_PLAT="manylinux_2_35_aarch64"; \
228+
WHEEL_PLAT="manylinux_2_34_aarch64"; \
218229
else \
219230
WHEEL_PLAT=""; \
220231
fi; \
@@ -233,6 +244,10 @@ RUN if [ -n "${BUILD_VERSION}" ]; then \
233244
ls -lh /build/dist; \
234245
exit 1; \
235246
fi; \
247+
WHEEL_FILE=$(ls /build/dist/*.whl | head -n1); \
248+
if [ -n "${WHEEL_FILE}" ]; then \
249+
python3 scripts/verify_wheel_platform_tag.py "${WHEEL_FILE}" /build/jre || exit 1; \
250+
fi; \
236251
fi && \
237252
echo "✅ Wheel built successfully!" && \
238253
ls -lh dist/
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
#!/usr/bin/env python3
2+
"""Verify the Linux wheel's manylinux platform tag matches the bundled JRE.
3+
4+
Regression guard for issue #4037: the wheel was tagged ``manylinux_2_35`` while
5+
the bundled JRE only required ``GLIBC_2.34``, blocking installation on systems
6+
running glibc 2.34 (RHEL 9, Rocky 9, AlmaLinux 9, Amazon Linux 2023, ...). The
7+
wheel platform tag must equal the highest ``GLIBC_x.y`` symbol version actually
8+
referenced by the JRE binaries (``java``, ``libjvm.so``, ...). Tagging higher
9+
locks out users unnecessarily; tagging lower lets the wheel install on systems
10+
where the JRE cannot run.
11+
12+
Usage:
13+
python verify_wheel_platform_tag.py <wheel_path> <jre_dir>
14+
Verify a wheel's filename platform tag against the JRE in jre_dir.
15+
python verify_wheel_platform_tag.py --jre <jre_dir>
16+
Print the recommended manylinux tag for the JRE (no wheel check).
17+
18+
Exits with status 0 on success, non-zero on mismatch.
19+
20+
@author Luca Garulli (l.garulli@arcadedata.com)
21+
"""
22+
23+
from __future__ import annotations
24+
25+
import argparse
26+
import os
27+
import re
28+
import sys
29+
from pathlib import Path
30+
31+
# Matches glibc symbol-version strings such as ``GLIBC_2.34`` or ``GLIBC_2.2.5``
# in raw bytes; groups are (major, minor, optional patch).
GLIBC_SYM_RE = re.compile(rb"GLIBC_(\d+)\.(\d+)(?:\.(\d+))?")
# Matches the trailing platform tag of a wheel filename, e.g.
# ``-manylinux_2_34_x86_64.whl``; groups are (full tag, major, minor, arch).
WHEEL_TAG_RE = re.compile(r"-(manylinux_(\d+)_(\d+)_(x86_64|aarch64))\.whl$")

# Leading bytes that identify an ELF file.
_ELF_MAGIC = b"\x7fELF"


def _max_glibc_in_file(path: Path) -> tuple[int, int] | None:
    """Return the highest (major, minor) GLIBC_ symbol referenced by an ELF file.

    The file is scanned for literal ``GLIBC_<major>.<minor>`` byte strings; the
    ELF dynamic sections are not parsed. Any patch component (``GLIBC_2.2.5``)
    is ignored.

    Args:
        path: File to inspect.

    Returns:
        The highest ``(major, minor)`` pair found, or ``None`` when the file
        cannot be read, does not start with the ELF magic, or contains no
        ``GLIBC_`` version string.
    """
    try:
        with path.open("rb") as fh:
            # Check the magic before reading the rest so that non-ELF files
            # are never loaded into memory in full.
            if fh.read(len(_ELF_MAGIC)) != _ELF_MAGIC:
                return None
            data = fh.read()
    except OSError:
        return None
    # Tuples compare numerically element by element, so (2, 34) > (2, 4).
    return max(
        ((int(m.group(1)), int(m.group(2))) for m in GLIBC_SYM_RE.finditer(data)),
        default=None,
    )
50+
51+
52+
def max_glibc_in_dir(jre_dir: Path) -> tuple[int, int]:
    """Return the highest GLIBC_ symbol version found anywhere under a directory.

    Every regular (non-symlink) file below ``jre_dir`` is inspected with
    ``_max_glibc_in_file``; files for which it returns ``None`` are ignored.

    Args:
        jre_dir: Root of the JRE/JDK tree to walk.

    Returns:
        The highest ``(major, minor)`` pair reported for any file.

    Raises:
        SystemExit: If no file under ``jre_dir`` yields a version.
    """
    versions = []
    for dirpath, _subdirs, filenames in os.walk(jre_dir):
        for filename in filenames:
            candidate = Path(dirpath) / filename
            # Symlinks are skipped; only the entries that are real files count.
            if candidate.is_symlink():
                continue
            version = _max_glibc_in_file(candidate)
            if version is not None:
                versions.append(version)
    if not versions:
        raise SystemExit(f"No ELF binaries with GLIBC_ symbols found under {jre_dir}")
    # (0, 0) is the floor the result can never drop below.
    return max((0, 0), *versions)
70+
71+
72+
def parse_wheel_tag(wheel_path: Path) -> tuple[tuple[int, int], str]:
    """Read the glibc version and architecture out of a manylinux wheel filename.

    Only the file name is examined; the wheel is never opened.

    Args:
        wheel_path: Path whose name ends in ``-manylinux_X_Y_<arch>.whl``.

    Returns:
        ``((major, minor), arch)`` taken from the platform tag.

    Raises:
        SystemExit: If the file name does not match ``WHEEL_TAG_RE``.
    """
    found = WHEEL_TAG_RE.search(wheel_path.name)
    if found is None:
        raise SystemExit(
            f"Wheel '{wheel_path.name}' does not have a recognized manylinux_X_Y_arch tag"
        )
    _full_tag, major, minor, arch = found.groups()
    return (int(major), int(minor)), arch
80+
81+
82+
def main(argv: list[str] | None = None) -> int:
    """Run the command-line verifier.

    Two modes are supported:

    * ``--jre <dir>``: print the ``manylinux_X_Y`` tag implied by the JRE and
      return 0 (no wheel is checked).
    * ``<wheel> <jre_dir>``: compare the wheel filename's manylinux glibc
      version with the highest GLIBC_ version found under ``jre_dir``.

    Args:
        argv: Command-line arguments without the program name. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``, which keeps the
            original zero-argument call working.

    Returns:
        0 when the tag matches (or in ``--jre`` mode), 1 on a mismatch, and 2
        when the wheel or JRE positional argument is missing.
    """
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("wheel", nargs="?", type=Path, help="Path to the .whl file")
    parser.add_argument("jre", nargs="?", type=Path, help="Path to the bundled JRE/JDK directory")
    parser.add_argument("--jre", dest="jre_only", type=Path, help="Inspect a JRE only and print its required tag")
    args = parser.parse_args(argv)

    # --jre takes precedence over any positional arguments.
    if args.jre_only is not None:
        major, minor = max_glibc_in_dir(args.jre_only)
        print(f"manylinux_{major}_{minor}")
        return 0

    if args.wheel is None or args.jre is None:
        parser.print_usage(sys.stderr)
        return 2

    # The architecture part of the tag is parsed but not checked here.
    wheel_glibc, _arch = parse_wheel_tag(args.wheel)
    jre_glibc = max_glibc_in_dir(args.jre)

    print(f"Wheel platform tag glibc: {wheel_glibc[0]}.{wheel_glibc[1]}")
    print(f"JRE max GLIBC_ symbol: {jre_glibc[0]}.{jre_glibc[1]}")

    # Any difference is an error: higher or lower than the JRE's version.
    if wheel_glibc != jre_glibc:
        print(
            f"ERROR: wheel manylinux tag (manylinux_{wheel_glibc[0]}_{wheel_glibc[1]}) "
            f"does not match the JRE's actual glibc requirement "
            f"(manylinux_{jre_glibc[0]}_{jre_glibc[1]}). "
            "Update WHEEL_PLAT in scripts/Dockerfile.build (issue #4037).",
            file=sys.stderr,
        )
        return 1

    print("OK: wheel platform tag matches the JRE's glibc requirement.")
    return 0
116+
117+
118+
if __name__ == "__main__":
119+
sys.exit(main())
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
"""Regression tests for issue #4037: wheel manylinux platform tag.
2+
3+
The Linux wheel must be tagged with the same glibc version that the bundled JRE
4+
binaries actually require. Tagging higher than the JRE needs (e.g. manylinux_2_35
5+
when the JRE only references GLIBC_2.34 symbols) blocks installation on systems
6+
running glibc 2.34 (RHEL 9, Rocky 9, AlmaLinux 9, Amazon Linux 2023, ...) for no
7+
reason. Tagging lower lets the wheel install where the JRE cannot run.
8+
9+
These tests exercise the verifier script directly with synthetic ELF files so we
10+
do not depend on Docker or a built wheel.
11+
12+
@author Luca Garulli (l.garulli@arcadedata.com)
13+
"""
14+
15+
from __future__ import annotations
16+
17+
import importlib.util
18+
from pathlib import Path
19+
20+
import pytest
21+
22+
23+
# Location of the verifier script: ../scripts/verify_wheel_platform_tag.py
# relative to the directory containing this test module.
SCRIPT_PATH = Path(__file__).resolve().parents[1] / "scripts" / "verify_wheel_platform_tag.py"


def _load_verifier():
    """Import the verifier script from ``SCRIPT_PATH`` and return the module.

    The script is loaded by file path through ``importlib``. Each call executes
    the script again and returns a fresh module object.
    """
    spec = importlib.util.spec_from_file_location("verify_wheel_platform_tag", SCRIPT_PATH)
    # Validate the spec before it is used; spec_from_file_location may return
    # None and a spec may have no loader.
    assert spec is not None and spec.loader is not None, f"cannot load verifier from {SCRIPT_PATH}"
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
32+
33+
34+
def _make_elf_with_glibc_symbols(path: Path, versions: list[str]) -> None:
    """Write a minimal fake ELF file that embeds ``GLIBC_X.Y`` strings.

    The file consists of the 4-byte ELF magic, 12 zero bytes, and then each
    ``GLIBC_<version>`` string separated and terminated by a NUL byte. This is
    not a valid ELF image; it is only enough for a scanner that looks for the
    magic prefix and the literal version strings.

    Args:
        path: Destination file; created or overwritten.
        versions: Version suffixes such as ``"2.34"``.
    """
    symbol_table = b"\x00".join(
        "GLIBC_{}".format(version).encode("ascii") for version in versions
    )
    pieces = [b"\x7fELF", bytes(12), symbol_table, b"\x00"]
    path.write_bytes(b"".join(pieces))
45+
46+
47+
def test_max_glibc_in_dir_picks_highest(tmp_path: Path) -> None:
    """The directory scan reports the highest version across all fake binaries."""
    fake_binaries = {
        "java": ["2.17", "2.34"],
        "libjvm.so": ["2.17", "2.28", "2.34"],
    }
    for filename, symbol_versions in fake_binaries.items():
        _make_elf_with_glibc_symbols(tmp_path / filename, symbol_versions)

    verifier = _load_verifier()
    highest = verifier.max_glibc_in_dir(tmp_path)

    assert highest == (2, 34)
53+
54+
55+
def test_parse_wheel_tag_extracts_version_and_arch(tmp_path: Path) -> None:
    """A manylinux_2_34_x86_64 filename parses to ((2, 34), "x86_64")."""
    wheel = tmp_path / "arcadedb_embedded-26.4.2-py3-none-manylinux_2_34_x86_64.whl"
    wheel.write_bytes(b"")
    verifier = _load_verifier()

    parsed = verifier.parse_wheel_tag(wheel)

    assert parsed == ((2, 34), "x86_64")
63+
64+
65+
def test_main_succeeds_when_tag_matches(tmp_path: Path, capsys: pytest.CaptureFixture[str]) -> None:
    """main() returns 0 and prints OK when wheel tag and JRE both say 2.34."""
    jre_dir = tmp_path / "jre"
    jre_dir.mkdir()
    _make_elf_with_glibc_symbols(jre_dir / "libjvm.so", ["2.17", "2.34"])
    wheel = tmp_path / "arcadedb_embedded-26.4.2-py3-none-manylinux_2_34_x86_64.whl"
    wheel.write_bytes(b"")
    cli_args = [str(wheel), str(jre_dir)]

    exit_code = _run_main(_load_verifier(), cli_args)
    stdout = capsys.readouterr().out

    assert exit_code == 0
    assert "OK" in stdout
77+
78+
79+
def test_main_fails_when_tag_higher_than_jre(tmp_path: Path, capsys: pytest.CaptureFixture[str]) -> None:
    """Reproduces issue #4037: wheel tagged manylinux_2_35 but JRE only needs GLIBC_2.34."""
    verifier = _load_verifier()
    jre_dir = tmp_path / "jre"
    jre_dir.mkdir()
    _make_elf_with_glibc_symbols(jre_dir / "libjvm.so", ["2.17", "2.34"])

    wheel = tmp_path / "arcadedb_embedded-26.4.2-py3-none-manylinux_2_35_x86_64.whl"
    wheel.write_bytes(b"")

    rc = _run_main(verifier, [str(wheel), str(jre_dir)])
    captured = capsys.readouterr()
    assert rc != 0
    assert "does not match" in captured.err
    # The error must name both sides of the mismatch: the wheel's tag and the
    # tag implied by the JRE. Checking only one of them would let a message
    # that drops the other slip through.
    assert "manylinux_2_35" in captured.err
    assert "manylinux_2_34" in captured.err
94+
95+
96+
def test_main_fails_when_tag_lower_than_jre(tmp_path: Path, capsys: pytest.CaptureFixture[str]) -> None:
    """A too-low tag would let the wheel install on systems where the JRE cannot run."""
    jre_dir = tmp_path / "jre"
    jre_dir.mkdir()
    # The fake JRE references GLIBC_2.36 while the wheel claims manylinux_2_28.
    _make_elf_with_glibc_symbols(jre_dir / "libjvm.so", ["2.17", "2.36"])
    wheel = tmp_path / "arcadedb_embedded-26.4.2-py3-none-manylinux_2_28_x86_64.whl"
    wheel.write_bytes(b"")
    cli_args = [str(wheel), str(jre_dir)]

    exit_code = _run_main(_load_verifier(), cli_args)
    stderr = capsys.readouterr().err

    assert exit_code != 0
    assert "does not match" in stderr
109+
110+
111+
def _run_main(verifier, argv: list[str]) -> int:
    """Call ``verifier.main()`` with ``sys.argv`` temporarily replaced.

    ``sys.argv`` is set to the script name followed by ``argv`` for the
    duration of the call and restored afterwards, even if ``main`` raises.

    Args:
        verifier: Object exposing a zero-argument ``main()``.
        argv: Command-line arguments to present, without the program name.

    Returns:
        Whatever ``verifier.main()`` returns.
    """
    import sys

    original_argv = sys.argv
    patched_argv = ["verify_wheel_platform_tag.py"]
    patched_argv.extend(argv)
    sys.argv = patched_argv
    try:
        exit_code = verifier.main()
    finally:
        sys.argv = original_argv
    return exit_code
120+
121+
122+
def test_module_exposes_public_api() -> None:
    """Sanity check: the verifier script loads and defines its three entry points."""
    verifier = _load_verifier()
    for attribute in ("max_glibc_in_dir", "parse_wheel_tag", "main"):
        assert hasattr(verifier, attribute)

0 commit comments

Comments
 (0)