diff --git a/build_tools/getdeps_fallback_mirror.py b/build_tools/getdeps_fallback_mirror.py index a6d10007ef09..b32a39ee934d 100644 --- a/build_tools/getdeps_fallback_mirror.py +++ b/build_tools/getdeps_fallback_mirror.py @@ -6,60 +6,183 @@ Pre-download packages with unreliable mirrors using fallback mirrors. Reads package info from folly's getdeps manifest files. """ -import sys -import os -import hashlib -import subprocess import configparser +import hashlib +import os +import shutil +import sys +import urllib.request + +DOWNLOAD_TIMEOUT_SECONDS = 120 +DOWNLOAD_CHUNK_BYTES = 64 * 1024 +MAX_DOWNLOAD_BYTES = 50 * 1024 * 1024 + +MIRROR_FALLBACKS = { + "ftpmirror.gnu.org/gnu/": [ + "https://mirrors.kernel.org/gnu/", + "https://ftpmirror.gnu.org/gnu/", + "https://ftp.gnu.org/gnu/", + ], + "ftp.gnu.org/gnu/": [ + "https://mirrors.kernel.org/gnu/", + "https://ftpmirror.gnu.org/gnu/", + "https://ftp.gnu.org/gnu/", + ], +} + +# These packages must have URLs matching MIRROR_FALLBACKS; other packages are +# left for getdeps.py's normal download path. +PACKAGES_TO_CHECK = ("autoconf", "automake", "libtool", "libiberty") + def sha256_file(path): """Calculate SHA256 hash of a file.""" h = hashlib.sha256() try: - with open(path, 'rb') as f: - for chunk in iter(lambda: f.read(65536), b''): + with open(path, "rb") as f: + for chunk in iter(lambda: f.read(65536), b""): h.update(chunk) return h.hexdigest() except Exception: return None + def parse_manifest(manifest_path): """Parse a getdeps manifest file to extract download info.""" - config = configparser.ConfigParser() + # folly manifests can contain bare keys in sections unrelated to downloads. + config = configparser.ConfigParser(allow_no_value=True, interpolation=None) try: - config.read(manifest_path) - if 'download' in config: - return { - 'url': config['download'].get('url', ''), - 'sha256': config['download'].get('sha256', ''), - } - except Exception: - pass + with open(manifest_path, encoding="utf-8") as manifest_file: + config.read_file(manifest_file) + except Exception as ex: + print(f" {os.path.basename(manifest_path)}: WARNING - parse failed: {ex}") + return None + + if "download" in config: + return { + "url": config["download"].get("url", ""), + "sha256": config["download"].get("sha256", ""), + } return None + +def file_size(path): + try: + return os.path.getsize(path) + except Exception: + return None + + def get_fallback_mirrors(url): """Get fallback mirror URLs for a given URL.""" - # Fallback mirror patterns for known unreliable hosts - mirror_fallbacks = { - "ftp.gnu.org/gnu/": [ - "https://mirrors.kernel.org/gnu/", - "https://ftpmirror.gnu.org/gnu/", - "https://ftp.gnu.org/gnu/", - ], - "ftpmirror.gnu.org/gnu/": [ - "https://mirrors.kernel.org/gnu/", - "https://ftpmirror.gnu.org/gnu/", - "https://ftp.gnu.org/gnu/", - ], - } - - for pattern, mirrors in mirror_fallbacks.items(): + for pattern, mirrors in MIRROR_FALLBACKS.items(): if pattern in url: # Extract the path after the pattern path_start = url.find(pattern) + len(pattern) path = url[path_start:] return [mirror + path for mirror in mirrors] - return [url] # No fallback, use original + return [] + + +def download_url(url, filepath): + """Download URL to filepath without leaving partial files behind.""" + tmp_filepath = filepath + ".tmp" + if os.path.exists(tmp_filepath): + os.remove(tmp_filepath) + + request = urllib.request.Request( + url, headers={"User-Agent": "rocksdb-getdeps-fallback/1.0"} + ) + try: + with urllib.request.urlopen( + request, timeout=DOWNLOAD_TIMEOUT_SECONDS + ) as response, open(tmp_filepath, "wb") as output: + copied = 0 + while True: + chunk = response.read(DOWNLOAD_CHUNK_BYTES) + if not chunk: + break + + copied += len(chunk) + if copied > MAX_DOWNLOAD_BYTES: + raise Exception( + f"download exceeds {MAX_DOWNLOAD_BYTES} bytes" + ) + output.write(chunk) + os.replace(tmp_filepath, filepath) + finally: + if os.path.exists(tmp_filepath): + os.remove(tmp_filepath) + + +def prepare_download(package, info, download_dir, cache_dir): + url = info["url"] + expected_sha256 = info["sha256"] + mirrors = get_fallback_mirrors(url) + if not mirrors: + return False + + if not expected_sha256: + print(f" {package}: WARNING - skipped fallback without sha256") + return False + + # getdeps uses format: {package}-{filename} + filename = f"{package}-{os.path.basename(url)}" + filepath = os.path.join(download_dir, filename) + cache_path = os.path.join(cache_dir, filename) + + # Check if already valid. + actual_sha256 = sha256_file(filepath) if os.path.exists(filepath) else None + if actual_sha256 == expected_sha256: + print(f" {filename}: OK (already downloaded)") + return True + if actual_sha256 is not None: + print( + f" {filename}: WARNING - removing invalid download " + f"sha256={actual_sha256}" + ) + os.remove(filepath) + + # The cache is only an opportunistic single-build accelerator; callers + # should not share it across concurrent builds without external locking. + actual_sha256 = sha256_file(cache_path) if os.path.exists(cache_path) else None + if actual_sha256 == expected_sha256: + print(f" {filename}: OK (from cache)") + shutil.copy2(cache_path, filepath) + return True + if actual_sha256 is not None: + print( + f" {filename}: WARNING - removing invalid cache " + f"sha256={actual_sha256}" + ) + os.remove(cache_path) + + # Try fallback mirrors. + for mirror_url in mirrors: + print(f" {filename}: trying {mirror_url}...") + try: + download_url(mirror_url, filepath) + except Exception as ex: + print(f" {filename}: WARNING - download failed: {ex}") + continue + + actual_sha256 = sha256_file(filepath) + if actual_sha256 == expected_sha256: + size = file_size(filepath) + print(f" {filename}: OK (downloaded, {size} bytes)") + shutil.copy2(filepath, cache_path) + return True + + size = file_size(filepath) + print( + f" {filename}: WARNING - sha256 mismatch from {mirror_url}: " + f"expected={expected_sha256} actual={actual_sha256} size={size}" + ) + os.remove(filepath) + + print(f" {filename}: WARNING - all mirrors failed") + return False + def main(): if len(sys.argv) != 4: @@ -67,60 +190,39 @@ def main(): sys.exit(1) download_dir, cache_dir, manifests_dir = sys.argv[1], sys.argv[2], sys.argv[3] + os.makedirs(download_dir, exist_ok=True) + os.makedirs(cache_dir, exist_ok=True) - # Packages known to have unreliable mirrors - packages_to_check = ["autoconf", "automake", "libtool"] - - for package in packages_to_check: + checked = 0 + ready = 0 + for package in PACKAGES_TO_CHECK: manifest_path = os.path.join(manifests_dir, package) - if not os.path.exists(manifest_path): + if not os.path.isfile(manifest_path): continue info = parse_manifest(manifest_path) - if not info or not info['url'] or not info['sha256']: + if not info or not info["url"]: continue - # Determine filename from URL - url = info['url'] - expected_sha256 = info['sha256'] - url_filename = os.path.basename(url) - - # getdeps uses format: {package}-{filename} - filename = f"{package}-{url_filename}" - filepath = os.path.join(download_dir, filename) - cache_path = os.path.join(cache_dir, filename) - - # Check if already valid - if os.path.exists(filepath) and sha256_file(filepath) == expected_sha256: - print(f" {filename}: OK (already downloaded)") + if not info["sha256"]: + print(f" {package}: WARNING - skipped fallback without sha256") continue - # Check cache - if os.path.exists(cache_path) and sha256_file(cache_path) == expected_sha256: - print(f" {filename}: OK (from cache)") - subprocess.run(['cp', cache_path, filepath], check=True) + if not get_fallback_mirrors(info["url"]): + print( + f" {package}: WARNING - skipped fallback without known mirror " + f"for {info['url']}" + ) continue - # Try fallback mirrors - mirrors = get_fallback_mirrors(url) - downloaded = False - for mirror_url in mirrors: - print(f" {filename}: trying {mirror_url}...") - try: - subprocess.run(['wget', '-q', '-O', filepath, mirror_url], check=True, timeout=120) - if sha256_file(filepath) == expected_sha256: - print(f" {filename}: OK (downloaded)") - subprocess.run(['cp', filepath, cache_path], check=False) - downloaded = True - break - else: - os.remove(filepath) - except Exception: - if os.path.exists(filepath): - os.remove(filepath) - - if not downloaded: - print(f" {filename}: WARNING - all mirrors failed") + checked += 1 + try: + if prepare_download(package, info, download_dir, cache_dir): + ready += 1 + except Exception as ex: + print(f" {package}: WARNING - fallback preparation failed: {ex}") + + print(f" fallback mirror downloads ready: {ready}/{checked}") if __name__ == "__main__": main() diff --git a/folly.mk b/folly.mk index 4d64e813fb1d..01d0f79f83ba 100644 --- a/folly.mk +++ b/folly.mk @@ -99,6 +99,30 @@ endif # FMT_SOURCE_PATH endif FOLLY_COMMIT_HASH = 548b16da0b3cc887d69cdb6ae06496ad8a2a9276 +FOLLY_GETDEPS_CACHE_DIR = /tmp/rocksdb-getdeps-cache + +define restore_folly_getdeps_downloads + @cd third-party/folly && \ + DOWNLOAD_DIR=`$(PYTHON) build/fbcode_builder/getdeps.py show-inst-dir | sed 's|/installed/.*|/downloads|'` && \ + mkdir -p "$$DOWNLOAD_DIR" && \ + CACHE_DIR="$(FOLLY_GETDEPS_CACHE_DIR)" && \ + mkdir -p "$$CACHE_DIR" && \ + echo "Restoring cached downloads..." && \ + if ls "$$CACHE_DIR"/*.tar.gz "$$CACHE_DIR"/*.tar.xz "$$CACHE_DIR"/*.zip >/dev/null 2>&1; then \ + cp -n "$$CACHE_DIR"/*.tar.gz "$$CACHE_DIR"/*.tar.xz "$$CACHE_DIR"/*.zip "$$DOWNLOAD_DIR/" 2>/dev/null || true; \ + fi && \ + echo "Handling known unreliable downloads with fallback mirrors..." && \ + $(PYTHON) ../../build_tools/getdeps_fallback_mirror.py "$$DOWNLOAD_DIR" "$$CACHE_DIR" build/fbcode_builder/manifests +endef + +define cache_folly_getdeps_downloads + @cd third-party/folly && \ + DOWNLOAD_DIR=`$(PYTHON) build/fbcode_builder/getdeps.py show-inst-dir | sed 's|/installed/.*|/downloads|'` && \ + CACHE_DIR="$(FOLLY_GETDEPS_CACHE_DIR)" && \ + if ls "$$DOWNLOAD_DIR"/*.tar.gz "$$DOWNLOAD_DIR"/*.tar.xz "$$DOWNLOAD_DIR"/*.zip >/dev/null 2>&1; then \ + cp -n "$$DOWNLOAD_DIR"/*.tar.gz "$$DOWNLOAD_DIR"/*.tar.xz "$$DOWNLOAD_DIR"/*.zip "$$CACHE_DIR/" 2>/dev/null || true; \ + fi +endef # For public CI runs, checkout folly in a way that can build with RocksDB. # This is mostly intended as a test-only simulation of Meta-internal folly @@ -117,26 +141,11 @@ checkout_folly: @# const mismatch perl -pi -e 's/: environ/: (const char**)(environ)/' third-party/folly/folly/Subprocess.cpp @# Restore cached downloads and handle unreliable mirrors with fallback - @cd third-party/folly && \ - DOWNLOAD_DIR=`$(PYTHON) build/fbcode_builder/getdeps.py show-inst-dir | sed 's|/installed/.*|/downloads|'` && \ - mkdir -p "$$DOWNLOAD_DIR" && \ - CACHE_DIR="/tmp/rocksdb-getdeps-cache" && \ - mkdir -p "$$CACHE_DIR" && \ - echo "Restoring cached downloads..." && \ - if ls "$$CACHE_DIR"/*.tar.gz "$$CACHE_DIR"/*.tar.xz "$$CACHE_DIR"/*.zip >/dev/null 2>&1; then \ - cp -n "$$CACHE_DIR"/*.tar.gz "$$CACHE_DIR"/*.tar.xz "$$CACHE_DIR"/*.zip "$$DOWNLOAD_DIR/" 2>/dev/null || true; \ - fi && \ - echo "Handling known unreliable downloads with fallback mirrors..." && \ - $(PYTHON) ../../build_tools/getdeps_fallback_mirror.py "$$DOWNLOAD_DIR" "$$CACHE_DIR" build/fbcode_builder/manifests + $(restore_folly_getdeps_downloads) @# NOTE: boost and fmt source will be needed for any build including `USE_FOLLY_LITE` builds as those depend on those headers cd third-party/folly && GETDEPS_USE_WGET=1 $(PYTHON) build/fbcode_builder/getdeps.py fetch boost && GETDEPS_USE_WGET=1 $(PYTHON) build/fbcode_builder/getdeps.py fetch fmt @# Update cache with any new downloads - @cd third-party/folly && \ - DOWNLOAD_DIR=`$(PYTHON) build/fbcode_builder/getdeps.py show-inst-dir | sed 's|/installed/.*|/downloads|'` && \ - CACHE_DIR="/tmp/rocksdb-getdeps-cache" && \ - if ls "$$DOWNLOAD_DIR"/*.tar.gz "$$DOWNLOAD_DIR"/*.tar.xz "$$DOWNLOAD_DIR"/*.zip >/dev/null 2>&1; then \ - cp -n "$$DOWNLOAD_DIR"/*.tar.gz "$$DOWNLOAD_DIR"/*.tar.xz "$$DOWNLOAD_DIR"/*.zip "$$CACHE_DIR/" 2>/dev/null || true; \ - fi + $(cache_folly_getdeps_downloads) CXX_M_FLAGS = $(filter -m%, $(CXXFLAGS)) @@ -155,6 +164,8 @@ build_folly: echo "Please run checkout_folly first"; \ false; \ fi + @# Restore fallback archives after the cleanup above removes downloads. + $(restore_folly_getdeps_downloads) cd third-party/folly && \ CXXFLAGS=" $(CXX_M_FLAGS) -DHAVE_CXX11_ATOMIC " GETDEPS_USE_WGET=1 $(PYTHON) build/fbcode_builder/getdeps.py build $(FOLLY_BUILD_FLAGS) @# In the folly build, glog and gflags are only built as dynamic libraries,