Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
250 changes: 176 additions & 74 deletions build_tools/getdeps_fallback_mirror.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,121 +6,223 @@
Pre-download packages with unreliable mirrors using fallback mirrors.
Reads package info from folly's getdeps manifest files.
"""
import sys
import os
import hashlib
import subprocess
import configparser
import hashlib
import os
import shutil
import sys
import urllib.request

DOWNLOAD_TIMEOUT_SECONDS = 120
DOWNLOAD_CHUNK_BYTES = 64 * 1024
MAX_DOWNLOAD_BYTES = 50 * 1024 * 1024

MIRROR_FALLBACKS = {
"ftpmirror.gnu.org/gnu/": [
"https://mirrors.kernel.org/gnu/",
"https://ftpmirror.gnu.org/gnu/",
"https://ftp.gnu.org/gnu/",
],
"ftp.gnu.org/gnu/": [
"https://mirrors.kernel.org/gnu/",
"https://ftpmirror.gnu.org/gnu/",
"https://ftp.gnu.org/gnu/",
],
}

# These packages must have URLs matching MIRROR_FALLBACKS; other packages are
# left for getdeps.py's normal download path.
PACKAGES_TO_CHECK = ("autoconf", "automake", "libtool", "libiberty")


def sha256_file(path):
"""Calculate SHA256 hash of a file."""
h = hashlib.sha256()
try:
with open(path, 'rb') as f:
for chunk in iter(lambda: f.read(65536), b''):
with open(path, "rb") as f:
for chunk in iter(lambda: f.read(65536), b""):
h.update(chunk)
return h.hexdigest()
except Exception:
return None


def parse_manifest(manifest_path):
"""Parse a getdeps manifest file to extract download info."""
config = configparser.ConfigParser()
# folly manifests can contain bare keys in sections unrelated to downloads.
config = configparser.ConfigParser(allow_no_value=True, interpolation=None)
try:
config.read(manifest_path)
if 'download' in config:
return {
'url': config['download'].get('url', ''),
'sha256': config['download'].get('sha256', ''),
}
except Exception:
pass
with open(manifest_path, encoding="utf-8") as manifest_file:
config.read_file(manifest_file)
except Exception as ex:
print(f" {os.path.basename(manifest_path)}: WARNING - parse failed: {ex}")
return None

if "download" in config:
return {
"url": config["download"].get("url", ""),
"sha256": config["download"].get("sha256", ""),
}
return None


def file_size(path):
try:
return os.path.getsize(path)
except Exception:
return None


def get_fallback_mirrors(url):
"""Get fallback mirror URLs for a given URL."""
# Fallback mirror patterns for known unreliable hosts
mirror_fallbacks = {
"ftp.gnu.org/gnu/": [
"https://mirrors.kernel.org/gnu/",
"https://ftpmirror.gnu.org/gnu/",
"https://ftp.gnu.org/gnu/",
],
"ftpmirror.gnu.org/gnu/": [
"https://mirrors.kernel.org/gnu/",
"https://ftpmirror.gnu.org/gnu/",
"https://ftp.gnu.org/gnu/",
],
}

for pattern, mirrors in mirror_fallbacks.items():
for pattern, mirrors in MIRROR_FALLBACKS.items():
if pattern in url:
# Extract the path after the pattern
path_start = url.find(pattern) + len(pattern)
path = url[path_start:]
return [mirror + path for mirror in mirrors]
return [url] # No fallback, use original
return []


def download_url(url, filepath):
"""Download URL to filepath without leaving partial files behind."""
tmp_filepath = filepath + ".tmp"
if os.path.exists(tmp_filepath):
os.remove(tmp_filepath)

request = urllib.request.Request(
url, headers={"User-Agent": "rocksdb-getdeps-fallback/1.0"}
)
try:
with urllib.request.urlopen(
request, timeout=DOWNLOAD_TIMEOUT_SECONDS
) as response, open(tmp_filepath, "wb") as output:
copied = 0
while True:
chunk = response.read(DOWNLOAD_CHUNK_BYTES)
if not chunk:
break

copied += len(chunk)
if copied > MAX_DOWNLOAD_BYTES:
raise Exception(
f"download exceeds {MAX_DOWNLOAD_BYTES} bytes"
)
output.write(chunk)
os.replace(tmp_filepath, filepath)
finally:
if os.path.exists(tmp_filepath):
os.remove(tmp_filepath)


def prepare_download(package, info, download_dir, cache_dir):
url = info["url"]
expected_sha256 = info["sha256"]
mirrors = get_fallback_mirrors(url)
if not mirrors:
return False

if not expected_sha256:
print(f" {package}: WARNING - skipped fallback without sha256")
return False

# getdeps uses format: {package}-{filename}
filename = f"{package}-{os.path.basename(url)}"
filepath = os.path.join(download_dir, filename)
cache_path = os.path.join(cache_dir, filename)

# Check if already valid.
actual_sha256 = sha256_file(filepath) if os.path.exists(filepath) else None
if actual_sha256 == expected_sha256:
print(f" {filename}: OK (already downloaded)")
return True
if actual_sha256 is not None:
print(
f" {filename}: WARNING - removing invalid download "
f"sha256={actual_sha256}"
)
os.remove(filepath)

# The cache is only an opportunistic single-build accelerator; callers
# should not share it across concurrent builds without external locking.
actual_sha256 = sha256_file(cache_path) if os.path.exists(cache_path) else None
if actual_sha256 == expected_sha256:
print(f" {filename}: OK (from cache)")
shutil.copy2(cache_path, filepath)
return True
if actual_sha256 is not None:
print(
f" {filename}: WARNING - removing invalid cache "
f"sha256={actual_sha256}"
)
os.remove(cache_path)

# Try fallback mirrors.
for mirror_url in mirrors:
print(f" {filename}: trying {mirror_url}...")
try:
download_url(mirror_url, filepath)
except Exception as ex:
print(f" {filename}: WARNING - download failed: {ex}")
continue

actual_sha256 = sha256_file(filepath)
if actual_sha256 == expected_sha256:
size = file_size(filepath)
print(f" {filename}: OK (downloaded, {size} bytes)")
shutil.copy2(filepath, cache_path)
return True

size = file_size(filepath)
print(
f" {filename}: WARNING - sha256 mismatch from {mirror_url}: "
f"expected={expected_sha256} actual={actual_sha256} size={size}"
)
os.remove(filepath)

print(f" {filename}: WARNING - all mirrors failed")
return False


def main():
if len(sys.argv) != 4:
print(f"Usage: {sys.argv[0]} <download_dir> <cache_dir> <manifests_dir>")
sys.exit(1)

download_dir, cache_dir, manifests_dir = sys.argv[1], sys.argv[2], sys.argv[3]
os.makedirs(download_dir, exist_ok=True)
os.makedirs(cache_dir, exist_ok=True)

# Packages known to have unreliable mirrors
packages_to_check = ["autoconf", "automake", "libtool"]

for package in packages_to_check:
checked = 0
ready = 0
for package in PACKAGES_TO_CHECK:
manifest_path = os.path.join(manifests_dir, package)
if not os.path.exists(manifest_path):
if not os.path.isfile(manifest_path):
continue

info = parse_manifest(manifest_path)
if not info or not info['url'] or not info['sha256']:
if not info or not info["url"]:
continue

# Determine filename from URL
url = info['url']
expected_sha256 = info['sha256']
url_filename = os.path.basename(url)

# getdeps uses format: {package}-{filename}
filename = f"{package}-{url_filename}"
filepath = os.path.join(download_dir, filename)
cache_path = os.path.join(cache_dir, filename)

# Check if already valid
if os.path.exists(filepath) and sha256_file(filepath) == expected_sha256:
print(f" {filename}: OK (already downloaded)")
if not info["sha256"]:
print(f" {package}: WARNING - skipped fallback without sha256")
continue

# Check cache
if os.path.exists(cache_path) and sha256_file(cache_path) == expected_sha256:
print(f" {filename}: OK (from cache)")
subprocess.run(['cp', cache_path, filepath], check=True)
if not get_fallback_mirrors(info["url"]):
print(
f" {package}: WARNING - skipped fallback without known mirror "
f"for {info['url']}"
)
continue

# Try fallback mirrors
mirrors = get_fallback_mirrors(url)
downloaded = False
for mirror_url in mirrors:
print(f" {filename}: trying {mirror_url}...")
try:
subprocess.run(['wget', '-q', '-O', filepath, mirror_url], check=True, timeout=120)
if sha256_file(filepath) == expected_sha256:
print(f" {filename}: OK (downloaded)")
subprocess.run(['cp', filepath, cache_path], check=False)
downloaded = True
break
else:
os.remove(filepath)
except Exception:
if os.path.exists(filepath):
os.remove(filepath)

if not downloaded:
print(f" {filename}: WARNING - all mirrors failed")
checked += 1
try:
if prepare_download(package, info, download_dir, cache_dir):
ready += 1
except Exception as ex:
print(f" {package}: WARNING - fallback preparation failed: {ex}")

print(f" fallback mirror downloads ready: {ready}/{checked}")

if __name__ == "__main__":
main()
45 changes: 28 additions & 17 deletions folly.mk
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,30 @@ endif # FMT_SOURCE_PATH
endif

FOLLY_COMMIT_HASH = 548b16da0b3cc887d69cdb6ae06496ad8a2a9276
FOLLY_GETDEPS_CACHE_DIR = /tmp/rocksdb-getdeps-cache

define restore_folly_getdeps_downloads
@cd third-party/folly && \
DOWNLOAD_DIR=`$(PYTHON) build/fbcode_builder/getdeps.py show-inst-dir | sed 's|/installed/.*|/downloads|'` && \
mkdir -p "$$DOWNLOAD_DIR" && \
CACHE_DIR="$(FOLLY_GETDEPS_CACHE_DIR)" && \
mkdir -p "$$CACHE_DIR" && \
echo "Restoring cached downloads..." && \
if ls "$$CACHE_DIR"/*.tar.gz "$$CACHE_DIR"/*.tar.xz "$$CACHE_DIR"/*.zip >/dev/null 2>&1; then \
cp -n "$$CACHE_DIR"/*.tar.gz "$$CACHE_DIR"/*.tar.xz "$$CACHE_DIR"/*.zip "$$DOWNLOAD_DIR/" 2>/dev/null || true; \
fi && \
echo "Handling known unreliable downloads with fallback mirrors..." && \
$(PYTHON) ../../build_tools/getdeps_fallback_mirror.py "$$DOWNLOAD_DIR" "$$CACHE_DIR" build/fbcode_builder/manifests
endef

define cache_folly_getdeps_downloads
@cd third-party/folly && \
DOWNLOAD_DIR=`$(PYTHON) build/fbcode_builder/getdeps.py show-inst-dir | sed 's|/installed/.*|/downloads|'` && \
CACHE_DIR="$(FOLLY_GETDEPS_CACHE_DIR)" && \
if ls "$$DOWNLOAD_DIR"/*.tar.gz "$$DOWNLOAD_DIR"/*.tar.xz "$$DOWNLOAD_DIR"/*.zip >/dev/null 2>&1; then \
cp -n "$$DOWNLOAD_DIR"/*.tar.gz "$$DOWNLOAD_DIR"/*.tar.xz "$$DOWNLOAD_DIR"/*.zip "$$CACHE_DIR/" 2>/dev/null || true; \
fi
endef

# For public CI runs, checkout folly in a way that can build with RocksDB.
# This is mostly intended as a test-only simulation of Meta-internal folly
Expand All @@ -117,26 +141,11 @@ checkout_folly:
@# const mismatch
perl -pi -e 's/: environ/: (const char**)(environ)/' third-party/folly/folly/Subprocess.cpp
@# Restore cached downloads and handle unreliable mirrors with fallback
@cd third-party/folly && \
DOWNLOAD_DIR=`$(PYTHON) build/fbcode_builder/getdeps.py show-inst-dir | sed 's|/installed/.*|/downloads|'` && \
mkdir -p "$$DOWNLOAD_DIR" && \
CACHE_DIR="/tmp/rocksdb-getdeps-cache" && \
mkdir -p "$$CACHE_DIR" && \
echo "Restoring cached downloads..." && \
if ls "$$CACHE_DIR"/*.tar.gz "$$CACHE_DIR"/*.tar.xz "$$CACHE_DIR"/*.zip >/dev/null 2>&1; then \
cp -n "$$CACHE_DIR"/*.tar.gz "$$CACHE_DIR"/*.tar.xz "$$CACHE_DIR"/*.zip "$$DOWNLOAD_DIR/" 2>/dev/null || true; \
fi && \
echo "Handling known unreliable downloads with fallback mirrors..." && \
$(PYTHON) ../../build_tools/getdeps_fallback_mirror.py "$$DOWNLOAD_DIR" "$$CACHE_DIR" build/fbcode_builder/manifests
$(restore_folly_getdeps_downloads)
@# NOTE: boost and fmt source will be needed for any build including `USE_FOLLY_LITE` builds as those depend on those headers
cd third-party/folly && GETDEPS_USE_WGET=1 $(PYTHON) build/fbcode_builder/getdeps.py fetch boost && GETDEPS_USE_WGET=1 $(PYTHON) build/fbcode_builder/getdeps.py fetch fmt
@# Update cache with any new downloads
@cd third-party/folly && \
DOWNLOAD_DIR=`$(PYTHON) build/fbcode_builder/getdeps.py show-inst-dir | sed 's|/installed/.*|/downloads|'` && \
CACHE_DIR="/tmp/rocksdb-getdeps-cache" && \
if ls "$$DOWNLOAD_DIR"/*.tar.gz "$$DOWNLOAD_DIR"/*.tar.xz "$$DOWNLOAD_DIR"/*.zip >/dev/null 2>&1; then \
cp -n "$$DOWNLOAD_DIR"/*.tar.gz "$$DOWNLOAD_DIR"/*.tar.xz "$$DOWNLOAD_DIR"/*.zip "$$CACHE_DIR/" 2>/dev/null || true; \
fi
$(cache_folly_getdeps_downloads)

CXX_M_FLAGS = $(filter -m%, $(CXXFLAGS))

Expand All @@ -155,6 +164,8 @@ build_folly:
echo "Please run checkout_folly first"; \
false; \
fi
@# Restore fallback archives after the cleanup above removes downloads.
$(restore_folly_getdeps_downloads)
cd third-party/folly && \
CXXFLAGS=" $(CXX_M_FLAGS) -DHAVE_CXX11_ATOMIC " GETDEPS_USE_WGET=1 $(PYTHON) build/fbcode_builder/getdeps.py build $(FOLLY_BUILD_FLAGS)
@# In the folly build, glog and gflags are only built as dynamic libraries,
Expand Down
Loading