diff --git a/.gitignore b/.gitignore
index 8e560bbe..05f2c5b2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -43,6 +43,7 @@ wheels/
 *.manifest
 #*.spec
 build_pyinstaller
+build_pyinstaller_data
 dist_pyinstaller
 
 # Installer logs
diff --git a/release/common/download_installer_data.sh b/release/common/download_installer_data.sh
new file mode 100755
index 00000000..1e94035b
--- /dev/null
+++ b/release/common/download_installer_data.sh
@@ -0,0 +1,125 @@
+#!/bin/bash
+# Stage the data files that get bundled into the installer:
+#   AlphaMap FASTA/CSV files      -> ${INSTALLER_DATA_DIR}/alphamap/data
+#   AlphaQuant datashare archives -> ${INSTALLER_DATA_DIR}/alphaquant/resources
+# INSTALLER_DATA_DIR defaults to build_pyinstaller_data and is deleted and
+# recreated on every run.
+#
+# pipefail: abort when the Python listing step fails instead of carrying on
+# with a partial (or empty) download list.
+set -e -u -o pipefail
+
+INSTALLER_DATA_DIR=${INSTALLER_DATA_DIR:-build_pyinstaller_data}
+ALPHAMAP_DATA_DIR="${INSTALLER_DATA_DIR}/alphamap/data"
+ALPHAQUANT_RESOURCES_DIR="${INSTALLER_DATA_DIR}/alphaquant/resources"
+
+# curl wrapper: follow redirects, fail on HTTP errors, retry up to 3 times.
+# Usage: curl_with_retries <curl arguments...>
+curl_with_retries() {
+    local args=(
+        -L
+        -f
+        --retry 3
+        --retry-delay 2
+        -H "User-Agent: alphaquant-release-build"
+    )
+
+    curl "${args[@]}" "$@"
+}
+
+# Print one "<file name><TAB><download URL>" line for every unique FASTA and
+# UniProt file name in alphamap.organisms_data.all_organisms.
+# alphamap must be importable by the "python" found on PATH.
+list_alphamap_data_downloads() {
+    python -c 'from urllib.parse import quote
+from alphamap.organisms_data import all_organisms
+base_url = "https://raw.githubusercontent.com/MannLabs/alphamap/main/alphamap/data/"
+seen = set()
+for organism in all_organisms.values():
+    for key in ("fasta_name", "uniprot_name"):
+        name = organism[key]
+        if name not in seen:
+            seen.add(name)
+            print(f"{name}\t{base_url}{quote(name)}")'
+}
+
+# Download a zip archive and extract it into ALPHAQUANT_RESOURCES_DIR.
+# Usage: download_datashare_zip <url> <label used in log messages>
+# The temporary archive is removed on success and on every failure path.
+download_datashare_zip() {
+    local url=$1
+    local label=$2
+    local temp_zip
+
+    temp_zip=$(mktemp)
+    echo "Downloading ${label} from datashare..."
+    if ! curl_with_retries "${url}" -o "${temp_zip}"; then
+        echo "Error: Failed to download ${label} from datashare"
+        rm -f "${temp_zip}"
+        exit 1
+    fi
+
+    echo "Extracting ${label}..."
+    # With "set -e" a bare failing unzip would exit the script before the
+    # temporary archive is removed, so test it explicitly.
+    if ! unzip -o "${temp_zip}" -d "${ALPHAQUANT_RESOURCES_DIR}"; then
+        echo "Error: Failed to extract ${label}"
+        rm -f "${temp_zip}"
+        exit 1
+    fi
+    rm -f "${temp_zip}"
+}
+
+# Start from an empty staging directory.
+rm -rf "${INSTALLER_DATA_DIR}"
+mkdir -p "${ALPHAMAP_DATA_DIR}" "${ALPHAQUANT_RESOURCES_DIR}"
+
+echo "Downloading AlphaMap FASTA and CSV files..."
+# The loop runs in a pipeline subshell: its "exit 1" becomes the pipeline
+# status, and "set -e" then aborts the whole script.
+list_alphamap_data_downloads | \
+    while IFS=$'\t' read -r filename url; do
+        if [ -z "$filename" ] || [ -z "$url" ]; then
+            echo "Warning: Empty URL detected, skipping..."
+            continue
+        fi
+
+        echo "Downloading ${filename}..."
+        if ! curl_with_retries "$url" -o "${ALPHAMAP_DATA_DIR}/${filename}"; then
+            echo "Error: Failed to download ${filename}"
+            exit 1
+        fi
+    done
+
+# Sanity check: at least one FASTA/CSV file must have been written.
+alphamap_file_count=$(find "${ALPHAMAP_DATA_DIR}" -type f \( -name "*.fasta" -o -name "*.csv" \) | wc -l)
+echo "Downloaded ${alphamap_file_count} AlphaMap files"
+if [ "${alphamap_file_count}" -eq 0 ]; then
+    echo "Error: No AlphaMap files were downloaded"
+    exit 1
+fi
+
+download_datashare_zip "https://datashare.biochem.mpg.de/s/ezPzeqStEgDD8gg/download" "reference databases"
+download_datashare_zip "https://datashare.biochem.mpg.de/s/stH9pmNe6O9CRHG/download" "phosphopred databases"
+
+# Verify that the archives contained the expected directories and file.
+REFERENCE_DB_DIR="${ALPHAQUANT_RESOURCES_DIR}/reference_databases"
+PHOSPHOPRED_DB_DIR="${ALPHAQUANT_RESOURCES_DIR}/phosphopred_databases"
+HUMAN_PHOSPHO_FILE="${PHOSPHOPRED_DB_DIR}/human_uniprot_reviewed_phos_prob.tsv"
+
+if [ ! -d "${REFERENCE_DB_DIR}" ]; then
+    echo "Error: reference_databases directory not found at ${REFERENCE_DB_DIR}"
+    exit 1
+fi
+
+if [ ! -d "${PHOSPHOPRED_DB_DIR}" ]; then
+    echo "Error: phosphopred_databases directory not found at ${PHOSPHOPRED_DB_DIR}"
+    exit 1
+fi
+
+if [ ! -f "${HUMAN_PHOSPHO_FILE}" ]; then
+    echo "Error: Expected phosphopred database file not found at ${HUMAN_PHOSPHO_FILE}"
+    exit 1
+fi
+
+echo "Installer data staged in ${INSTALLER_DATA_DIR}"
diff --git a/release/linux/build_installer_linux.sh b/release/linux/build_installer_linux.sh
index 13a679f8..b89ac9f6 100755
--- a/release/linux/build_installer_linux.sh
+++ b/release/linux/build_installer_linux.sh
@@ -3,12 +3,13 @@ set -e -u
 
 # Build the installer for Linux.
 # This script must be run from the root of the repository.
-rm -rf dist_pyinstaller build_pyinstaller
+rm -rf dist_pyinstaller build_pyinstaller build_pyinstaller_data
 
 # Find the wheel file in dist directory
 WHL_NAME=$(cd dist && ls ./*.whl && cd ..)
 pip install "dist/${WHL_NAME}[stable,gui-stable,dask-stable]"
 
+INSTALLER_DATA_DIR=build_pyinstaller_data release/common/download_installer_data.sh
+
 # Creating the stand-alone pyinstaller folder
 pyinstaller release/pyinstaller/alphaquant.spec --distpath dist_pyinstaller --workpath build_pyinstaller -y
-
diff --git a/release/linux/build_package_linux.sh b/release/linux/build_package_linux.sh
index ecc94f4c..98646e2d 100755
--- a/release/linux/build_package_linux.sh
+++ b/release/linux/build_package_linux.sh
@@ -13,9 +13,6 @@ rm -rf ${BUILD_NAME}.deb
 mkdir -p dist_pyinstaller/${BUILD_NAME}/usr/local/bin
 mv dist_pyinstaller/${PACKAGE_NAME} dist_pyinstaller/${BUILD_NAME}/usr/local/bin/${PACKAGE_NAME}
 
-#make directory for AlphaMap. This is where AlphaMap stores downloaded data, such as fasta files
-mkdir -p dist_pyinstaller/${BUILD_NAME}/usr/local/bin/${PACKAGE_NAME}/alphamap/data/
-
 mkdir dist_pyinstaller/${BUILD_NAME}/DEBIAN
 cp release/linux/control dist_pyinstaller/${BUILD_NAME}/DEBIAN
 dpkg-deb --build --root-owner-group dist_pyinstaller/${BUILD_NAME}
diff --git a/release/macos/build_package_macos.sh b/release/macos/build_package_macos.sh
index ecc3be79..9eb63e7c 100755
--- a/release/macos/build_package_macos.sh
+++ b/release/macos/build_package_macos.sh
@@ -8,6 +8,33 @@ PACKAGE_NAME=alphaquant
 # BUILD_NAME is taken from environment variables, e.g. alphaquant-1.2.3-macos-darwin-arm64 or alphaquant-1.2.3-macos-darwin-x64
 rm -rf ${BUILD_NAME}.pkg
 
+# Same helper as in release/common/download_installer_data.sh; keep the two copies in sync.
+curl_with_retries() {
+    local args=(
+        -L
+        -f
+        --retry 3
+        --retry-delay 2
+        -H "User-Agent: alphaquant-release-build"
+    )
+
+    curl "${args[@]}" "$@"
+}
+
+# Print one "<file name><TAB><download URL>" line per unique AlphaMap data file (same as the common script).
+list_alphamap_data_downloads() {
+    python -c 'from urllib.parse import quote
+from alphamap.organisms_data import all_organisms
+base_url = "https://raw.githubusercontent.com/MannLabs/alphamap/main/alphamap/data/"
+seen = set()
+for organism in all_organisms.values():
+    for key in ("fasta_name", "uniprot_name"):
+        name = organism[key]
+        if name not in seen:
+            seen.add(name)
+            print(f"{name}\t{base_url}{quote(name)}")'
+}
+
 # If needed, include additional source such as e.g.:
 # cp ../../alphaquant/data/*.fasta dist/alphaquant/data
 
@@ -36,23 +63,14 @@ mkdir -p ${CONTENTS_FOLDER}/Frameworks/alphamap/data/
 ####
 ####Download all AlphaMap FASTA and CSV files from GitHub, which are needed for the further analyses. There is a lot of error checking to ensure that the files get actually added during the build
 echo "Starting downloads of FASTA and CSV files..."
-DOWNLOAD_LIST=$(curl -L -f https://api.github.com/repos/MannLabs/alphamap/contents/alphamap/data?ref=main)
-if [ $? -ne 0 ]; then
-    echo "Error: Failed to fetch file list from GitHub API"
-    exit 1
-fi
-
-echo "$DOWNLOAD_LIST" | \
-    grep "\"download_url\".*\.\(fasta\|csv\)\"" | \
-    cut -d '"' -f 4 | \
-    while read url; do
-        if [ -z "$url" ]; then
+list_alphamap_data_downloads | \
+    while IFS=$'\t' read -r filename url; do
+        if [ -z "$filename" ] || [ -z "$url" ]; then
             echo "Warning: Empty URL detected, skipping..."
             continue
         fi
-        filename=$(basename $url)
         echo "Downloading $filename..."
-        if ! curl -L -f "$url" -o "${CONTENTS_FOLDER}/Frameworks/alphamap/data/$filename"; then
+        if ! curl_with_retries "$url" -o "${CONTENTS_FOLDER}/Frameworks/alphamap/data/$filename"; then
             echo "Error: Failed to download $filename"
             exit 1
         fi
@@ -76,7 +94,7 @@ mkdir -p ${CONTENTS_FOLDER}/MacOS/_internal/alphaquant/resources/
 # Download and extract the first zip file
 echo "Downloading and extracting first resource from datashare..."
 TEMP_ZIP1=$(mktemp)
-if ! curl -L -f "https://datashare.biochem.mpg.de/s/ezPzeqStEgDD8gg/download" -o "$TEMP_ZIP1"; then
+if ! curl_with_retries "https://datashare.biochem.mpg.de/s/ezPzeqStEgDD8gg/download" -o "$TEMP_ZIP1"; then
     echo "Error: Failed to download first resource from datashare"
     exit 1
 fi
@@ -87,7 +105,7 @@ rm "$TEMP_ZIP1"
 # Download and extract the second zip file
 echo "Downloading and extracting second resource from datashare..."
 TEMP_ZIP2=$(mktemp)
-if ! curl -L -f "https://datashare.biochem.mpg.de/s/stH9pmNe6O9CRHG/download" -o "$TEMP_ZIP2"; then
+if ! curl_with_retries "https://datashare.biochem.mpg.de/s/stH9pmNe6O9CRHG/download" -o "$TEMP_ZIP2"; then
     echo "Error: Failed to download second resource from datashare"
     exit 1
 fi
diff --git a/release/pyinstaller/alphaquant.spec b/release/pyinstaller/alphaquant.spec
index e910ebe2..6d4b4478 100644
--- a/release/pyinstaller/alphaquant.spec
+++ b/release/pyinstaller/alphaquant.spec
@@ -2,6 +2,7 @@
 
 import os
 import sys
+from pathlib import Path
 from PyInstaller.building.build_main import Analysis, PYZ, EXE, COLLECT, BUNDLE, TOC
 import PyInstaller.utils.hooks
 
@@ -43,6 +44,21 @@ hidden_imports = [h for h in hidden_imports if "__pycache__" not in h]
 # )
 datas = [d for d in datas if ("__pycache__" not in d[0]) and (d[1] not in [".", "Resources", "scripts"])]
 
+# Bundle the files staged in INSTALLER_DATA_DIR (default: build_pyinstaller_data),
+# keeping their relative layout below alphamap/data and alphaquant/resources.
+# Source directories that do not exist are skipped without an error.
+installer_data_dir = Path(os.environ.get("INSTALLER_DATA_DIR", "build_pyinstaller_data"))
+if not installer_data_dir.is_absolute():
+    installer_data_dir = Path(location) / installer_data_dir
+for source_root, destination_root in (
+    (installer_data_dir / "alphamap" / "data", Path("alphamap") / "data"),
+    (installer_data_dir / "alphaquant" / "resources", Path("alphaquant") / "resources"),
+):
+    if source_root.exists():
+        for source_file in source_root.rglob("*"):
+            if source_file.is_file():
+                datas.append((str(source_file), str(destination_root / source_file.relative_to(source_root).parent)))
+
 # add certifi to datas, otherwise ssh connections fail when they are triggered from the installer, because the certificates are not available
 # In the case of the AlphaQuant repo, AlphaMap needs to download data from GitHub and this fails without certifi
 datas.extend(PyInstaller.utils.hooks.collect_data_files('certifi'))