@@ -10,6 +10,8 @@ set -o xtrace
1010
1111readonly CLOUD=" ${CLOUD:- " " } "
1212readonly USERNAME=" ${USERNAME:- " root" } "
13+ readonly LIBRARIES_ENV_DIR=" ${LIBENV:- " /opt/conda/envs/workbench-ds" } "
14+ readonly LIB_PYTHON_VERSION=" ${LIBPYTHONVERSION:- " 3.14" } "
1315USER_HOME_DIR=" ${USERHOMEDIR:- " /home/${USERNAME} " } "
1416if [[ " ${USER_HOME_DIR} " == " /home/root" ]]; then
1517 USER_HOME_DIR=" /root"
@@ -68,66 +70,122 @@ if ! mamba --version &>/dev/null; then
6870fi
6971
7072# Install the samtools family of tools in a separate environment since some of
71- # the other tools depend on old versions of these. This will take priority in
72- # the PATH.
73- CONDA_PACKAGES_1=(
74- " bcftools"
75- " htslib" # includes bgzip and tabix
76- " samtools"
73+ # the other tools depend on old versions of these.
74+ readonly CONDA_PACKAGES_SAMTOOLS=(
75+ " bioconda::bcftools>=1.23"
76+ " bioconda::htslib>=1.23" # includes bgzip and tabix
77+ " bioconda::samtools>=1.23"
7778)
78-
79- CONDA_PACKAGES_2=(
80- " python=3.9"
81- " pip"
82- " perl==5.32.1"
83- " bedtools"
79+ readonly SAMTOOLS_ENV_DIR=" ${WORKBENCH_TOOLS_DIR} /samtools"
80+
81+ # Environment 2 contains the genomics CLI tools. They will be added to the
82+ # PATH but will not be usable as Python libraries.
83+ readonly CONDA_PACKAGES_BINARIES=(
84+ " conda-forge::python"
85+ " conda-forge::pip"
86+ " conda-forge::perl>=5.32"
87+ " bioconda::bedtools"
8488 " conda-forge::bgenix"
8589 " conda-forge::cromwell"
86- " ensembl-vep>=115.1"
87- " nextflow"
88- " plink"
89- " plink2"
90- " regenie"
91- " vcftools"
90+ " bioconda::ensembl-vep>=115"
91+ " bioconda::nextflow"
92+ " bioconda::plink"
93+ " bioconda::plink2"
94+ " bioconda::regenie"
95+ " bioconda::vcftools"
96+ )
97+ readonly BINARIES_ENV_DIR=" ${WORKBENCH_TOOLS_DIR} /binaries"
98+
99+ # Environment 3 contains data science Python libraries. These should be
100+ # accessible from the user's default Python environment, so they are installed
101+ # separately at LIBRARIES_ENV_DIR (overridable via LIBENV): injected into that
102+ # environment if it already exists, otherwise created there as a new one.
103+ CONDA_PACKAGES_LIBRARIES=(
104+ " conda-forge::google-cloud-storage"
105+ " conda-forge::ipykernel"
106+ " conda-forge::ipywidgets"
107+ " conda-forge::jupyter"
108+ " conda-forge::openai"
109+ " conda-forge::matplotlib"
110+ " conda-forge::numpy"
111+ " conda-forge::plotly"
112+ " conda-forge::pandas"
113+ " conda-forge::seaborn"
114+ " conda-forge::scikit-learn"
115+ " conda-forge::scipy"
116+ " conda-forge::tqdm"
92117)
93118
119+ # Build isolated environments
94120mkdir -p " ${WORKBENCH_TOOLS_DIR} "
95- mamba create --prefix " ${WORKBENCH_TOOLS_DIR} /1" -c bioconda -y " ${CONDA_PACKAGES_1[@]} "
96- mamba create --prefix " ${WORKBENCH_TOOLS_DIR} /2" -c bioconda -y " ${CONDA_PACKAGES_2[@]} "
121+ echo " Building Environment 1 (Samtools family)..."
122+ mamba create --prefix " ${SAMTOOLS_ENV_DIR} " -y " ${CONDA_PACKAGES_SAMTOOLS[@]} "
123+
124+ echo " Building Environment 2 (Genomics CLI Tools)..."
125+ mamba create --prefix " ${BINARIES_ENV_DIR} " -y " ${CONDA_PACKAGES_BINARIES[@]} "
126+
127+ echo " Building Environment 3 (Python Libraries)..."
128+ LIB_ENV_EXISTS=0
129+
130+ if [ -d " ${LIBRARIES_ENV_DIR} " ]; then
131+ # SCENARIO A: Target environment already exists on host. Inject packages into it.
132+ LIB_ENV_EXISTS=1
133+ echo " Host environment detected at ${LIBRARIES_ENV_DIR} . Injecting data science packages..."
134+
135+ if mamba list -p " ${LIBRARIES_ENV_DIR} " --full-name python --json | jq -e ' length == 0' > /dev/null; then
136+ echo " No Python installation found in host environment. Adding python=${LIB_PYTHON_VERSION} to package list."
137+ CONDA_PACKAGES_LIBRARIES+=(" conda-forge::python=${LIB_PYTHON_VERSION} " )
138+ fi
139+ mamba install --prefix " ${LIBRARIES_ENV_DIR} " -y " ${CONDA_PACKAGES_LIBRARIES[@]} "
140+ else
141+ # SCENARIO B: Target environment does not exist. Create it from scratch.
142+ echo " No host environment found. Creating standalone environment at ${LIBRARIES_ENV_DIR} ..."
143+ mkdir -p " $( dirname " ${LIBRARIES_ENV_DIR} " ) "
144+
145+ CONDA_PACKAGES_LIBRARIES+=(" conda-forge::python=${LIB_PYTHON_VERSION} " )
146+ mamba create --prefix " ${LIBRARIES_ENV_DIR} " -y " ${CONDA_PACKAGES_LIBRARIES[@]} "
147+ fi
97148
98149# Install dsub via pip if on GCP. The conda version is outdated.
150+ # dsub is installed in LIBRARIES_ENV_DIR because it can be used as a Python
151+ # library, and users may want to install additional packages alongside it.
152+ # PYTHONNOUSERSITE=1 prevents pip from seeing/modifying packages in user site-packages.
99153if [[ " ${CLOUD} " == " gcp" ]]; then
100- " ${WORKBENCH_TOOLS_DIR} /2 /bin/pip" install dsub
154+ PYTHONNOUSERSITE=1 " ${LIBRARIES_ENV_DIR} /bin/pip" install dsub
101155fi
102156
103157# Force the perl and python scripts to use the correct perl/python
104- find -L " ${WORKBENCH_TOOLS_DIR} /2 /bin" -type f -executable -exec \
158+ find -L " ${BINARIES_ENV_DIR} /bin" -type f -executable -exec \
105159 sed -i --follow-symlinks \
106- -e " 1s|^#\!/usr/bin/env perl\\ r\?$|#\!${WORKBENCH_TOOLS_DIR} /2 /bin/perl|" \
107- -e " 1s|^#\!/usr/bin/env python\\ r\?$|#\!${WORKBENCH_TOOLS_DIR} /2 /bin/python|" {} \;
160+ -e " 1s|^#\!/usr/bin/env perl\\ r\?$|#\!${BINARIES_ENV_DIR} /bin/perl|" \
161+ -e " 1s|^#\!/usr/bin/env python\\ r\?$|#\!${BINARIES_ENV_DIR} /bin/python|" {} \;
108162
109- # Make the login user the owner of the conda environment
163+ # Make the login user the owner of the conda environments
110164chown -R " ${USERNAME} :" " ${WORKBENCH_TOOLS_DIR} "
165+ chown -R " ${USERNAME} :" " ${LIBRARIES_ENV_DIR} "
111166
112167{
168+ echo " # Workbench Tools Configuration"
169+
170+ # If we created a standalone Python libraries environment from scratch, prepend its bin directory to PATH so it becomes the default terminal Python.
171+ # If it already existed (LIB_ENV_EXISTS=1), we skip the prepend so nothing already on the host image's PATH is shadowed.
172+ if [[ " ${LIB_ENV_EXISTS} " == " 0" ]]; then
173+ # shellcheck disable=SC2016 # we want $PATH to be evaluated at runtime
174+ printf ' export PATH="%s:$PATH"\n' " ${LIBRARIES_ENV_DIR} /bin"
175+ fi
176+
113177 # Set PATH to include workbench-tools binaries
114178 # shellcheck disable=SC2016 # we want $PATH to be evaluated at runtime
115- printf ' export PATH="%s: $PATH"\n' " ${WORKBENCH_TOOLS_DIR} /1/ bin:${WORKBENCH_TOOLS_DIR} /2 /bin"
179+ printf ' export PATH="$PATH:%s "\n' " ${SAMTOOLS_ENV_DIR} / bin:${BINARIES_ENV_DIR} /bin"
116180
117181 # Set CROMWELL_JAR environment variable
118- printf ' export CROMWELL_JAR="%s"\n' " ${WORKBENCH_TOOLS_DIR} /2/share/cromwell/cromwell.jar"
119-
120- # Make dsub a function that includes the correct PYTHONPATH. NeMo sets
121- # PYTHONPATH so we need to override it here. We use a function instead of an
122- # alias because aliases are not expanded in non-interactive shells.
123- # shellcheck disable=SC2016 # we want $PYTHONPATH to be evaluated at runtime
124- printf ' function dsub() (PYTHONPATH="%s/2/lib/python3.9/site-packages:${PYTHONPATH:-}" "%s/2/bin/dsub" "$@")\n' " ${WORKBENCH_TOOLS_DIR} " " ${WORKBENCH_TOOLS_DIR} "
182+ printf ' export CROMWELL_JAR="%s"\n' " ${BINARIES_ENV_DIR} /share/cromwell/cromwell.jar"
125183} >> " ${USER_HOME_DIR} /.bashrc"
126184
127185# Allow .bashrc to be sourced in non-interactive shells
128186sed -i ' /^# If not running interactively/,/esac/d' " ${USER_HOME_DIR} /.bashrc" || true
129187
130188# Make sure the login user is the owner of their .bashrc
131- chown -R " ${USERNAME} :" " ${USER_HOME_DIR} /.bashrc"
189+ chown " ${USERNAME} :" " ${USER_HOME_DIR} /.bashrc"
132190
133- echo " Done !"
191+ echo " Workbench tools installation complete !"
0 commit comments