@@ -10,6 +10,8 @@ set -o xtrace
1010
# Installer configuration. Each value may be overridden from the environment;
# otherwise the default shown here is used.
# NOTE(review): the override names inside the expansions (LIBENV,
# LIBPYTHONVERSION, USERHOMEDIR) are kept exactly as they appear in the
# reviewed text. They look like they may have lost underscores (LIB_ENV,
# LIB_PYTHON_VERSION, USER_HOME_DIR) -- confirm against the real script.
readonly CLOUD="${CLOUD:-""}"
readonly USERNAME="${USERNAME:-"root"}"
# Prefix of the conda environment that receives the Python libraries.
readonly LIB_ENV="${LIBENV:-"/opt/conda/envs/workbench-ds"}"
# Python version requested when that environment has no Python yet.
readonly LIB_PYTHON_VERSION="${LIBPYTHONVERSION:-"3.10"}"
# Not readonly: it is reassigned right below when it resolves to /home/root.
USER_HOME_DIR="${USERHOMEDIR:-"/home/${USERNAME}"}"
1416if [[ " ${USER_HOME_DIR} " == " /home/root" ]]; then
1517 USER_HOME_DIR=" /root"
@@ -68,32 +70,79 @@ if ! mamba --version &>/dev/null; then
6870fi
6971
# Install the samtools family of tools in a separate environment since some of
# the other tools depend on old versions of these.
# Every spec is pinned to its channel with the "channel::package" form, so no
# "-c <channel>" flag is needed when the environment is created.
CONDA_PACKAGES_1=(
  "bioconda::bcftools"
  "bioconda::htslib" # includes bgzip and tabix
  "bioconda::samtools"
)

# Environment 2 contains the genomics CLI tools. They will be added to the
# PATH but will not be usable as Python libraries.
CONDA_PACKAGES_2=(
  "conda-forge::python=3.10"
  "conda-forge::pip"
  "conda-forge::perl==5.32.1"
  "bioconda::bedtools"
  "conda-forge::bgenix"
  "conda-forge::cromwell"
  "bioconda::ensembl-vep>=115.1"
  "bioconda::nextflow"
  "bioconda::plink"
  "bioconda::plink2"
  "bioconda::regenie"
  "bioconda::vcftools"
)
9396
# Environment 3 contains data science Python libraries. These should be
# accessible from the user's default Python environment, which is why we install
# them separately and give the user control over whether to inject them into an
# existing environment or create a new one.
# Python itself is deliberately not listed here: it is appended at build time
# only when the target environment does not already provide one.
CONDA_PACKAGES_3=(
  "conda-forge::google-cloud-storage"
  "conda-forge::ipykernel"
  "conda-forge::ipywidgets"
  "conda-forge::jupyter"
  "conda-forge::openai"
  "conda-forge::matplotlib"
  "conda-forge::numpy"
  "conda-forge::plotly"
  "conda-forge::pandas"
  "conda-forge::seaborn"
  "conda-forge::scikit-learn"
  "conda-forge::scipy"
  "conda-forge::tqdm"
)
116+
# Build isolated environments
mkdir -p "${WORKBENCH_TOOLS_DIR}"
echo "Building Environment 1 (Samtools family)..."
mamba create --prefix "${WORKBENCH_TOOLS_DIR}/1" -y "${CONDA_PACKAGES_1[@]}"

echo "Building Environment 2 (Genomics CLI Tools)..."
mamba create --prefix "${WORKBENCH_TOOLS_DIR}/2" -y "${CONDA_PACKAGES_2[@]}"

echo "Building Environment 3 (Python Libraries)..."
# LIB_ENV_EXISTS records which scenario ran; it is read again later when the
# PATH lines are written to .bashrc.
LIB_ENV_EXISTS=0

if [[ -d "${LIB_ENV}" ]]; then
  # SCENARIO A: Target environment already exists on host. Inject packages into it.
  LIB_ENV_EXISTS=1
  echo "Host environment detected at ${LIB_ENV}. Injecting data science packages..."

  # Probe the environment we are about to modify (LIB_ENV), not a fixed path:
  # an empty JSON list from "mamba list" means it has no python package, so
  # one has to be requested explicitly.
  if mamba list -p "${LIB_ENV}" --full-name python --json | jq -e 'length == 0' >/dev/null; then
    echo "No Python installation found in host environment. Adding python=${LIB_PYTHON_VERSION} to package list."
    CONDA_PACKAGES_3+=("conda-forge::python=${LIB_PYTHON_VERSION}")
  fi
  mamba install --prefix "${LIB_ENV}" -y "${CONDA_PACKAGES_3[@]}"
else
  # SCENARIO B: Target environment does not exist. Create it from scratch.
  echo "No host environment found. Creating standalone environment at ${LIB_ENV}..."
  mkdir -p "$(dirname "${LIB_ENV}")"

  CONDA_PACKAGES_3+=("conda-forge::python=${LIB_PYTHON_VERSION}")
  mamba create --prefix "${LIB_ENV}" -y "${CONDA_PACKAGES_3[@]}"
fi
97146
98147# Install dsub via pip if on GCP. The conda version is outdated.
99148if [[ " ${CLOUD} " == " gcp" ]]; then
@@ -106,28 +155,32 @@ find -L "${WORKBENCH_TOOLS_DIR}/2/bin" -type f -executable -exec \
106155 -e " 1s|^#\!/usr/bin/env perl\\ r\?$|#\!${WORKBENCH_TOOLS_DIR} /2/bin/perl|" \
107156 -e " 1s|^#\!/usr/bin/env python\\ r\?$|#\!${WORKBENCH_TOOLS_DIR} /2/bin/python|" {} \;
108157
# Make the login user the owner of the conda environments
# The trailing ":" in the owner spec also sets the group to the user's login group.
chown -R "${USERNAME}:" "${WORKBENCH_TOOLS_DIR}"
# NOTE(review): when LIB_ENV already existed on the host, this recursively
# re-owns that pre-existing environment too -- confirm that is intended.
chown -R "${USERNAME}:" "${LIB_ENV}"
111161
# Append the workbench-tools shell configuration to the login user's .bashrc.
{
  echo "# Workbench Tools Configuration"

  # If we created a standalone Python libraries environment from scratch, make it the default terminal Python.
  # If it already existed (LIB_ENV_EXISTS=1), we leave the host image's PATH untouched to prevent shadowing.
  if [[ "${LIB_ENV_EXISTS}" == "0" ]]; then
    # shellcheck disable=SC2016 # we want $PATH to be evaluated at runtime
    printf 'export PATH="%s:$PATH"\n' "${LIB_ENV}/bin"
  fi

  # Set PATH to include workbench-tools binaries. They are appended after the
  # existing PATH entries, so anything already on PATH takes precedence.
  # shellcheck disable=SC2016 # we want $PATH to be evaluated at runtime
  printf 'export PATH="$PATH:%s"\n' "${WORKBENCH_TOOLS_DIR}/1/bin:${WORKBENCH_TOOLS_DIR}/2/bin"

  # Set CROMWELL_JAR environment variable
  printf 'export CROMWELL_JAR="%s"\n' "${WORKBENCH_TOOLS_DIR}/2/share/cromwell/cromwell.jar"
} >> "${USER_HOME_DIR}/.bashrc"
126179
# Allow .bashrc to be sourced in non-interactive shells
# Deletes the range from the "# If not running interactively" comment through
# the following "esac". The "|| true" keeps this step best-effort: a sed
# failure does not abort the install.
sed -i '/^# If not running interactively/,/esac/d' "${USER_HOME_DIR}/.bashrc" || true

# Make sure the login user is the owner of their .bashrc
chown "${USERNAME}:" "${USER_HOME_DIR}/.bashrc"

echo "Workbench tools installation complete!"
0 commit comments