#!/usr/bin/env bash

# install.sh installs common workbench tools in the devcontainer.
# Designed as an isolated feature that does not interfere with the host image.

# Abort on the first failing command and treat unset variables as errors.
set -o errexit
set -o nounset
@@ -10,6 +10,8 @@ set -o xtrace
1010
# Installer options, each read from the environment with a fallback default.
# NOTE(review): the un-underscored names (LIBENV, LIBPYTHONVERSION,
# USERHOMEDIR) presumably come from the dev container feature option
# convention -- confirm against the feature's option definitions.
readonly CLOUD="${CLOUD:-""}"
readonly USERNAME="${USERNAME:-"root"}"
# Prefix of the conda environment that receives the Python libraries.
readonly LIB_ENV="${LIBENV:-"/opt/conda/envs/workbench-ds"}"
# Python version requested for the Python libraries environment.
readonly LIB_PYTHON_VERSION="${LIBPYTHONVERSION:-"3.10"}"
USER_HOME_DIR="${USERHOMEDIR:-"/home/${USERNAME}"}"
1416if [[ " ${USER_HOME_DIR} " == " /home/root" ]]; then
1517 USER_HOME_DIR=" /root"
@@ -23,10 +25,15 @@ WORKDIR="$(mktemp -d)"
readonly WORKDIR

# Root directory that holds the workbench tool environments.
readonly WORKBENCH_TOOLS_DIR="/opt/workbench-tools"
# Exported so every micromamba invocation in this script uses a root prefix
# inside the workbench tools directory.
export MAMBA_ROOT_PREFIX="${WORKBENCH_TOOLS_DIR}/conda-root"
2629
#######################################
# Removes temporary build artifacts. Registered below as the EXIT trap, so
# it runs on every exit path of the script.
# Globals:   WORKDIR (read; must be set and non-empty)
# Arguments: none
# Outputs:   a warning on stderr if the micromamba cache cleanup fails
#######################################
function cleanup() {
  # ${WORKDIR:?} aborts instead of expanding to an empty path.
  rm -rf "${WORKDIR:?}"
  rm -rf /var/lib/apt/lists/*
  # Clean up micromamba caches to save image space. This is best-effort:
  # the script runs with errexit, so an unguarded failure here would abort
  # the trap and replace the script's exit status.
  if command -v micromamba &>/dev/null; then
    micromamba clean --all --yes \
      || echo "warning: micromamba cache cleanup failed; continuing" >&2
  fi
}

trap 'cleanup' EXIT
@@ -61,23 +68,27 @@ check_packages \
6168 git \
6269 sed \
6370 sudo \
64- tar
71+ tar \
72+ bzip2
6573
# Install Micromamba if it is not already on the PATH. The release archive
# is streamed straight into tar, which extracts only bin/micromamba under
# /usr/local.
if ! command -v micromamba &>/dev/null; then
  echo "Installing Micromamba..."
  # Pick the download platform from the machine architecture instead of
  # always fetching the x86_64 build.
  case "$(uname -m)" in
    x86_64 | amd64) MICROMAMBA_PLATFORM="linux-64" ;;
    aarch64 | arm64) MICROMAMBA_PLATFORM="linux-aarch64" ;;
    ppc64le) MICROMAMBA_PLATFORM="linux-ppc64le" ;;
    *)
      echo "Unsupported architecture for Micromamba: $(uname -m)" >&2
      exit 1
      ;;
  esac
  # -f makes curl fail on HTTP errors rather than piping an error page to tar.
  curl -fLs "https://micro.mamba.pm/api/micromamba/${MICROMAMBA_PLATFORM}/latest" \
    | tar -C /usr/local -xvj bin/micromamba
fi
6979
# Install the samtools family of tools in a separate environment since some of
# the other tools depend on old versions of these.
CONDA_PACKAGES_1=(
  "bcftools"
  "htslib" # includes bgzip and tabix
  "samtools"
)
7887
88+ # Environment 2 contains the genomics CLI tools. They will be added to the
89+ # PATH but will not be usable as Python libraries.
7990CONDA_PACKAGES_2=(
80- " python=3.9 "
91+ " python=3.10 "
8192 " pip"
8293 " perl==5.32.1"
8394 " bedtools"
@@ -91,13 +102,53 @@ CONDA_PACKAGES_2=(
91102 " vcftools"
92103)
93104
# Environment 3 contains data science Python libraries. These should be
# accessible from the user's default Python environment, which is why we install
# them separately and give the user control over whether to inject them into an
# existing environment or create a new one.
CONDA_PACKAGES_3=(
  "python=${LIB_PYTHON_VERSION}"
  "google-cloud-storage"
  "ipykernel"
  "ipywidgets"
  "jupyter"
  "openai"
  "matplotlib"
  "numpy"
  "plotly"
  "pandas"
  "seaborn"
  "scikit-learn"
  "scipy"
  "tqdm"
)
125+
# Build isolated environments. Environments 1 and 2 are created as prefixes
# numbered 1 and 2 under the workbench tools directory.
# NOTE(review): bioconda is listed before conda-forge here; confirm this
# channel order is intended, since it determines which channel wins when a
# package exists in both.
mkdir -p "${WORKBENCH_TOOLS_DIR}"
echo "Building Environment 1 (Samtools family)..."
micromamba create --prefix "${WORKBENCH_TOOLS_DIR}/1" -c bioconda -c conda-forge -y "${CONDA_PACKAGES_1[@]}"

echo "Building Environment 2 (Genomics CLI Tools)..."
micromamba create --prefix "${WORKBENCH_TOOLS_DIR}/2" -c bioconda -c conda-forge -y "${CONDA_PACKAGES_2[@]}"
133+
echo "Building Environment 3 (Python Libraries)..."
# Records whether LIB_ENV was already present before this script ran:
# 1 when packages were injected into an existing directory, 0 when the
# environment was created here.
LIB_ENV_EXISTS=0

if [[ -d "${LIB_ENV}" ]]; then
  # SCENARIO A: Target environment already exists on host. Inject packages into it.
  # NOTE(review): the package list includes a python=<version> pin, which may
  # change the interpreter version of the existing environment -- confirm
  # this is acceptable for host images.
  LIB_ENV_EXISTS=1
  echo "Host environment detected at ${LIB_ENV}. Injecting data science packages..."
  micromamba install --prefix "${LIB_ENV}" -c conda-forge -y "${CONDA_PACKAGES_3[@]}"
else
  # SCENARIO B: Target environment does not exist. Create it from scratch.
  echo "No host environment found. Creating standalone environment at ${LIB_ENV}..."
  mkdir -p "$(dirname "${LIB_ENV}")"
  micromamba create --prefix "${LIB_ENV}" -c conda-forge -y "${CONDA_PACKAGES_3[@]}"
fi
97148
# Install dsub via pip if on GCP. The conda version is outdated.
# The pip from environment 2 is used, and --no-cache-dir keeps pip's download
# cache out of the image.
if [[ "${CLOUD}" == "gcp" ]]; then
  "${WORKBENCH_TOOLS_DIR}/2/bin/pip" install --no-cache-dir dsub
fi
102153
103154# Force the perl and python scripts to use the correct perl/python
@@ -106,28 +157,31 @@ find -L "${WORKBENCH_TOOLS_DIR}/2/bin" -type f -executable -exec \
106157 -e " 1s|^#\!/usr/bin/env perl\\ r\?$|#\!${WORKBENCH_TOOLS_DIR} /2/bin/perl|" \
107158 -e " 1s|^#\!/usr/bin/env python\\ r\?$|#\!${WORKBENCH_TOOLS_DIR} /2/bin/python|" {} \;
108159
# Make the login user the owner of the conda environments
# NOTE(review): LIB_ENV is re-owned recursively even when it already existed
# before this script ran -- confirm that is intended for host-provided
# environments.
chown -R "${USERNAME}:" "${WORKBENCH_TOOLS_DIR}"
chown -R "${USERNAME}:" "${LIB_ENV}"
111163
# Append the workbench tools shell configuration to the login user's .bashrc.
{
  echo "# Workbench Tools Configuration"

  # If we created a standalone Python libraries environment from scratch, make it the default terminal Python.
  # If it already existed (LIB_ENV_EXISTS=1), we leave the host image's PATH untouched to prevent shadowing.
  if [[ "${LIB_ENV_EXISTS}" == "0" ]]; then
    # shellcheck disable=SC2016 # we want $PATH to be evaluated at runtime
    printf 'export PATH="%s/bin:$PATH"\n' "${LIB_ENV}"
  fi

  # Set PATH to include workbench-tools binaries. They are appended, so
  # entries already on the PATH take precedence over them.
  # shellcheck disable=SC2016 # we want $PATH to be evaluated at runtime
  printf 'export PATH="$PATH:%s/1/bin:%s/2/bin"\n' "${WORKBENCH_TOOLS_DIR}" "${WORKBENCH_TOOLS_DIR}"

  # Set Cromwell JAR
  printf 'export CROMWELL_JAR="%s/2/share/cromwell/cromwell.jar"\n' "${WORKBENCH_TOOLS_DIR}"
} >>"${USER_HOME_DIR}/.bashrc"
126180
# Allow .bashrc to be sourced in non-interactive shells by deleting the
# "If not running interactively" guard through its closing esac. The
# `|| true` keeps a sed failure from aborting the script.
sed -i '/^# If not running interactively/,/esac/d' "${USER_HOME_DIR}/.bashrc" || true

# Make sure the login user is the owner of their .bashrc
chown "${USERNAME}:" "${USER_HOME_DIR}/.bashrc"

echo "Workbench tools installation complete!"
0 commit comments