@@ -10,8 +10,6 @@ set -o xtrace
1010
# Script-level settings, each taken from the environment with a fallback via
# the ${VAR:-default} form:
#   CLOUD          <- CLOUD        (default appears to be empty)
#   USERNAME       <- USERNAME     (default root)
#   USER_HOME_DIR  <- USERHOMEDIR  (default /home/${USERNAME}); note that the
#                     environment variable name has no underscores.
# The two lines marked with a minus sign (LIBRARIES_ENV_DIR, LIB_PYTHON_VERSION)
# are the ones this change removes.
# NOTE(review): this text is a rendered diff; the fused line numbers and the
# spaces inside the quoted strings look like extraction artifacts - confirm
# against the real script before relying on exact values.
1111readonly CLOUD=" ${CLOUD:- " " } "
1212readonly USERNAME=" ${USERNAME:- " root" } "
13- readonly LIBRARIES_ENV_DIR=" ${LIBENV:- " /opt/conda/envs/workbench-ds" } "
14- readonly LIB_PYTHON_VERSION=" ${LIBPYTHONVERSION:- " 3.14" } "
1513USER_HOME_DIR=" ${USERHOMEDIR:- " /home/${USERNAME} " } "
1614if [[ " ${USER_HOME_DIR} " == " /home/root" ]]; then
1715 USER_HOME_DIR=" /root"
@@ -70,122 +68,66 @@ if ! mamba --version &>/dev/null; then
7068fi
7169
# Package list for the first conda environment: bcftools, htslib, samtools.
# Minus side: readonly array CONDA_PACKAGES_SAMTOOLS with bioconda:: channel
# prefixes and >=1.23 version bounds.
# Plus side: plain (not readonly) array CONDA_PACKAGES_1 with bare package
# names and no version bounds; the channel is instead supplied with
# -c bioconda on the mamba create call that consumes this array.
# The PATH priority mentioned below matches the PATH export written to .bashrc
# later in this script, where the /1/bin directory is listed first.
7270# Install the samtools family of tools in a separate environment since some of
73- # the other tools depend on old versions of these.
74- readonly CONDA_PACKAGES_SAMTOOLS=(
75- " bioconda::bcftools>=1.23"
76- " bioconda::htslib>=1.23" # includes bgzip and tabix
77- " bioconda::samtools>=1.23"
71+ # the other tools depend on old versions of these. This will take priority in
72+ # the PATH.
73+ CONDA_PACKAGES_1=(
74+ " bcftools"
75+ " htslib" # includes bgzip and tabix
76+ " samtools"
7877)
# Package list for the second conda environment (genomics command-line tools).
# Plus side: array CONDA_PACKAGES_2 with python pinned to 3.9 and perl pinned
# to 5.32.1; bgenix and cromwell keep an explicit conda-forge:: prefix, the
# other entries are bare names, and ensembl-vep requires >=115.1.
# Minus side: removes SAMTOOLS_ENV_DIR, the readonly CONDA_PACKAGES_BINARIES
# list, BINARIES_ENV_DIR, and the entire CONDA_PACKAGES_LIBRARIES list (the
# data science Python libraries of the former third environment).
# NOTE(review): the dsub wrapper written to .bashrc further down hard-codes
# lib/python3.9/site-packages, so the python=3.9 pin here and that path have
# to be changed together.
79- readonly SAMTOOLS_ENV_DIR=" ${WORKBENCH_TOOLS_DIR} /samtools"
80-
81- # Environment 2 contains the genomics CLI tools. They will be added to the
82- # PATH but will not be usable as Python libraries.
83- readonly CONDA_PACKAGES_BINARIES=(
84- " conda-forge::python"
85- " conda-forge::pip"
86- " conda-forge::perl>=5.32"
87- " bioconda::bedtools"
78+
79+ CONDA_PACKAGES_2=(
80+ " python=3.9"
81+ " pip"
82+ " perl==5.32.1"
83+ " bedtools"
8884 " conda-forge::bgenix"
8985 " conda-forge::cromwell"
90- " bioconda::ensembl-vep>=115"
91- " bioconda::nextflow"
92- " bioconda::plink"
93- " bioconda::plink2"
94- " bioconda::regenie"
95- " bioconda::vcftools"
96- )
97- readonly BINARIES_ENV_DIR=" ${WORKBENCH_TOOLS_DIR} /binaries"
98-
99- # Environment 3 contains data science Python libraries. These should be
100- # accessible from the user's default Python environment, which is why we install
101- # them separately and give the user control over whether to inject them into an
102- # existing environment or create a new one.
103- CONDA_PACKAGES_LIBRARIES=(
104- " conda-forge::google-cloud-storage"
105- " conda-forge::ipykernel"
106- " conda-forge::ipywidgets"
107- " conda-forge::jupyter"
108- " conda-forge::openai"
109- " conda-forge::matplotlib"
110- " conda-forge::numpy"
111- " conda-forge::plotly"
112- " conda-forge::pandas"
113- " conda-forge::seaborn"
114- " conda-forge::scikit-learn"
115- " conda-forge::scipy"
116- " conda-forge::tqdm"
86+ " ensembl-vep>=115.1"
87+ " nextflow"
88+ " plink"
89+ " plink2"
90+ " regenie"
91+ " vcftools"
11792)
11893
# Creates the tools directory, then builds the conda environments with
# mamba create --prefix.
# Plus side: two environments, at ${WORKBENCH_TOOLS_DIR}/1 (from
# CONDA_PACKAGES_1) and ${WORKBENCH_TOOLS_DIR}/2 (from CONDA_PACKAGES_2), both
# created with -c bioconda and -y.
# Minus side: removes the progress messages, the named SAMTOOLS_ENV_DIR and
# BINARIES_ENV_DIR builds, and the whole third environment step, which either
# installed the library packages into an existing LIBRARIES_ENV_DIR
# (LIB_ENV_EXISTS=1, adding python only when mamba list reported none) or
# created that environment from scratch.
119- # Build isolated environments
12094mkdir -p " ${WORKBENCH_TOOLS_DIR} "
121- echo " Building Environment 1 (Samtools family)..."
122- mamba create --prefix " ${SAMTOOLS_ENV_DIR} " -y " ${CONDA_PACKAGES_SAMTOOLS[@]} "
123-
124- echo " Building Environment 2 (Genomics CLI Tools)..."
125- mamba create --prefix " ${BINARIES_ENV_DIR} " -y " ${CONDA_PACKAGES_BINARIES[@]} "
126-
127- echo " Building Environment 3 (Python Libraries)..."
128- LIB_ENV_EXISTS=0
129-
130- if [ -d " ${LIBRARIES_ENV_DIR} " ]; then
131- # SCENARIO A: Target environment already exists on host. Inject packages into it.
132- LIB_ENV_EXISTS=1
133- echo " Host environment detected at ${LIBRARIES_ENV_DIR} . Injecting data science packages..."
134-
135- if mamba list -p " ${LIBRARIES_ENV_DIR} " --full-name python --json | jq -e ' length == 0' > /dev/null; then
136- echo " No Python installation found in host environment. Adding python=${LIB_PYTHON_VERSION} to package list."
137- CONDA_PACKAGES_LIBRARIES+=(" conda-forge::python=${LIB_PYTHON_VERSION} " )
138- fi
139- mamba install --prefix " ${LIBRARIES_ENV_DIR} " -y " ${CONDA_PACKAGES_LIBRARIES[@]} "
140- else
141- # SCENARIO B: Target environment does not exist. Create it from scratch.
142- echo " No host environment found. Creating standalone environment at ${LIBRARIES_ENV_DIR} ..."
143- mkdir -p " $( dirname " ${LIBRARIES_ENV_DIR} " ) "
144-
145- CONDA_PACKAGES_LIBRARIES+=(" conda-forge::python=${LIB_PYTHON_VERSION} " )
146- mamba create --prefix " ${LIBRARIES_ENV_DIR} " -y " ${CONDA_PACKAGES_LIBRARIES[@]} "
147- fi
95+ mamba create --prefix " ${WORKBENCH_TOOLS_DIR} /1" -c bioconda -y " ${CONDA_PACKAGES_1[@]} "
96+ mamba create --prefix " ${WORKBENCH_TOOLS_DIR} /2" -c bioconda -y " ${CONDA_PACKAGES_2[@]} "
14897
# Runs only when CLOUD equals gcp: installs dsub with pip.
# Plus side: uses the pip inside environment 2 (${WORKBENCH_TOOLS_DIR}/2) and
# sets no extra environment variables for the call.
# Minus side: used the pip of LIBRARIES_ENV_DIR with PYTHONNOUSERSITE=1 set
# for that one command.
14998# Install dsub via pip if on GCP. The conda version is outdated.
150- # dsub is installed in LIBRARIES_ENV_DIR because it can be used as a Python
151- # library, and users may want to install additional packages alongside it.
152- # PYTHONNOUSERSITE=1 prevents pip from seeing/modifying packages in user site-packages.
15399if [[ " ${CLOUD} " == " gcp" ]]; then
154- PYTHONNOUSERSITE=1 " ${LIBRARIES_ENV_DIR} /bin/pip" install dsub
100+ " ${WORKBENCH_TOOLS_DIR} /2 /bin/pip" install dsub
155101fi
156102
# For every executable regular file found under the bin directory of the tools
# environment, edits the file in place with sed. The two expressions apply to
# line 1 only (the 1s address) and replace an env-style perl or python shebang
# with the absolute path of the interpreter inside that environment.
# find -L follows symlinks while searching and sed --follow-symlinks edits the
# link target rather than replacing the link.
# Only the environment path differs between the minus side (BINARIES_ENV_DIR)
# and the plus side (${WORKBENCH_TOOLS_DIR}/2).
# NOTE(review): the r\? part of each pattern presumably tolerates a trailing
# carriage return on the shebang line - confirm against the unmangled script.
157103# Force the perl and python scripts to use the correct perl/python
158- find -L " ${BINARIES_ENV_DIR} /bin" -type f -executable -exec \
104+ find -L " ${WORKBENCH_TOOLS_DIR} /2 /bin" -type f -executable -exec \
159105 sed -i --follow-symlinks \
160- -e " 1s|^#\!/usr/bin/env perl\\ r\?$|#\!${BINARIES_ENV_DIR} /bin/perl|" \
161- -e " 1s|^#\!/usr/bin/env python\\ r\?$|#\!${BINARIES_ENV_DIR} /bin/python|" {} \;
106+ -e " 1s|^#\!/usr/bin/env perl\\ r\?$|#\!${WORKBENCH_TOOLS_DIR} /2 /bin/perl|" \
107+ -e " 1s|^#\!/usr/bin/env python\\ r\?$|#\!${WORKBENCH_TOOLS_DIR} /2 /bin/python|" {} \;
162108
# Recursively changes ownership of WORKBENCH_TOOLS_DIR to USERNAME. With GNU
# chown, a user name followed by a colon and no group also sets the group to
# that user's login group.
# Minus side: additionally chowned LIBRARIES_ENV_DIR, which this change drops.
163- # Make the login user the owner of the conda environments
109+ # Make the login user the owner of the conda environment
164110chown -R " ${USERNAME} :" " ${WORKBENCH_TOOLS_DIR} "
165- chown -R " ${USERNAME} :" " ${LIBRARIES_ENV_DIR} "
166111
# Appends shell configuration to ${USER_HOME_DIR}/.bashrc: everything printed
# inside the brace group goes through the single >> redirection at its end.
# The printf format strings are single-quoted, so $PATH, ${PYTHONPATH:-} and
# $@ are written literally and only expanded when .bashrc is later evaluated.
# Plus side writes three things:
#   1. A PATH export that lists the /1/bin and /2/bin tool directories before
#      the existing $PATH, in that order.
#   2. CROMWELL_JAR pointing at share/cromwell/cromwell.jar in environment 2.
#   3. A dsub function whose body is a parenthesised subshell. It runs
#      environment 2's dsub with that environment's python3.9 site-packages
#      placed in front of any existing PYTHONPATH, which is kept after it.
# Minus side: removes the header echo, the conditional PATH entry for
# LIBRARIES_ENV_DIR (written only when LIB_ENV_EXISTS was 0), and a PATH
# export that listed the tool directories after $PATH instead of before it.
167112{
168- echo " # Workbench Tools Configuration"
169-
170- # If we created a standalone Python libraries environment from scratch, make it the default terminal Python.
171- # If it already existed (LIB_ENV_EXISTS=1), we leave the host image's PATH untouched to prevent shadowing.
172- if [[ " ${LIB_ENV_EXISTS} " == " 0" ]]; then
173- # shellcheck disable=SC2016 # we want $PATH to be evaluated at runtime
174- printf ' export PATH="%s:$PATH"\n' " ${LIBRARIES_ENV_DIR} /bin"
175- fi
176-
177113 # Set PATH to include workbench-tools binaries
178114 # shellcheck disable=SC2016 # we want $PATH to be evaluated at runtime
179- printf ' export PATH="$PATH:%s "\n' " ${SAMTOOLS_ENV_DIR} / bin:${BINARIES_ENV_DIR} /bin"
115+ printf ' export PATH="%s:$PATH "\n' " ${WORKBENCH_TOOLS_DIR} /1/ bin:${WORKBENCH_TOOLS_DIR} /2 /bin"
180116
181117 # Set CROMWELL_JAR environment variable
182- printf ' export CROMWELL_JAR="%s"\n' " ${BINARIES_ENV_DIR} /share/cromwell/cromwell.jar"
118+ printf ' export CROMWELL_JAR="%s"\n' " ${WORKBENCH_TOOLS_DIR} /2/share/cromwell/cromwell.jar"
119+
120+ # Make dsub a function that includes the correct PYTHONPATH. NeMo sets
121+ # PYTHONPATH so we need to override it here. We use a function instead of an
122+ # alias because aliases are not expanded in non-interactive shells.
123+ # shellcheck disable=SC2016 # we want $PYTHONPATH to be evaluated at runtime
124+ printf ' function dsub() (PYTHONPATH="%s/2/lib/python3.9/site-packages:${PYTHONPATH:-}" "%s/2/bin/dsub" "$@")\n' " ${WORKBENCH_TOOLS_DIR} " " ${WORKBENCH_TOOLS_DIR} "
183125} >> " ${USER_HOME_DIR} /.bashrc"
184126
# Edits .bashrc in place, deleting the range that starts at a line beginning
# with the text "# If not running interactively" and ends at the next line
# matching esac. The trailing || true means a failing sed does not fail the
# script.
185127# Allow .bashrc to be sourced in non-interactive shells
186128sed -i ' /^# If not running interactively/,/esac/d' " ${USER_HOME_DIR} /.bashrc" || true
187129
# Final step: sets USERNAME as owner of the .bashrc that was just modified,
# then prints a completion message.
# Plus side: adds -R to chown, although the target is the single .bashrc path,
# and shortens the final message. Minus side: the plain chown and the longer
# completion message.
188130# Make sure the login user is the owner of their .bashrc
189- chown " ${USERNAME} :" " ${USER_HOME_DIR} /.bashrc"
131+ chown -R " ${USERNAME} :" " ${USER_HOME_DIR} /.bashrc"
190132
191- echo " Workbench tools installation complete !"
133+ echo " Done !"
0 commit comments