Skip to content

Commit 384b602

Browse files
committed
Revert "Add python libraries in a separate conda env (verily-src#333)"
This reverts commit 22a8a27.
1 parent bf7a78e commit 384b602

10 files changed

Lines changed: 42 additions & 136 deletions

File tree

features/src/workbench-tools/devcontainer-feature.json

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -4,16 +4,6 @@
44
"name": "Workbench Tools",
55
"description": "Installs common tools for Workbench Apps. Currently it only supports Debian-based systems (e.g. Ubuntu) on x86_64.",
66
"options": {
7-
"libEnv": {
8-
"type": "string",
9-
"default": "/opt/conda/envs/workbench-ds",
10-
"description": "The path to the conda environment where Python libraries (e.g. numpy, plotly, scipy) should be installed. This could be a pre-existing environment or a new one."
11-
},
12-
"libPythonVersion": {
13-
"type": "string",
14-
"default": "3.14",
15-
"description": "The Python version to use for the installed libraries."
16-
},
177
"cloud": {
188
"type": "string",
199
"default": "",

features/src/workbench-tools/install-conda.sh

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ install_miniforge() {
1919
local download_url
2020
download_url="https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-$(uname -m).sh"
2121

22+
check_packages curl ca-certificates
2223
mkdir -p /tmp/miniforge
2324
(
2425
cd /tmp/miniforge

features/src/workbench-tools/install.sh

Lines changed: 35 additions & 93 deletions
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,6 @@ set -o xtrace
1010

1111
readonly CLOUD="${CLOUD:-""}"
1212
readonly USERNAME="${USERNAME:-"root"}"
13-
readonly LIBRARIES_ENV_DIR="${LIBENV:-"/opt/conda/envs/workbench-ds"}"
14-
readonly LIB_PYTHON_VERSION="${LIBPYTHONVERSION:-"3.14"}"
1513
USER_HOME_DIR="${USERHOMEDIR:-"/home/${USERNAME}"}"
1614
if [[ "${USER_HOME_DIR}" == "/home/root" ]]; then
1715
USER_HOME_DIR="/root"
@@ -70,122 +68,66 @@ if ! mamba --version &>/dev/null; then
7068
fi
7169

7270
# Install the samtools family of tools in a separate environment since some of
73-
# the other tools depend on old versions of these.
74-
readonly CONDA_PACKAGES_SAMTOOLS=(
75-
"bioconda::bcftools>=1.23"
76-
"bioconda::htslib>=1.23" # includes bgzip and tabix
77-
"bioconda::samtools>=1.23"
71+
# the other tools depend on old versions of these. This will take priority in
72+
# the PATH.
73+
CONDA_PACKAGES_1=(
74+
"bcftools"
75+
"htslib" # includes bgzip and tabix
76+
"samtools"
7877
)
79-
readonly SAMTOOLS_ENV_DIR="${WORKBENCH_TOOLS_DIR}/samtools"
80-
81-
# Environment 2 contains the genomics CLI tools. They will be added to the
82-
# PATH but will not be usable as Python libraries.
83-
readonly CONDA_PACKAGES_BINARIES=(
84-
"conda-forge::python"
85-
"conda-forge::pip"
86-
"conda-forge::perl>=5.32"
87-
"bioconda::bedtools"
78+
79+
CONDA_PACKAGES_2=(
80+
"python=3.9"
81+
"pip"
82+
"perl==5.32.1"
83+
"bedtools"
8884
"conda-forge::bgenix"
8985
"conda-forge::cromwell"
90-
"bioconda::ensembl-vep>=115"
91-
"bioconda::nextflow"
92-
"bioconda::plink"
93-
"bioconda::plink2"
94-
"bioconda::regenie"
95-
"bioconda::vcftools"
96-
)
97-
readonly BINARIES_ENV_DIR="${WORKBENCH_TOOLS_DIR}/binaries"
98-
99-
# Environment 3 contains data science Python libraries. These should be
100-
# accessible from the user's default Python environment, which is why we install
101-
# them separately and give the user control over whether to inject them into an
102-
# existing environment or create a new one.
103-
CONDA_PACKAGES_LIBRARIES=(
104-
"conda-forge::google-cloud-storage"
105-
"conda-forge::ipykernel"
106-
"conda-forge::ipywidgets"
107-
"conda-forge::jupyter"
108-
"conda-forge::openai"
109-
"conda-forge::matplotlib"
110-
"conda-forge::numpy"
111-
"conda-forge::plotly"
112-
"conda-forge::pandas"
113-
"conda-forge::seaborn"
114-
"conda-forge::scikit-learn"
115-
"conda-forge::scipy"
116-
"conda-forge::tqdm"
86+
"ensembl-vep>=115.1"
87+
"nextflow"
88+
"plink"
89+
"plink2"
90+
"regenie"
91+
"vcftools"
11792
)
11893

119-
# Build isolated environments
12094
mkdir -p "${WORKBENCH_TOOLS_DIR}"
121-
echo "Building Environment 1 (Samtools family)..."
122-
mamba create --prefix "${SAMTOOLS_ENV_DIR}" -y "${CONDA_PACKAGES_SAMTOOLS[@]}"
123-
124-
echo "Building Environment 2 (Genomics CLI Tools)..."
125-
mamba create --prefix "${BINARIES_ENV_DIR}" -y "${CONDA_PACKAGES_BINARIES[@]}"
126-
127-
echo "Building Environment 3 (Python Libraries)..."
128-
LIB_ENV_EXISTS=0
129-
130-
if [ -d "${LIBRARIES_ENV_DIR}" ]; then
131-
# SCENARIO A: Target environment already exists on host. Inject packages into it.
132-
LIB_ENV_EXISTS=1
133-
echo "Host environment detected at ${LIBRARIES_ENV_DIR}. Injecting data science packages..."
134-
135-
if mamba list -p "${LIBRARIES_ENV_DIR}" --full-name python --json | jq -e 'length == 0' >/dev/null; then
136-
echo "No Python installation found in host environment. Adding python=${LIB_PYTHON_VERSION} to package list."
137-
CONDA_PACKAGES_LIBRARIES+=("conda-forge::python=${LIB_PYTHON_VERSION}")
138-
fi
139-
mamba install --prefix "${LIBRARIES_ENV_DIR}" -y "${CONDA_PACKAGES_LIBRARIES[@]}"
140-
else
141-
# SCENARIO B: Target environment does not exist. Create it from scratch.
142-
echo "No host environment found. Creating standalone environment at ${LIBRARIES_ENV_DIR}..."
143-
mkdir -p "$(dirname "${LIBRARIES_ENV_DIR}")"
144-
145-
CONDA_PACKAGES_LIBRARIES+=("conda-forge::python=${LIB_PYTHON_VERSION}")
146-
mamba create --prefix "${LIBRARIES_ENV_DIR}" -y "${CONDA_PACKAGES_LIBRARIES[@]}"
147-
fi
95+
mamba create --prefix "${WORKBENCH_TOOLS_DIR}/1" -c bioconda -y "${CONDA_PACKAGES_1[@]}"
96+
mamba create --prefix "${WORKBENCH_TOOLS_DIR}/2" -c bioconda -y "${CONDA_PACKAGES_2[@]}"
14897

14998
# Install dsub via pip if on GCP. The conda version is outdated.
150-
# dsub is installed in LIBRARIES_ENV_DIR because it can be used as a Python
151-
# library, and users may want to install additional packages alongside it.
152-
# PYTHONNOUSERSITE=1 prevents pip from seeing/modifying packages in user site-packages.
15399
if [[ "${CLOUD}" == "gcp" ]]; then
154-
PYTHONNOUSERSITE=1 "${LIBRARIES_ENV_DIR}/bin/pip" install dsub
100+
"${WORKBENCH_TOOLS_DIR}/2/bin/pip" install dsub
155101
fi
156102

157103
# Force the perl and python scripts to use the correct perl/python
158-
find -L "${BINARIES_ENV_DIR}/bin" -type f -executable -exec \
104+
find -L "${WORKBENCH_TOOLS_DIR}/2/bin" -type f -executable -exec \
159105
sed -i --follow-symlinks \
160-
-e "1s|^#\!/usr/bin/env perl\\r\?$|#\!${BINARIES_ENV_DIR}/bin/perl|" \
161-
-e "1s|^#\!/usr/bin/env python\\r\?$|#\!${BINARIES_ENV_DIR}/bin/python|" {} \;
106+
-e "1s|^#\!/usr/bin/env perl\\r\?$|#\!${WORKBENCH_TOOLS_DIR}/2/bin/perl|" \
107+
-e "1s|^#\!/usr/bin/env python\\r\?$|#\!${WORKBENCH_TOOLS_DIR}/2/bin/python|" {} \;
162108

163-
# Make the login user the owner of the conda environments
109+
# Make the login user the owner of the conda environment
164110
chown -R "${USERNAME}:" "${WORKBENCH_TOOLS_DIR}"
165-
chown -R "${USERNAME}:" "${LIBRARIES_ENV_DIR}"
166111

167112
{
168-
echo "# Workbench Tools Configuration"
169-
170-
# If we created a standalone Python libraries environment from scratch, make it the default terminal Python.
171-
# If it already existed (LIB_ENV_EXISTS=1), we leave the host image's PATH untouched to prevent shadowing.
172-
if [[ "${LIB_ENV_EXISTS}" == "0" ]]; then
173-
# shellcheck disable=SC2016 # we want $PATH to be evaluated at runtime
174-
printf 'export PATH="%s:$PATH"\n' "${LIBRARIES_ENV_DIR}/bin"
175-
fi
176-
177113
# Set PATH to include workbench-tools binaries
178114
# shellcheck disable=SC2016 # we want $PATH to be evaluated at runtime
179-
printf 'export PATH="$PATH:%s"\n' "${SAMTOOLS_ENV_DIR}/bin:${BINARIES_ENV_DIR}/bin"
115+
printf 'export PATH="%s:$PATH"\n' "${WORKBENCH_TOOLS_DIR}/1/bin:${WORKBENCH_TOOLS_DIR}/2/bin"
180116

181117
# Set CROMWELL_JAR environment variable
182-
printf 'export CROMWELL_JAR="%s"\n' "${BINARIES_ENV_DIR}/share/cromwell/cromwell.jar"
118+
printf 'export CROMWELL_JAR="%s"\n' "${WORKBENCH_TOOLS_DIR}/2/share/cromwell/cromwell.jar"
119+
120+
# Make dsub a function that includes the correct PYTHONPATH. NeMo sets
121+
# PYTHONPATH so we need to override it here. We use a function instead of an
122+
# alias because aliases are not expanded in non-interactive shells.
123+
# shellcheck disable=SC2016 # we want $PYTHONPATH to be evaluated at runtime
124+
printf 'function dsub() (PYTHONPATH="%s/2/lib/python3.9/site-packages:${PYTHONPATH:-}" "%s/2/bin/dsub" "$@")\n' "${WORKBENCH_TOOLS_DIR}" "${WORKBENCH_TOOLS_DIR}"
183125
} >> "${USER_HOME_DIR}/.bashrc"
184126

185127
# Allow .bashrc to be sourced in non-interactive shells
186128
sed -i '/^# If not running interactively/,/esac/d' "${USER_HOME_DIR}/.bashrc" || true
187129

188130
# Make sure the login user is the owner of their .bashrc
189-
chown "${USERNAME}:" "${USER_HOME_DIR}/.bashrc"
131+
chown -R "${USERNAME}:" "${USER_HOME_DIR}/.bashrc"
190132

191-
echo "Workbench tools installation complete!"
133+
echo "Done!"

src/custom-workbench-jupyter-template/.devcontainer.json

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,6 @@
2222
],
2323
"features": {
2424
"./.devcontainer/features/workbench-tools": {
25-
"libEnv": "/opt/conda/envs/jupyter", // Use the jupyter conda environment
2625
"cloud": "${templateOption:cloud}",
2726
"username": "jupyter",
2827
"userHomeDir": "/home/jupyter"

src/jupyter-aou/.devcontainer.json

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@
1111
"postStartCommand": "./startupscript/remount-on-restart.sh jupyter /home/jupyter \"${templateOption:cloud}\" \"${templateOption:login}\"",
1212
"features": {
1313
"./.devcontainer/features/workbench-tools": {
14-
"libEnv": "/opt/conda/envs/jupyter", // Use the jupyter conda environment
1514
"cloud": "${templateOption:cloud}",
1615
"username": "jupyter",
1716
"userHomeDir": "/home/jupyter"

src/nemo_jupyter/.devcontainer.json

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,6 @@
2525
},
2626
"ghcr.io/dhoeric/features/google-cloud-cli@sha256:fa5d894718825c5ad8009ac8f2c9f0cea3d1661eb108a9d465cba9f3fc48965f": {},
2727
"./.devcontainer/features/workbench-tools": {
28-
"libPythonVersion": "3.12", // Must match python version in nemo image
2928
"cloud": "${templateOption:cloud}",
3029
"username": "jupyter",
3130
"userHomeDir": "/home/jupyter"

src/nemo_jupyter/Dockerfile

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -17,15 +17,9 @@ RUN useradd --uid ${NB_UID} --gid ${NB_GID} --create-home --home-dir ${WORKDIR}
1717
# Fix ownership for common dirs
1818
&& mkdir -p /workspace \
1919
&& chown -R ${NB_UID}:${NB_GID} ${WORKDIR} /workspace /tmp \
20-
&& chown -R ${NB_UID}:${NB_GID} /opt/conda || true
21-
22-
# Add shell functions to override wb commands (functions work in non-interactive shells, unlike aliases)
23-
RUN <<EOF cat >> ${WORKDIR}/.bashrc
24-
weightsbiases() { /usr/local/bin/wb "\$@"; }
25-
wb() { /usr/bin/wb "\$@"; }
26-
wandb() { /opt/venv/bin/wb "\$@"; }
27-
export -f weightsbiases wb wandb
28-
EOF
20+
&& chown -R ${NB_UID}:${NB_GID} /opt/conda || true \
21+
# Add alias for weightsbiases
22+
&& printf 'alias weightsbiases="/usr/local/bin/wb"\nalias wb="/usr/bin/wb"\n' >> ${WORKDIR}/.bashrc
2923

3024
# Environment and working directory
3125
ENV HOME=${WORKDIR}

src/nemo_jupyter_aou/.devcontainer.json

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,6 @@
2626
},
2727
"ghcr.io/dhoeric/features/google-cloud-cli@sha256:fa5d894718825c5ad8009ac8f2c9f0cea3d1661eb108a9d465cba9f3fc48965f": {},
2828
"./.devcontainer/features/workbench-tools": {
29-
"libPythonVersion": "3.12", // Must match python version in nemo image
3029
"cloud": "${templateOption:cloud}",
3130
"username": "jupyter",
3231
"userHomeDir": "/home/jupyter"

startupscript/post-startup.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -201,7 +201,7 @@ cat << EOF >> "${USER_BASHRC}"
201201
### BEGIN: Workbench-specific customizations ###
202202
203203
# Prepend "/usr/bin" (if not already in the path)
204-
if [[ ":\${PATH}:" != *":/usr/bin:"* ]]; then
204+
if [[ "\${PATH}:" != "/usr/bin:"* ]]; then
205205
export PATH=/usr/bin:\${PATH}
206206
fi
207207

test/test.sh

Lines changed: 2 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -11,18 +11,16 @@ function check() {
1111
check_user "${TEST_USER}" "$@"
1212
}
1313

14-
sudo -u "${TEST_USER}" bash -l -c "echo PATH: $PATH"
15-
1614
# Template specific tests
1715
check "gcsfuse" gcsfuse -v
1816
check "wb cli" wb version
1917
check "fuse.conf user_allow_other" grep -qE "^[[:space:]]*[^#]*user_allow_other" "/etc/fuse.conf"
2018

2119
# The workbench-tools feature should install these
2220
if [[ "$HAS_WORKBENCH_TOOLS" == "true" ]]; then
23-
check "python3" "which python3 && python3 --version"
21+
check "python3" python3 --version
2422
check "python3: venv" 'python3 -c "import venv"'
25-
check "pip3" "which pip3 && pip3 --version"
23+
check "pip3" pip3 --version
2624
if [[ "$TEMPLATE_ID" != "nemo_jupyter" ]] && [[ "$TEMPLATE_ID" != "nemo_jupyter_aou" ]]; then
2725
check "cromwell" cromwell --version
2826
fi
@@ -46,21 +44,6 @@ if [[ "$HAS_WORKBENCH_TOOLS" == "true" ]]; then
4644
check "vep: filter_vep" "filter_vep --help > /dev/null"
4745
check "vep: variant_recoder" "variant_recoder --help | head -n10"
4846
check "vep: haplo" "haplo --help | head -n10"
49-
50-
# Python packages
51-
check "python: google-cloud-storage" 'python3 -c "import google.cloud.storage"'
52-
check "python: ipykernel" 'python3 -c "import ipykernel"'
53-
check "python: ipywidgets" 'python3 -c "import ipywidgets"'
54-
check "python: jupyter" 'python3 -c "import jupyter"'
55-
check "python: openai" 'python3 -c "import openai"'
56-
check "python: matplotlib" 'python3 -c "import matplotlib"'
57-
check "python: numpy" 'python3 -c "import numpy"'
58-
check "python: plotly" 'python3 -c "import plotly"'
59-
check "python: pandas" 'python3 -c "import pandas"'
60-
check "python: seaborn" 'python3 -c "import seaborn"'
61-
check "python: scikit-learn" 'python3 -c "import sklearn"'
62-
check "python: scipy" 'python3 -c "import scipy"'
63-
check "python: tqdm" 'python3 -c "import tqdm"'
6447
fi
6548

6649
# The postgres-client feature should install these

0 commit comments

Comments
 (0)