Skip to content

Commit 4784cae

Browse files
committed
Add python libraries in a separate conda env
1 parent a0f3015 commit 4784cae

7 files changed

Lines changed: 114 additions & 31 deletions

File tree

features/src/workbench-tools/devcontainer-feature.json

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,16 @@
44
"name": "Workbench Tools",
55
"description": "Installs common tools for Workbench Apps. Currently it only supports Debian-based systems (e.g. Ubuntu) on x86_64.",
66
"options": {
7+
"libEnv": {
8+
"type": "string",
9+
"default": "/opt/conda/envs/workbench-ds",
10+
"description": "The path to the conda environment where Python libraries (e.g. numpy, plotly, scipy) should be installed. This could be a pre-existing environment or a new one."
11+
},
12+
"libPythonVersion": {
13+
"type": "string",
14+
"default": "3.10",
15+
"description": "The Python version to use for the installed libraries."
16+
},
717
"cloud": {
818
"type": "string",
919
"default": "",

features/src/workbench-tools/install-conda.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ install_miniforge() {
1919
local download_url
2020
download_url="https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-$(uname -m).sh"
2121

22-
check_packages curl ca-certificates
2322
mkdir -p /tmp/miniforge
2423
(
2524
cd /tmp/miniforge

features/src/workbench-tools/install.sh

Lines changed: 80 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ set -o xtrace
1010

1111
readonly CLOUD="${CLOUD:-""}"
1212
readonly USERNAME="${USERNAME:-"root"}"
13+
readonly LIB_ENV="${LIBENV:-"/opt/conda/envs/workbench-ds"}"
14+
readonly LIB_PYTHON_VERSION="${LIBPYTHONVERSION:-"3.10"}"
1315
USER_HOME_DIR="${USERHOMEDIR:-"/home/${USERNAME}"}"
1416
if [[ "${USER_HOME_DIR}" == "/home/root" ]]; then
1517
USER_HOME_DIR="/root"
@@ -68,32 +70,79 @@ if ! mamba --version &>/dev/null; then
6870
fi
6971

7072
# Install the samtools family of tools in a separate environment since some of
71-
# the other tools depend on old versions of these. This will take priority in
72-
# the PATH.
73+
# the other tools depend on old versions of these.
7374
CONDA_PACKAGES_1=(
74-
"bcftools"
75-
"htslib" # includes bgzip and tabix
76-
"samtools"
75+
"bioconda::bcftools"
76+
"bioconda::htslib" # includes bgzip and tabix
77+
"bioconda::samtools"
7778
)
7879

80+
# Environment 2 contains the genomics CLI tools. They will be added to the
81+
# PATH but will not be usable as Python libraries.
7982
CONDA_PACKAGES_2=(
80-
"python=3.9"
81-
"pip"
82-
"perl==5.32.1"
83-
"bedtools"
83+
"conda-forge::python=3.10"
84+
"conda-forge::pip"
85+
"conda-forge::perl==5.32.1"
86+
"bioconda::bedtools"
8487
"conda-forge::bgenix"
8588
"conda-forge::cromwell"
86-
"ensembl-vep>=115.1"
87-
"nextflow"
88-
"plink"
89-
"plink2"
90-
"regenie"
91-
"vcftools"
89+
"bioconda::ensembl-vep>=115.1"
90+
"bioconda::nextflow"
91+
"bioconda::plink"
92+
"bioconda::plink2"
93+
"bioconda::regenie"
94+
"bioconda::vcftools"
9295
)
9396

97+
# Environment 3 contains data science Python libraries. These should be
98+
# accessible from the user's default Python environment, which is why we install
99+
# them separately and give the user control over whether to inject them into an
100+
# existing environment or create a new one.
101+
CONDA_PACKAGES_3=(
102+
"conda-forge::google-cloud-storage"
103+
"conda-forge::ipykernel"
104+
"conda-forge::ipywidgets"
105+
"conda-forge::jupyter"
106+
"conda-forge::openai"
107+
"conda-forge::matplotlib"
108+
"conda-forge::numpy"
109+
"conda-forge::plotly"
110+
"conda-forge::pandas"
111+
"conda-forge::seaborn"
112+
"conda-forge::scikit-learn"
113+
"conda-forge::scipy"
114+
"conda-forge::tqdm"
115+
)
116+
117+
# Build isolated environments
94118
mkdir -p "${WORKBENCH_TOOLS_DIR}"
95-
mamba create --prefix "${WORKBENCH_TOOLS_DIR}/1" -c bioconda -y "${CONDA_PACKAGES_1[@]}"
96-
mamba create --prefix "${WORKBENCH_TOOLS_DIR}/2" -c bioconda -y "${CONDA_PACKAGES_2[@]}"
119+
echo "Building Environment 1 (Samtools family)..."
120+
mamba create --prefix "${WORKBENCH_TOOLS_DIR}/1" -y "${CONDA_PACKAGES_1[@]}"
121+
122+
echo "Building Environment 2 (Genomics CLI Tools)..."
123+
mamba create --prefix "${WORKBENCH_TOOLS_DIR}/2" -y "${CONDA_PACKAGES_2[@]}"
124+
125+
echo "Building Environment 3 (Python Libraries)..."
126+
LIB_ENV_EXISTS=0
127+
128+
if [ -d "${LIB_ENV}" ]; then
129+
# SCENARIO A: Target environment already exists on host. Inject packages into it.
130+
LIB_ENV_EXISTS=1
131+
echo "Host environment detected at ${LIB_ENV}. Injecting data science packages..."
132+
133+
# Probe the configured target environment (LIB_ENV), not a fixed path, for an
# existing Python package. `jq -e` exits 0 only when the filter yields true,
# i.e. when the JSON list reported for "python" is empty.
if mamba list -p "${LIB_ENV}" --full-name python --json | jq -e 'length == 0' >/dev/null; then
134+
echo "No Python installation found in host environment. Adding python=${LIB_PYTHON_VERSION} to package list."
135+
CONDA_PACKAGES_3+=("conda-forge::python=${LIB_PYTHON_VERSION}")
136+
fi
137+
mamba install --prefix "${LIB_ENV}" -y "${CONDA_PACKAGES_3[@]}"
138+
else
139+
# SCENARIO B: Target environment does not exist. Create it from scratch.
140+
echo "No host environment found. Creating standalone environment at ${LIB_ENV}..."
141+
mkdir -p "$(dirname "${LIB_ENV}")"
142+
143+
CONDA_PACKAGES_3+=("conda-forge::python=${LIB_PYTHON_VERSION}")
144+
mamba create --prefix "${LIB_ENV}" -y "${CONDA_PACKAGES_3[@]}"
145+
fi
97146

98147
# Install dsub via pip if on GCP. The conda version is outdated.
99148
if [[ "${CLOUD}" == "gcp" ]]; then
@@ -106,28 +155,32 @@ find -L "${WORKBENCH_TOOLS_DIR}/2/bin" -type f -executable -exec \
106155
-e "1s|^#\!/usr/bin/env perl\\r\?$|#\!${WORKBENCH_TOOLS_DIR}/2/bin/perl|" \
107156
-e "1s|^#\!/usr/bin/env python\\r\?$|#\!${WORKBENCH_TOOLS_DIR}/2/bin/python|" {} \;
108157

109-
# Make the login user the owner of the conda environment
158+
# Make the login user the owner of the conda environments
110159
chown -R "${USERNAME}:" "${WORKBENCH_TOOLS_DIR}"
160+
chown -R "${USERNAME}:" "${LIB_ENV}"
111161

112162
{
163+
echo "# Workbench Tools Configuration"
164+
165+
# If we created a standalone Python libraries environment from scratch, make it the default terminal Python.
166+
# If it already existed (LIB_ENV_EXISTS=1), we leave the host image's PATH untouched to prevent shadowing.
167+
if [[ "${LIB_ENV_EXISTS}" == "0" ]]; then
168+
# shellcheck disable=SC2016 # we want $PATH to be evaluated at runtime
169+
printf 'export PATH="%s:$PATH"\n' "${LIB_ENV}/bin"
170+
fi
171+
113172
# Set PATH to include workbench-tools binaries
114173
# shellcheck disable=SC2016 # we want $PATH to be evaluated at runtime
115-
printf 'export PATH="%s:$PATH"\n' "${WORKBENCH_TOOLS_DIR}/1/bin:${WORKBENCH_TOOLS_DIR}/2/bin"
174+
printf 'export PATH="$PATH:%s"\n' "${WORKBENCH_TOOLS_DIR}/1/bin:${WORKBENCH_TOOLS_DIR}/2/bin"
116175

117176
# Set CROMWELL_JAR environment variable
118177
printf 'export CROMWELL_JAR="%s"\n' "${WORKBENCH_TOOLS_DIR}/2/share/cromwell/cromwell.jar"
119-
120-
# Make dsub a function that includes the correct PYTHONPATH. NeMo sets
121-
# PYTHONPATH so we need to override it here. We use a function instead of an
122-
# alias because aliases are not expanded in non-interactive shells.
123-
# shellcheck disable=SC2016 # we want $PYTHONPATH to be evaluated at runtime
124-
printf 'function dsub() (PYTHONPATH="%s/2/lib/python3.9/site-packages:${PYTHONPATH:-}" "%s/2/bin/dsub" "$@")\n' "${WORKBENCH_TOOLS_DIR}" "${WORKBENCH_TOOLS_DIR}"
125178
} >> "${USER_HOME_DIR}/.bashrc"
126179

127180
# Allow .bashrc to be sourced in non-interactive shells
128181
sed -i '/^# If not running interactively/,/esac/d' "${USER_HOME_DIR}/.bashrc" || true
129182

130183
# Make sure the login user is the owner of their .bashrc
131-
chown -R "${USERNAME}:" "${USER_HOME_DIR}/.bashrc"
184+
chown "${USERNAME}:" "${USER_HOME_DIR}/.bashrc"
132185

133-
echo "Done!"
186+
echo "Workbench tools installation complete!"

src/custom-workbench-jupyter-template/.devcontainer.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
],
2323
"features": {
2424
"./.devcontainer/features/workbench-tools": {
25+
"libEnv": "/opt/conda/envs/jupyter", // Use the jupyter conda environment
26+
"libPythonVersion": "3.10", // This needs to match the Python version in the Dockerfile
2527
"cloud": "${templateOption:cloud}",
2628
"username": "jupyter",
2729
"userHomeDir": "/home/jupyter"

src/jupyter-aou/.devcontainer.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
"postStartCommand": "./startupscript/remount-on-restart.sh jupyter /home/jupyter \"${templateOption:cloud}\" \"${templateOption:login}\"",
1212
"features": {
1313
"./.devcontainer/features/workbench-tools": {
14+
"libEnv": "/opt/conda/envs/jupyter", // Use the jupyter conda environment
15+
"libPythonVersion": "3.10", // This needs to match the Python version in the Dockerfile
1416
"cloud": "${templateOption:cloud}",
1517
"username": "jupyter",
1618
"userHomeDir": "/home/jupyter"

startupscript/post-startup.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ cat << EOF >> "${USER_BASHRC}"
172172
### BEGIN: Workbench-specific customizations ###
173173
174174
# Prepend "/usr/bin" (if not already in the path)
175-
if [[ "\${PATH}:" != "/usr/bin:"* ]]; then
175+
if [[ ":\${PATH}:" != *":/usr/bin:"* ]]; then
176176
export PATH=/usr/bin:\${PATH}
177177
fi
178178

test/test.sh

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,18 @@ function check() {
1111
check_user "${TEST_USER}" "$@"
1212
}
1313

14+
sudo -u "${TEST_USER}" bash -l -c "cat ~/.bashrc"
15+
1416
# Template specific tests
1517
check "gcsfuse" gcsfuse -v
1618
check "wb cli" wb version
1719
check "fuse.conf user_allow_other" grep -qE "^[[:space:]]*[^#]*user_allow_other" "/etc/fuse.conf"
1820

1921
# The workbench-tools feature should install these
2022
if [[ "$HAS_WORKBENCH_TOOLS" == "true" ]]; then
21-
check "python3" python3 --version
23+
check "python3" "which python3 && python3 --version"
2224
check "python3: venv" 'python3 -c "import venv"'
23-
check "pip3" pip3 --version
25+
check "pip3" "which pip3 && pip3 --version"
2426
if [[ "$TEMPLATE_ID" != "nemo_jupyter" ]] && [[ "$TEMPLATE_ID" != "nemo_jupyter_aou" ]]; then
2527
check "cromwell" cromwell --version
2628
fi
@@ -44,6 +46,21 @@ if [[ "$HAS_WORKBENCH_TOOLS" == "true" ]]; then
4446
check "vep: filter_vep" "filter_vep --help > /dev/null"
4547
check "vep: variant_recoder" "variant_recoder --help | head -n10"
4648
check "vep: haplo" "haplo --help | head -n10"
49+
50+
# Python packages
51+
check "python: google-cloud-storage" 'python3 -c "import google.cloud.storage"'
52+
check "python: ipykernel" 'python3 -c "import ipykernel"'
53+
check "python: ipywidgets" 'python3 -c "import ipywidgets"'
54+
check "python: jupyter" 'python3 -c "import jupyter"'
55+
check "python: openai" 'python3 -c "import openai"'
56+
check "python: matplotlib" 'python3 -c "import matplotlib"'
57+
check "python: numpy" 'python3 -c "import numpy"'
58+
check "python: plotly" 'python3 -c "import plotly"'
59+
check "python: pandas" 'python3 -c "import pandas"'
60+
check "python: seaborn" 'python3 -c "import seaborn"'
61+
check "python: scikit-learn" 'python3 -c "import sklearn"'
62+
check "python: scipy" 'python3 -c "import scipy"'
63+
check "python: tqdm" 'python3 -c "import tqdm"'
4764
fi
4865

4966
# The postgres-client feature should install these

0 commit comments

Comments
 (0)