-
Notifications
You must be signed in to change notification settings - Fork 787
Expand file tree
/
Copy pathDockerfile.deepsomatic
More file actions
211 lines (175 loc) · 8.4 KB
/
Dockerfile.deepsomatic
File metadata and controls
211 lines (175 loc) · 8.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
# Copyright 2019 Google LLC.
# This is used to build the DeepSomatic release docker image.
# It can also be used to build local images, especially if you've made changes
# to the code.
# Example command:
# $ git clone https://github.com/google/deepvariant.git
# $ cd deepvariant
# $ sudo docker build -f Dockerfile.deepsomatic -t deepsomatic .
#
# To build for GPU, use a command like:
# $ sudo docker build -f Dockerfile.deepsomatic --build-arg=FROM_IMAGE=nvidia/cuda:12.3.2-cudnn9-devel-ubuntu22.04 --build-arg=DV_GPU_BUILD=1 -t deepsomatic_gpu .
# Build arguments declared before the first FROM. In this position they are
# only visible to FROM lines; a stage that needs one re-declares it with a
# bare `ARG NAME`.
# Base image for the prereq stage and for the final image.
ARG FROM_IMAGE=ubuntu:22.04
# PYTHON_VERSION is also set in settings.sh.
ARG PYTHON_VERSION=3.10
# Exported as an environment variable in the prereq stage; set to 1 for GPU
# builds (see the example command above).
ARG DV_GPU_BUILD=0
# Release version; selects the model, PON and small-model download URLs.
ARG VERSION=1.10.0
# Exported as an environment variable in the prereq stage.
ARG TF_ENABLE_ONEDNN_OPTS=1
#======================================#
# Stage 1: Install samtools + bcftools #
#======================================#
# Build-only stage: creates a conda environment named `bio` that holds pinned
# bcftools and samtools builds from the bioconda channel.
FROM condaforge/miniforge3:24.9.2-0 AS hts_utils
RUN conda config --add channels bioconda
# --yes makes both commands explicitly non-interactive; a docker build has no
# terminal on which a confirmation prompt could be answered.
RUN conda create --yes -n bio \
      bioconda::bcftools=1.15 \
      bioconda::samtools=1.15 \
    && conda clean --all --yes
#==========================#
# Stage 2: Download Models #
#==========================#
FROM alpine:3.19 AS download_models
# Re-declare the global VERSION so the RUN steps of this stage can read it.
ARG VERSION
# GNU parallel fans the downloads out; wget performs each fetch.
RUN apk add --no-cache \
      parallel \
      wget
# Copy models
# parallel runs one job per (model, file) pair: {1} is the model name and {2}
# is the file path inside that model's savedmodel directory. The first failed
# job stops the whole step (--halt now,fail=1).
# The hybrid and ont models mix naming conventions:
# hybrid_pacbio_illumina --> hybrid
# ont_r104 --> ont
# NOTE(review): no hybrid model appears in the list below; confirm the naming
# note above is still current.
RUN parallel --halt now,fail=1 --verbose --jobs 10 \
      "mkdir -p /opt/models/deepsomatic/{1}/variables && wget -O /opt/models/deepsomatic/{1}/{2} https://storage.googleapis.com/deepvariant/models/DeepSomatic/${VERSION}/savedmodels/deepsomatic.{1}.savedmodel/{2}" ::: \
      wgs wes pacbio ont ffpe_wgs ffpe_wes wgs_tumor_only wes_tumor_only pacbio_tumor_only ont_tumor_only ffpe_wgs_tumor_only ffpe_wes_tumor_only ::: \
      fingerprint.pb saved_model.pb model.example_info.json example_info.json variables/variables.data-00000-of-00001 variables/variables.index && \
    chmod -R +r /opt/models/
# Adapt the code below if you are testing custom models.
# WORKDIR /opt/models/deepsomatic/wes
# ADD https://storage.googleapis.com/deepvariant/models/DeepSomatic/${VERSION}/savedmodels/deepsomatic.wes.savedmodel/fingerprint.pb \
# https://storage.googleapis.com/deepvariant/models/DeepSomatic/${VERSION}/savedmodels/deepsomatic.wes.savedmodel/saved_model.pb \
# https://storage.googleapis.com/deepvariant/models/DeepSomatic/${VERSION}/savedmodels/deepsomatic.wes.savedmodel/example_info.json ./
# WORKDIR /opt/models/deepsomatic/wes/variables
# ADD https://storage.googleapis.com/deepvariant/models/DeepSomatic/${VERSION}/savedmodels/deepsomatic.wes.savedmodel/variables/variables.data-00000-of-00001 \
# https://storage.googleapis.com/deepvariant/models/DeepSomatic/${VERSION}/savedmodels/deepsomatic.wes.savedmodel/variables/variables.index ./
# RUN chmod -R +r /opt/models/deepsomatic/wes/*
# PONs and AF VCF files
# One parallel job per file ({} is the file name); each .vcf.gz is fetched
# together with its .tbi companion. The first failed job stops the whole step.
# Read permission is granted once, after every download has finished, rather
# than recursively from inside each concurrent job.
RUN mkdir -p /opt/models/deepsomatic/pons && \
    parallel --halt now,fail=1 --verbose --jobs 10 \
      "wget -O /opt/models/deepsomatic/pons/{} https://storage.googleapis.com/deepvariant/models/DeepSomatic/${VERSION}/pons/{}" ::: \
      AF_ilmn_PON_DeepVariant.GRCh38.AF0.05.vcf.gz \
      AF_ilmn_PON_DeepVariant.GRCh38.AF0.05.vcf.gz.tbi \
      AF_pacbio_PON_CoLoRSdb.GRCh38.AF0.05.vcf.gz \
      AF_pacbio_PON_CoLoRSdb.GRCh38.AF0.05.vcf.gz.tbi \
      PON_dbsnp138_gnomad_ILMN1000g_pon.vcf.gz \
      PON_dbsnp138_gnomad_ILMN1000g_pon.vcf.gz.tbi \
      PON_dbsnp138_gnomad_PB1000g_pon.vcf.gz \
      PON_dbsnp138_gnomad_PB1000g_pon.vcf.gz.tbi && \
    chmod -R +r /opt/models/deepsomatic/pons
# Download small models
# {1} iterates over the model names and {2} over the single file name, so one
# job runs per model. Each job also creates a variables/ directory that this
# step leaves empty.
RUN parallel --halt now,fail=1 --verbose --jobs 10 \
      "mkdir -p /opt/smallmodels/{1}/variables && wget -O /opt/smallmodels/{1}/{2} https://storage.googleapis.com/deepvariant/models/DeepSomatic/${VERSION}/smallmodels/deepsomatic.{1}.smallmodel/{2}" \
      ::: \
        wgs \
        ffpe_wgs \
        pacbio \
        ont \
      ::: \
        model.keras \
    && chmod -R +r /opt/smallmodels/
#===================#
# Stage 3: Build DS #
#===================#
FROM ${FROM_IMAGE} AS prereq
LABEL maintainer="https://github.com/google/deepvariant/issues"
# DV_GPU_BUILD, PYTHON_VERSION, and TF_ENABLE_ONEDNN_OPTS are used by
# ./build-prereq.sh and by tensorflow during the build.
# Each global ARG is re-declared to bring it into this stage, then exported
# with ENV (key=value form) so it is present in the environment of RUN steps.
ARG DV_GPU_BUILD
ENV DV_GPU_BUILD=${DV_GPU_BUILD}
ARG PYTHON_VERSION
ENV PYTHON_VERSION=${PYTHON_VERSION}
ARG TF_ENABLE_ONEDNN_OPTS
ENV TF_ENABLE_ONEDNN_OPTS=${TF_ENABLE_ONEDNN_OPTS}
# Copy over just ./build-prereq.sh, ./run-prereq.sh, ./settings.sh,
# ./tools/build_absl.sh, and the tensorflow patch so we can cache these build
# steps: the layers below are only rebuilt when one of these files changes.
WORKDIR /opt/deepvariant
COPY ./build-prereq.sh \
     ./run-prereq.sh \
     ./settings.sh \
     /opt/deepvariant/
COPY ./tools/build_absl.sh /opt/deepvariant/tools/
COPY ./third_party/tensorflow.bzl.patch /opt/deepvariant/third_party/
RUN ./build-prereq.sh
#=====================================#
# Stage 4: Build DeepVariant Binaries #
#=====================================#
FROM prereq AS builder
# Add the full source tree on top of the prerequisite layers.
COPY . /opt/deepvariant/
# ${HOME}/bin is prepended to PATH for this one command only (PATH for bazel).
RUN PATH="${HOME}/bin:${PATH}" ./build_release_binaries.sh
#===============================#
# Stage 5: Integrate everything #
#===============================#
FROM ${FROM_IMAGE}
# PYTHON_VERSION is re-declared in this stage because the dist-packages COPY
# further down uses it in its source and destination paths.
ARG PYTHON_VERSION
ENV PYTHON_VERSION=${PYTHON_VERSION}
# Directory that receives the zip binaries and their shell wrappers.
ENV DV_BIN_PATH=/opt/deepvariant/bin
# Install libraries
# The apt cleanup runs in the same RUN as the install so package lists and
# archives never land in an image layer; --no-cache-dir does the same for the
# pip download cache.
# NOTE(review): --no-install-recommends is intentionally not added here;
# confirm no recommended package is needed at runtime before enabling it.
RUN apt-get -y update && \
    apt-get install -y parallel python3-pip unzip && \
    PATH="${HOME}/.local/bin:$PATH" python3 -m pip install --no-cache-dir absl-py==0.13.0 && \
    apt-get clean autoclean && \
    apt-get autoremove -y --purge && \
    rm -rf /var/lib/apt/lists/*
# Since samtools/bcftools are relatively static, we copy them first.
# Copy over samtools and bcftools
# Only the bin/ and lib/ directories of the `bio` conda environment built in
# the hts_utils stage are copied, to the same absolute paths.
COPY --from=hts_utils /opt/conda/envs/bio/bin /opt/conda/envs/bio/bin
COPY --from=hts_utils /opt/conda/envs/bio/lib /opt/conda/envs/bio/lib
# Integrate everything.
# Record the build-time proxy settings (http_proxy / https_proxy, empty when
# unset) in apt's configuration file, one directive per line.
# printf is used rather than echo because whether echo expands "\n" depends
# on which shell /bin/sh is, and the base image is configurable.
RUN printf '%s\n' \
      "Acquire::http::proxy \"${http_proxy}\";" \
      "Acquire::https::proxy \"${https_proxy}\";" \
      > /etc/apt/apt.conf
# Relative COPY destinations below resolve against /opt/.
WORKDIR /opt/
# Python packages from the builder stage's dist-packages directory.
COPY --from=builder /usr/local/lib/python${PYTHON_VERSION}/dist-packages /usr/local/lib/python${PYTHON_VERSION}/dist-packages
# licenses.zip lands in /opt/ (the current WORKDIR).
COPY --from=builder /opt/deepvariant/bazel-bin/licenses.zip .
# Copy over zip binaries and the run_deepsomatic.py script.
# The last path is the destination directory; everything before it is a source.
COPY --from=builder \
/opt/deepvariant/bazel-out/k8-opt/bin/deepvariant/make_examples_somatic.zip \
/opt/deepvariant/bazel-out/k8-opt/bin/deepvariant/call_variants.zip \
/opt/deepvariant/bazel-out/k8-opt/bin/deepvariant/postprocess_variants.zip \
/opt/deepvariant/bazel-out/k8-opt/bin/deepvariant/vcf_stats_report.zip \
/opt/deepvariant/bazel-out/k8-opt/bin/deepvariant/show_examples.zip \
/opt/deepvariant/bazel-out/k8-opt/bin/deepvariant/runtime_by_region_vis.zip \
/opt/deepvariant/bazel-out/k8-opt/bin/deepvariant/convert_to_saved_model.zip \
/opt/deepvariant/bazel-out/k8-opt/bin/deepvariant/train.zip \
/opt/deepvariant/scripts/run_deepsomatic.py \
/opt/deepvariant/bin/
# Create shell wrappers for python zip files for easier use.
# Each wrapper is a two-line bash script: a shebang followed by a python3
# command that forwards all arguments ("$@"). make_examples_somatic and
# run_deepsomatic run python3 with -u; the other tools run it without.
# write_wrapper takes the wrapper path as $1 and the command line as $2.
RUN set -e; \
    bin_dir=/opt/deepvariant/bin; \
    write_wrapper() { printf "%s\n%s\n" '#!/bin/bash' "$2" > "$1"; }; \
    write_wrapper "${bin_dir}/make_examples_somatic" \
      "python3 -u ${bin_dir}/make_examples_somatic.zip \"\$@\""; \
    for tool in \
      call_variants \
      postprocess_variants \
      vcf_stats_report \
      show_examples \
      runtime_by_region_vis; \
    do \
      write_wrapper "${bin_dir}/${tool}" \
        "python3 ${bin_dir}/${tool}.zip \"\$@\""; \
    done; \
    mkdir -p "${bin_dir}/deepsomatic"; \
    write_wrapper "${bin_dir}/deepsomatic/run_deepsomatic" \
      "python3 -u ${bin_dir}/run_deepsomatic.py \"\$@\""; \
    chmod -R +x "${bin_dir}"
# Copy over models
# /opt/models holds the saved models plus the pons/ directory and
# /opt/smallmodels holds the small models; both trees come from the
# download_models stage.
COPY --from=download_models /opt/models /opt/models
COPY --from=download_models /opt/smallmodels /opt/smallmodels
# This to use Keras 2.x with TF 2.16.1 or higher.
ENV TF_USE_LEGACY_KERAS=1
# Append the binaries directory, the samtools/bcftools environment, and the
# run_deepsomatic wrapper directory to PATH. DV_BIN_PATH is /opt/deepvariant/bin,
# so it is listed once instead of being repeated as a literal path.
ENV PATH="${PATH}:${DV_BIN_PATH}:/opt/conda/envs/bio/bin:${DV_BIN_PATH}/deepsomatic"
WORKDIR /opt/deepvariant
# Default command: run the run_deepsomatic wrapper with --help.
CMD ["/opt/deepvariant/bin/deepsomatic/run_deepsomatic", "--help"]