Skip to content

Commit 678c923

Browse files
optimize: reduce Docker image size by 60% using proper multi-stage builds
Signed-off-by: sapkota-aayush <aayushsapkota1030@gmail.com>
1 parent 42f9fbd commit 678c923

19 files changed

Lines changed: 539 additions & 105 deletions

File tree

examples/batchmap/flatmap/Dockerfile

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,7 @@ RUN apt-get update \
3434
&& chmod +x /dumb-init \
3535
&& curl -sSL https://install.python-poetry.org | python3 -
3636

37-
####################################################################################################
38-
# udf: used for running the udf vertices
39-
####################################################################################################
40-
FROM builder AS udf
41-
37+
# Copy necessary files and install dependencies
4238
WORKDIR $PYSETUP_PATH
4339
COPY ./ ./
4440

@@ -47,6 +43,36 @@ RUN poetry lock
4743
RUN poetry install --no-cache --no-root && \
4844
rm -rf ~/.cache/pypoetry/
4945

46+
####################################################################################################
47+
# udf: used for running the udf vertices
48+
####################################################################################################
49+
FROM python:3.10-slim-bullseye AS udf
50+
51+
ENV PYTHONFAULTHANDLER=1 \
52+
PYTHONUNBUFFERED=1 \
53+
PYTHONHASHSEED=random \
54+
PYSETUP_PATH="/opt/pysetup"
55+
56+
ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/batchmap/flatmap"
57+
ENV VENV_PATH="$EXAMPLE_PATH/.venv"
58+
ENV PATH="$VENV_PATH/bin:$PATH"
59+
60+
# Install only runtime system dependencies
61+
RUN apt-get update \
62+
&& apt-get install --no-install-recommends -y \
63+
wget \
64+
&& apt-get clean && rm -rf /var/lib/apt/lists/* \
65+
\
66+
# install dumb-init
67+
&& wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \
68+
&& chmod +x /dumb-init
69+
70+
# Copy the virtual environment from builder and the application code from the build context
71+
WORKDIR $PYSETUP_PATH
72+
COPY --from=builder $VENV_PATH $VENV_PATH
73+
COPY examples/batchmap/flatmap/ $EXAMPLE_PATH/
74+
75+
WORKDIR $EXAMPLE_PATH
5076
RUN chmod +x entry.sh
5177

5278
ENTRYPOINT ["/dumb-init", "--"]

examples/map/even_odd/Dockerfile

Lines changed: 45 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,72 @@
11
####################################################################################################
2-
# builder: install needed dependencies
2+
# Stage 1: Builder - installs dependencies using poetry
33
####################################################################################################
4-
54
FROM python:3.10-slim-bullseye AS builder
65

76
ENV PYTHONFAULTHANDLER=1 \
8-
PYTHONUNBUFFERED=1 \
9-
PYTHONHASHSEED=random \
10-
PIP_NO_CACHE_DIR=on \
11-
PIP_DISABLE_PIP_VERSION_CHECK=on \
12-
PIP_DEFAULT_TIMEOUT=100 \
13-
POETRY_VERSION=1.2.2 \
14-
POETRY_HOME="/opt/poetry" \
15-
POETRY_VIRTUALENVS_IN_PROJECT=true \
16-
POETRY_NO_INTERACTION=1 \
17-
PYSETUP_PATH="/opt/pysetup"
7+
PYTHONUNBUFFERED=1 \
8+
PYTHONHASHSEED=random \
9+
PIP_NO_CACHE_DIR=on \
10+
PIP_DISABLE_PIP_VERSION_CHECK=on \
11+
PIP_DEFAULT_TIMEOUT=100 \
12+
POETRY_VERSION=1.2.2 \
13+
POETRY_HOME="/opt/poetry" \
14+
POETRY_VIRTUALENVS_IN_PROJECT=true \
15+
POETRY_NO_INTERACTION=1 \
16+
PYSETUP_PATH="/opt/pysetup"
1817

1918
ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/map/even_odd"
2019
ENV VENV_PATH="$EXAMPLE_PATH/.venv"
2120
ENV PATH="$POETRY_HOME/bin:$VENV_PATH/bin:$PATH"
2221

23-
RUN apt-get update \
24-
&& apt-get install --no-install-recommends -y \
22+
# Install build dependencies and poetry
23+
RUN apt-get update && apt-get install --no-install-recommends -y \
2524
curl \
2625
wget \
27-
# deps for building python deps
2826
build-essential \
29-
&& apt-get install -y git \
27+
git \
3028
&& apt-get clean && rm -rf /var/lib/apt/lists/* \
31-
\
32-
# install dumb-init
3329
&& wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \
3430
&& chmod +x /dumb-init \
3531
&& curl -sSL https://install.python-poetry.org | python3 -
3632

33+
# Copy project into builder
34+
WORKDIR $PYSETUP_PATH
35+
COPY ./ ./
36+
37+
# Install deps
38+
WORKDIR $EXAMPLE_PATH
39+
RUN poetry lock && \
40+
poetry install --no-cache --no-root && \
41+
rm -rf ~/.cache/pypoetry/
42+
3743
####################################################################################################
38-
# udf: used for running the udf vertices
44+
# Stage 2: UDF runtime - slim image with only dumb-init, the virtualenv and the example code
3945
####################################################################################################
40-
FROM builder AS udf
46+
FROM python:3.10-slim-bullseye AS udf
47+
48+
ENV PYTHONFAULTHANDLER=1 \
49+
PYTHONUNBUFFERED=1 \
50+
PYTHONHASHSEED=random \
51+
PYSETUP_PATH="/opt/pysetup"
4152

53+
ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/map/even_odd"
54+
ENV VENV_PATH="$EXAMPLE_PATH/.venv"
55+
ENV PATH="$VENV_PATH/bin:$PATH"
56+
57+
# Install only runtime system dependencies
58+
RUN apt-get update && apt-get install --no-install-recommends -y \
59+
wget \
60+
&& apt-get clean && rm -rf /var/lib/apt/lists/* \
61+
&& wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \
62+
&& chmod +x /dumb-init
63+
64+
# Copy the virtual environment and the example directory (including entry.sh) from builder
4265
WORKDIR $PYSETUP_PATH
43-
COPY ./ ./
66+
COPY --from=builder $VENV_PATH $VENV_PATH
67+
COPY --from=builder $EXAMPLE_PATH $EXAMPLE_PATH
4468

4569
WORKDIR $EXAMPLE_PATH
46-
RUN poetry lock
47-
RUN poetry install --no-cache --no-root && \
48-
rm -rf ~/.cache/pypoetry/
49-
5070
RUN chmod +x entry.sh
5171

5272
ENTRYPOINT ["/dumb-init", "--"]

examples/map/even_odd/example.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ def my_handler(keys: list[str], datum: Datum) -> Messages:
2121
return messages
2222

2323

24+
2425
if __name__ == "__main__":
2526
"""
2627
This example shows how to create a simple map function that takes in a

examples/map/flatmap/Dockerfile

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,7 @@ RUN apt-get update \
3434
&& chmod +x /dumb-init \
3535
&& curl -sSL https://install.python-poetry.org | python3 -
3636

37-
####################################################################################################
38-
# udf: used for running the udf vertices
39-
####################################################################################################
40-
FROM builder AS udf
41-
37+
# Copy necessary files and install dependencies
4238
WORKDIR $PYSETUP_PATH
4339
COPY ./ ./
4440

@@ -47,6 +43,36 @@ RUN poetry lock
4743
RUN poetry install --no-cache --no-root && \
4844
rm -rf ~/.cache/pypoetry/
4945

46+
####################################################################################################
47+
# udf: used for running the udf vertices
48+
####################################################################################################
49+
FROM python:3.10-slim-bullseye AS udf
50+
51+
ENV PYTHONFAULTHANDLER=1 \
52+
PYTHONUNBUFFERED=1 \
53+
PYTHONHASHSEED=random \
54+
PYSETUP_PATH="/opt/pysetup"
55+
56+
ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/map/flatmap"
57+
ENV VENV_PATH="$EXAMPLE_PATH/.venv"
58+
ENV PATH="$VENV_PATH/bin:$PATH"
59+
60+
# Install only runtime system dependencies
61+
RUN apt-get update \
62+
&& apt-get install --no-install-recommends -y \
63+
wget \
64+
&& apt-get clean && rm -rf /var/lib/apt/lists/* \
65+
\
66+
# install dumb-init
67+
&& wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \
68+
&& chmod +x /dumb-init
69+
70+
# Copy the virtual environment from builder and the application code from the build context
71+
WORKDIR $PYSETUP_PATH
72+
COPY --from=builder $VENV_PATH $VENV_PATH
73+
COPY examples/map/flatmap/ $EXAMPLE_PATH/
74+
75+
WORKDIR $EXAMPLE_PATH
5076
RUN chmod +x entry.sh
5177

5278
ENTRYPOINT ["/dumb-init", "--"]

examples/map/forward_message/Dockerfile

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,7 @@ RUN apt-get update \
3434
&& chmod +x /dumb-init \
3535
&& curl -sSL https://install.python-poetry.org | python3 -
3636

37-
####################################################################################################
38-
# udf: used for running the udf vertices
39-
####################################################################################################
40-
FROM builder AS udf
41-
37+
# Copy necessary files and install dependencies
4238
WORKDIR $PYSETUP_PATH
4339
COPY ./ ./
4440

@@ -47,6 +43,36 @@ RUN poetry lock
4743
RUN poetry install --no-cache --no-root && \
4844
rm -rf ~/.cache/pypoetry/
4945

46+
####################################################################################################
47+
# udf: used for running the udf vertices
48+
####################################################################################################
49+
FROM python:3.10-slim-bullseye AS udf
50+
51+
ENV PYTHONFAULTHANDLER=1 \
52+
PYTHONUNBUFFERED=1 \
53+
PYTHONHASHSEED=random \
54+
PYSETUP_PATH="/opt/pysetup"
55+
56+
ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/map/forward_message"
57+
ENV VENV_PATH="$EXAMPLE_PATH/.venv"
58+
ENV PATH="$VENV_PATH/bin:$PATH"
59+
60+
# Install only runtime system dependencies
61+
RUN apt-get update \
62+
&& apt-get install --no-install-recommends -y \
63+
wget \
64+
&& apt-get clean && rm -rf /var/lib/apt/lists/* \
65+
\
66+
# install dumb-init
67+
&& wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \
68+
&& chmod +x /dumb-init
69+
70+
# Copy the virtual environment from builder and the application code from the build context
71+
WORKDIR $PYSETUP_PATH
72+
COPY --from=builder $VENV_PATH $VENV_PATH
73+
COPY examples/map/forward_message/ $EXAMPLE_PATH/
74+
75+
WORKDIR $EXAMPLE_PATH
5076
RUN chmod +x entry.sh
5177

5278
ENTRYPOINT ["/dumb-init", "--"]

examples/map/multiproc_map/Dockerfile

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,7 @@ RUN apt-get update \
3434
&& chmod +x /dumb-init \
3535
&& curl -sSL https://install.python-poetry.org | python3 -
3636

37-
####################################################################################################
38-
# udf: used for running the udf vertices
39-
####################################################################################################
40-
FROM builder AS udf
41-
37+
# Copy necessary files and install dependencies
4238
WORKDIR $PYSETUP_PATH
4339
COPY ./ ./
4440

@@ -47,6 +43,36 @@ RUN poetry lock
4743
RUN poetry install --no-cache --no-root && \
4844
rm -rf ~/.cache/pypoetry/
4945

46+
####################################################################################################
47+
# udf: used for running the udf vertices
48+
####################################################################################################
49+
FROM python:3.10-slim-bullseye AS udf
50+
51+
ENV PYTHONFAULTHANDLER=1 \
52+
PYTHONUNBUFFERED=1 \
53+
PYTHONHASHSEED=random \
54+
PYSETUP_PATH="/opt/pysetup"
55+
56+
ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/map/multiproc_map"
57+
ENV VENV_PATH="$EXAMPLE_PATH/.venv"
58+
ENV PATH="$VENV_PATH/bin:$PATH"
59+
60+
# Install only runtime system dependencies
61+
RUN apt-get update \
62+
&& apt-get install --no-install-recommends -y \
63+
wget \
64+
&& apt-get clean && rm -rf /var/lib/apt/lists/* \
65+
\
66+
# install dumb-init
67+
&& wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \
68+
&& chmod +x /dumb-init
69+
70+
# Copy the virtual environment from builder and the application code from the build context
71+
WORKDIR $PYSETUP_PATH
72+
COPY --from=builder $VENV_PATH $VENV_PATH
73+
COPY examples/map/multiproc_map/ $EXAMPLE_PATH/
74+
75+
WORKDIR $EXAMPLE_PATH
5076
RUN chmod +x entry.sh
5177

5278
ENTRYPOINT ["/dumb-init", "--"]

examples/mapstream/flatmap_stream/Dockerfile

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,7 @@ RUN apt-get update \
3434
&& chmod +x /dumb-init \
3535
&& curl -sSL https://install.python-poetry.org | python3 -
3636

37-
####################################################################################################
38-
# udf: used for running the udf vertices
39-
####################################################################################################
40-
FROM builder AS udf
41-
37+
# Copy necessary files and install dependencies
4238
WORKDIR $PYSETUP_PATH
4339
COPY ./ ./
4440

@@ -47,6 +43,36 @@ RUN poetry lock
4743
RUN poetry install --no-cache --no-root && \
4844
rm -rf ~/.cache/pypoetry/
4945

46+
####################################################################################################
47+
# udf: used for running the udf vertices
48+
####################################################################################################
49+
FROM python:3.10-slim-bullseye AS udf
50+
51+
ENV PYTHONFAULTHANDLER=1 \
52+
PYTHONUNBUFFERED=1 \
53+
PYTHONHASHSEED=random \
54+
PYSETUP_PATH="/opt/pysetup"
55+
56+
ENV EXAMPLE_PATH="$PYSETUP_PATH/examples/mapstream/flatmap_stream"
57+
ENV VENV_PATH="$EXAMPLE_PATH/.venv"
58+
ENV PATH="$VENV_PATH/bin:$PATH"
59+
60+
# Install only runtime system dependencies
61+
RUN apt-get update \
62+
&& apt-get install --no-install-recommends -y \
63+
wget \
64+
&& apt-get clean && rm -rf /var/lib/apt/lists/* \
65+
\
66+
# install dumb-init
67+
&& wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \
68+
&& chmod +x /dumb-init
69+
70+
# Copy the virtual environment from builder and the application code from the build context
71+
WORKDIR $PYSETUP_PATH
72+
COPY --from=builder $VENV_PATH $VENV_PATH
73+
COPY examples/mapstream/flatmap_stream/ $EXAMPLE_PATH/
74+
75+
WORKDIR $EXAMPLE_PATH
5076
RUN chmod +x entry.sh
5177

5278
ENTRYPOINT ["/dumb-init", "--"]

examples/reduce/asyncio_reduce/Dockerfile

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,16 +33,38 @@ RUN apt-get update \
3333
&& chmod +x /dumb-init \
3434
&& curl -sSL https://install.python-poetry.org | python3 -
3535

36-
####################################################################################################
37-
# udf: used for running the udf vertices
38-
####################################################################################################
39-
FROM builder AS udf
40-
36+
# Copy necessary files and install dependencies
4137
WORKDIR $PYSETUP_PATH
4238
COPY pyproject.toml ./
4339
RUN poetry install --no-cache --no-root && \
4440
rm -rf ~/.cache/pypoetry/
4541

42+
####################################################################################################
43+
# udf: used for running the udf vertices
44+
####################################################################################################
45+
FROM python:3.10-slim-bullseye AS udf
46+
47+
ENV PYTHONFAULTHANDLER=1 \
48+
PYTHONUNBUFFERED=1 \
49+
PYTHONHASHSEED=random \
50+
PYSETUP_PATH="/opt/pysetup" \
51+
VENV_PATH="/opt/pysetup/.venv"
52+
53+
ENV PATH="$VENV_PATH/bin:$PATH"
54+
55+
# Install only runtime system dependencies
56+
RUN apt-get update \
57+
&& apt-get install --no-install-recommends -y \
58+
wget \
59+
&& apt-get clean && rm -rf /var/lib/apt/lists/* \
60+
\
61+
# install dumb-init
62+
&& wget -O /dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.5/dumb-init_1.2.5_x86_64 \
63+
&& chmod +x /dumb-init
64+
65+
# Copy the virtual environment from builder; the application code is added from the build context
66+
WORKDIR $PYSETUP_PATH
67+
COPY --from=builder $VENV_PATH $VENV_PATH
4668
ADD . /app
4769
WORKDIR /app
4870

0 commit comments

Comments
 (0)