Skip to content

Commit ec02509

Browse files
bobbai00, claude, and chenlica
authored
chore: remove R support from Docker images for license compliance (#4385)
### What changes were proposed in this PR?

Remove R support from `computing-unit-master` and `computing-unit-worker` Docker images. R itself is GPLv2, and its install chain required `gnupg`/`dirmngr`/`software-properties-common` (GPLv3/v2) — all ASF Category X.

Also drops unused `git` and `unzip` from the runtime stage (JGit reads `.git` directly; no git CLI needed).

R UDF Scala/Python/frontend code is left intact. `executor_manager.py` already raises a clear ImportError when the optional `texera-rudf` plugin is missing, so users who need R can build their own image on top.

### Any related issues, documentation, discussions?

Part of #4371.

### How was this PR tested?

Images built locally without R. Non-R services are untouched.

### Was this PR authored or co-authored using generative AI tooling?

Co-authored with: Claude Code (claude-opus-4-6)

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Co-authored-by: Chen Li <chenli@gmail.com>
1 parent e0fc7df commit ec02509

4 files changed

Lines changed: 10 additions & 148 deletions

File tree

.github/workflows/build-and-push-images.yml

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,6 @@ on:
4949
- both
5050
- amd64
5151
- arm64
52-
with_r_support:
53-
description: 'Enable R support for workflow-execution-coordinator'
54-
required: false
55-
default: false
56-
type: boolean
5752
schedule:
5853
# Run nightly at 2:00 AM UTC
5954
- cron: '0 2 * * *'
@@ -76,7 +71,6 @@ jobs:
7671
docker_registry: ${{ steps.set-params.outputs.docker_registry }}
7772
services: ${{ steps.set-params.outputs.services }}
7873
platforms: ${{ steps.set-params.outputs.platforms }}
79-
with_r_support: ${{ steps.set-params.outputs.with_r_support }}
8074
steps:
8175
- name: Set build parameters
8276
id: set-params
@@ -91,7 +85,6 @@ jobs:
9185
echo "docker_registry=ghcr.io/apache" >> $GITHUB_OUTPUT
9286
echo "services=*" >> $GITHUB_OUTPUT
9387
echo "platforms=both" >> $GITHUB_OUTPUT
94-
echo "with_r_support=false" >> $GITHUB_OUTPUT
9588
else
9689
echo "Manual workflow_dispatch - using user inputs"
9790
BRANCH="${{ github.event.inputs.branch || 'main' }}"
@@ -109,7 +102,6 @@ jobs:
109102
echo "docker_registry=${{ github.event.inputs.docker_registry || 'ghcr.io/apache' }}" >> $GITHUB_OUTPUT
110103
echo "services=${{ github.event.inputs.services || '*' }}" >> $GITHUB_OUTPUT
111104
echo "platforms=${{ github.event.inputs.platforms || 'both' }}" >> $GITHUB_OUTPUT
112-
echo "with_r_support=${{ github.event.inputs.with_r_support || 'false' }}" >> $GITHUB_OUTPUT
113105
fi
114106
115107
# Step 1: Generate JOOQ code once and share it
@@ -380,8 +372,6 @@ jobs:
380372
tags: ${{ env.DOCKER_REGISTRY }}/${{ matrix.image_name }}:${{ needs.set-parameters.outputs.image_tag }}-amd64
381373
cache-from: type=gha,scope=${{ matrix.image_name }}-amd64
382374
cache-to: type=gha,mode=max,scope=${{ matrix.image_name }}-amd64
383-
build-args: |
384-
${{ (matrix.service == 'computing-unit-master' || matrix.service == 'computing-unit-worker') && needs.set-parameters.outputs.with_r_support == 'true' && 'WITH_R_SUPPORT=true' || '' }}
385375
labels: |
386376
org.opencontainers.image.title=${{ matrix.image_name }}
387377
org.opencontainers.image.description=Apache Texera ${{ matrix.image_name }} (AMD64)
@@ -468,8 +458,6 @@ jobs:
468458
tags: ${{ env.DOCKER_REGISTRY }}/${{ matrix.image_name }}:${{ needs.set-parameters.outputs.image_tag }}-arm64
469459
cache-from: type=gha,scope=${{ matrix.image_name }}-arm64
470460
cache-to: type=gha,mode=max,scope=${{ matrix.image_name }}-arm64
471-
build-args: |
472-
${{ (matrix.service == 'computing-unit-master' || matrix.service == 'computing-unit-worker') && needs.set-parameters.outputs.with_r_support == 'true' && 'WITH_R_SUPPORT=true' || '' }}
473461
labels: |
474462
org.opencontainers.image.title=${{ matrix.image_name }}
475463
org.opencontainers.image.description=Apache Texera ${{ matrix.image_name }} (ARM64)
@@ -531,7 +519,6 @@ jobs:
531519
echo "- **Tag:** \`${{ needs.set-parameters.outputs.image_tag }}\`" >> $GITHUB_STEP_SUMMARY
532520
echo "- **Services:** ${{ needs.set-parameters.outputs.services }}" >> $GITHUB_STEP_SUMMARY
533521
echo "- **Platforms:** ${{ needs.set-parameters.outputs.platforms }}" >> $GITHUB_STEP_SUMMARY
534-
echo "- **R Support:** ${{ needs.set-parameters.outputs.with_r_support }}" >> $GITHUB_STEP_SUMMARY
535522
echo "" >> $GITHUB_STEP_SUMMARY
536523
echo "## Build Method" >> $GITHUB_STEP_SUMMARY
537524
echo "**Parallel platform builds** (faster)" >> $GITHUB_STEP_SUMMARY

bin/build-images.sh

Lines changed: 7 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ set -e
2020
# Default values
2121
DEFAULT_TAG="latest"
2222
DEFAULT_SERVICES="*"
23-
WITH_R_SUPPORT="false"
2423

2524
# Parse command-line arguments
2625
while [[ $# -gt 0 ]]; do
@@ -33,21 +32,16 @@ while [[ $# -gt 0 ]]; do
3332
SERVICES_INPUT="$2"
3433
shift 2
3534
;;
36-
--with-r-support)
37-
WITH_R_SUPPORT="true"
38-
shift
39-
;;
4035
--help|-h)
4136
echo "Usage: $0 [OPTIONS]"
4237
echo ""
4338
echo "Options:"
4439
echo " -t, --tag TAG Base tag for the images (default: latest)"
4540
echo " -s, --services SERVICES Services to build, comma-separated or '*' for all (default: *)"
46-
echo " --with-r-support Enable R support for computing-unit-master (sets WITH_R_SUPPORT=true)"
4741
echo " -h, --help Show this help message"
4842
echo ""
4943
echo "Examples:"
50-
echo " $0 --tag v1.0.0 --services '*' --with-r-support"
44+
echo " $0 --tag v1.0.0 --services '*'"
5145
echo " $0 -t latest -s 'gui,computing-unit-master'"
5246
echo " $0 # Interactive mode"
5347
exit 0
@@ -107,9 +101,6 @@ fi
107101

108102
FULL_TAG="${BASE_TAG}-${TAG_SUFFIX}"
109103
echo "🔍 Detected architecture: $ARCH -> Building for $PLATFORM with tag :$FULL_TAG"
110-
if [[ "$WITH_R_SUPPORT" == "true" ]]; then
111-
echo "🔍 R support enabled for computing-unit-master"
112-
fi
113104

114105
# Ensure Buildx is ready
115106
docker buildx create --name texera-builder --use --bootstrap > /dev/null 2>&1 || docker buildx use texera-builder
@@ -137,23 +128,12 @@ for dockerfile in "${dockerfiles[@]}"; do
137128
image="texera/$service_name:$FULL_TAG"
138129
echo "👉 Building $image from $dockerfile"
139130

140-
# Add WITH_R_SUPPORT build arg for computing-unit-master
141-
if [[ "$service_name" == "computing-unit-master" && "$WITH_R_SUPPORT" == "true" ]]; then
142-
docker buildx build \
143-
--platform "$PLATFORM" \
144-
-f "$dockerfile" \
145-
-t "$image" \
146-
--build-arg WITH_R_SUPPORT=true \
147-
--push \
148-
..
149-
else
150-
docker buildx build \
151-
--platform "$PLATFORM" \
152-
-f "$dockerfile" \
153-
-t "$image" \
154-
--push \
155-
..
156-
fi
131+
docker buildx build \
132+
--platform "$PLATFORM" \
133+
-f "$dockerfile" \
134+
-t "$image" \
135+
--push \
136+
..
157137
done
158138

159139
# Build pylsp service (directory: pylsp)

bin/computing-unit-master.dockerfile

Lines changed: 2 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -44,83 +44,30 @@ RUN unzip amber/target/universal/amber-*.zip -d amber/target/
4444

4545
FROM eclipse-temurin:11-jdk-jammy AS runtime
4646

47-
# Build argument to enable/disable R support (default: false)
48-
ARG WITH_R_SUPPORT=false
49-
5047
WORKDIR /texera/amber
5148

5249
COPY --from=build /texera/amber/requirements.txt /tmp/requirements.txt
5350
COPY --from=build /texera/amber/operator-requirements.txt /tmp/operator-requirements.txt
5451

55-
# Install Python runtime dependencies (always) and R runtime dependencies (conditional)
52+
# Install Python runtime dependencies
5653
RUN apt-get update && apt-get install -y \
5754
python3-pip \
5855
python3-dev \
5956
libpq-dev \
60-
curl \
61-
unzip \
62-
gnupg \
63-
software-properties-common \
64-
dirmngr \
65-
git \
66-
$(if [ "$WITH_R_SUPPORT" = "true" ]; then echo "\
67-
gfortran \
68-
libxml2-dev \
69-
libssl-dev \
70-
libcurl4-openssl-dev"; fi) \
7157
&& apt-get clean
7258

73-
# Install R from CRAN repository (pre-built, much faster than source compilation)
74-
RUN if [ "$WITH_R_SUPPORT" = "true" ]; then \
75-
# Add CRAN GPG key and repository
76-
curl -fsSL https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc | \
77-
gpg --dearmor -o /usr/share/keyrings/cran-ubuntu-keyring.gpg && \
78-
echo "deb [signed-by=/usr/share/keyrings/cran-ubuntu-keyring.gpg] https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/" | \
79-
tee /etc/apt/sources.list.d/cran.list && \
80-
apt-get update && \
81-
apt-get install -y r-base r-base-dev && \
82-
R --version; \
83-
fi
84-
8559
# Install Python packages
8660
RUN pip3 install --upgrade pip setuptools wheel && \
8761
pip3 install -r /tmp/requirements.txt && \
8862
pip3 install -r /tmp/operator-requirements.txt
8963

90-
# Install texera-rudf and its dependencies (conditional)
91-
RUN if [ "$WITH_R_SUPPORT" = "true" ]; then \
92-
pip3 install git+https://github.com/Texera/texera-rudf.git; \
93-
fi
94-
95-
# Install R packages with pinned versions for texera-rudf (conditional)
96-
RUN if [ "$WITH_R_SUPPORT" = "true" ]; then \
97-
Rscript -e "options(repos = c(CRAN = 'https://cran.r-project.org')); \
98-
if (!requireNamespace('remotes', quietly=TRUE)) \
99-
install.packages('remotes', Ncpus = parallel::detectCores()); \
100-
remotes::install_version('arrow', version='14.0.2.1', \
101-
repos='https://cran.r-project.org', upgrade='never', \
102-
Ncpus = parallel::detectCores()); \
103-
remotes::install_version('coro', version='1.1.0', \
104-
repos='https://cran.r-project.org', upgrade='never', \
105-
Ncpus = parallel::detectCores()); \
106-
remotes::install_version('aws.s3', version='0.3.22', \
107-
repos='https://cran.r-project.org', upgrade='never', \
108-
Ncpus = parallel::detectCores()); \
109-
cat('R package versions:\n'); \
110-
cat(' arrow: ', as.character(packageVersion('arrow')), '\n'); \
111-
cat(' coro: ', as.character(packageVersion('coro')), '\n'); \
112-
cat(' aws.s3: ', as.character(packageVersion('aws.s3')), '\n')"; \
113-
fi
114-
115-
ENV LD_LIBRARY_PATH=/usr/lib/R/lib:$LD_LIBRARY_PATH
116-
11764
# Copy the built texera binary from the build phase
11865
COPY --from=build /texera/.git /texera/amber/.git
11966
COPY --from=build /texera/amber/target/amber-* /texera/amber/
12067
# Copy resources directories from build phase
12168
COPY --from=build /texera/common/config/src/main/resources /texera/amber/common/config/src/main/resources
12269
COPY --from=build /texera/amber/src/main/resources /texera/amber/src/main/resources
123-
# Copy code for python & R UDF
70+
# Copy code for python UDF
12471
COPY --from=build /texera/amber/src/main/python /texera/amber/src/main/python
12572
# Copy ASF licensing files
12673
COPY --from=build /texera/LICENSE /texera/NOTICE /texera/DISCLAIMER-WIP /texera/

bin/computing-unit-worker.dockerfile

Lines changed: 1 addition & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -44,77 +44,25 @@ RUN unzip amber/target/universal/amber-*.zip -d amber/target/
4444

4545
FROM eclipse-temurin:11-jre-jammy AS runtime
4646

47-
# Build argument to enable/disable R support (default: false)
48-
ARG WITH_R_SUPPORT=false
49-
5047
WORKDIR /texera/amber
5148

5249
COPY --from=build /texera/amber/requirements.txt /tmp/requirements.txt
5350
COPY --from=build /texera/amber/operator-requirements.txt /tmp/operator-requirements.txt
5451

55-
# Install Python runtime dependencies (always) and R runtime dependencies (conditional)
52+
# Install Python runtime dependencies
5653
RUN apt-get update && apt-get install -y \
5754
python3-pip \
5855
python3-dev \
5956
libpq-dev \
60-
curl \
61-
gnupg \
62-
software-properties-common \
63-
dirmngr \
64-
git \
65-
$(if [ "$WITH_R_SUPPORT" = "true" ]; then echo "\
66-
gfortran \
67-
libxml2-dev \
68-
libssl-dev \
69-
libcurl4-openssl-dev"; fi) \
7057
&& apt-get clean
7158

72-
# Install R from CRAN repository (pre-built, much faster than source compilation)
73-
RUN if [ "$WITH_R_SUPPORT" = "true" ]; then \
74-
# Add CRAN GPG key and repository
75-
curl -fsSL https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc | \
76-
gpg --dearmor -o /usr/share/keyrings/cran-ubuntu-keyring.gpg && \
77-
echo "deb [signed-by=/usr/share/keyrings/cran-ubuntu-keyring.gpg] https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/" | \
78-
tee /etc/apt/sources.list.d/cran.list && \
79-
apt-get update && \
80-
apt-get install -y r-base r-base-dev && \
81-
R --version; \
82-
fi
83-
8459
# Install Python packages
8560
RUN pip3 install --upgrade pip setuptools wheel && \
8661
pip3 install python-lsp-server python-lsp-server[websockets] && \
8762
pip3 install -r /tmp/requirements.txt && \
8863
(pip3 install --no-cache-dir --find-links https://pypi.org/simple/ -r /tmp/operator-requirements.txt || \
8964
pip3 install --no-cache-dir wordcloud==1.9.2)
9065

91-
# Install texera-rudf and its dependencies (conditional)
92-
RUN if [ "$WITH_R_SUPPORT" = "true" ]; then \
93-
pip3 install git+https://github.com/Texera/texera-rudf.git; \
94-
fi
95-
96-
# Install R packages with pinned versions for texera-rudf (conditional)
97-
RUN if [ "$WITH_R_SUPPORT" = "true" ]; then \
98-
Rscript -e "options(repos = c(CRAN = 'https://cran.r-project.org')); \
99-
if (!requireNamespace('remotes', quietly=TRUE)) \
100-
install.packages('remotes', Ncpus = parallel::detectCores()); \
101-
remotes::install_version('arrow', version='22.0.0.1', \
102-
repos='https://cran.r-project.org', upgrade='never', \
103-
Ncpus = parallel::detectCores()); \
104-
remotes::install_version('coro', version='1.1.0', \
105-
repos='https://cran.r-project.org', upgrade='never', \
106-
Ncpus = parallel::detectCores()); \
107-
remotes::install_version('aws.s3', version='0.3.22', \
108-
repos='https://cran.r-project.org', upgrade='never', \
109-
Ncpus = parallel::detectCores()); \
110-
cat('R package versions:\n'); \
111-
cat(' arrow: ', as.character(packageVersion('arrow')), '\n'); \
112-
cat(' coro: ', as.character(packageVersion('coro')), '\n'); \
113-
cat(' aws.s3: ', as.character(packageVersion('aws.s3')), '\n')"; \
114-
fi
115-
116-
ENV LD_LIBRARY_PATH=/usr/lib/R/lib:$LD_LIBRARY_PATH
117-
11866
# Copy the built texera binary from the build phase
11967
COPY --from=build /texera/amber/target/amber-* /texera/amber/
12068
# Copy resources directories from build phase

0 commit comments

Comments
 (0)