Skip to content

Commit 05c9339

Browse files
committed
Merge branch 'main' of github.com:apache/iceberg-python into fd-rust
2 parents 16a20ec + 26ecfe7 commit 05c9339

23 files changed

+2131
-67
lines changed

.github/workflows/python-ci.yml

Lines changed: 96 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,10 @@ concurrency:
4343
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
4444

4545
jobs:
46-
lint-and-test:
46+
lint-and-unit-test:
4747
runs-on: ubuntu-latest
4848
strategy:
49+
fail-fast: true
4950
matrix:
5051
python: ['3.10', '3.11', '3.12', '3.13']
5152

@@ -56,6 +57,8 @@ jobs:
5657
python-version: ${{ matrix.python }}
5758
- name: Install UV
5859
uses: astral-sh/setup-uv@v7
60+
with:
61+
enable-cache: true
5962
- name: Install system dependencies
6063
run: sudo apt-get update && sudo apt-get install -y libkrb5-dev # for kerberos
6164
- name: Install
@@ -69,45 +72,130 @@ jobs:
6972

7073
integration-test:
7174
runs-on: ubuntu-latest
72-
strategy:
73-
matrix:
74-
python: ['3.10', '3.11', '3.12', '3.13']
75-
7675
steps:
7776
- uses: actions/checkout@v6
7877
- uses: actions/setup-python@v6
7978
with:
80-
python-version: ${{ matrix.python }}
79+
python-version: '3.12'
8180
- name: Install UV
8281
uses: astral-sh/setup-uv@v7
82+
with:
83+
enable-cache: true
8384
- name: Install system dependencies
8485
run: sudo apt-get update && sudo apt-get install -y libkrb5-dev # for kerberos
8586
- name: Install
8687
run: make install
87-
8888
- name: Run integration tests with coverage
8989
run: COVERAGE=1 make test-integration
9090
- name: Show debug logs
9191
if: ${{ failure() }}
92-
run: docker compose -f dev/docker-compose.yml logs
92+
run: docker compose -f dev/docker-compose-integration.yml logs
93+
- name: Upload coverage data
94+
uses: actions/upload-artifact@v4
95+
with:
96+
name: coverage-integration
97+
path: .coverage*
98+
include-hidden-files: true
9399

100+
integration-test-s3:
101+
runs-on: ubuntu-latest
102+
steps:
103+
- uses: actions/checkout@v6
104+
- uses: actions/setup-python@v6
105+
with:
106+
python-version: '3.12'
107+
- name: Install UV
108+
uses: astral-sh/setup-uv@v7
109+
with:
110+
enable-cache: true
111+
- name: Install system dependencies
112+
run: sudo apt-get update && sudo apt-get install -y libkrb5-dev # for kerberos
113+
- name: Install
114+
run: make install
94115
- name: Run s3 integration tests with coverage
95116
run: COVERAGE=1 make test-s3
96117
- name: Show debug logs
97118
if: ${{ failure() }}
98119
run: docker compose -f dev/docker-compose.yml logs
120+
- name: Upload coverage data
121+
uses: actions/upload-artifact@v4
122+
with:
123+
name: coverage-s3
124+
path: .coverage*
125+
include-hidden-files: true
99126

127+
integration-test-adls:
128+
runs-on: ubuntu-latest
129+
steps:
130+
- uses: actions/checkout@v6
131+
- uses: actions/setup-python@v6
132+
with:
133+
python-version: '3.12'
134+
- name: Install UV
135+
uses: astral-sh/setup-uv@v7
136+
with:
137+
enable-cache: true
138+
- name: Install system dependencies
139+
run: sudo apt-get update && sudo apt-get install -y libkrb5-dev # for kerberos
140+
- name: Install
141+
run: make install
100142
- name: Run adls integration tests with coverage
101143
run: COVERAGE=1 make test-adls
102144
- name: Show debug logs
103145
if: ${{ failure() }}
104146
run: docker compose -f dev/docker-compose-azurite.yml logs
147+
- name: Upload coverage data
148+
uses: actions/upload-artifact@v4
149+
with:
150+
name: coverage-adls
151+
path: .coverage*
152+
include-hidden-files: true
105153

154+
integration-test-gcs:
155+
runs-on: ubuntu-latest
156+
steps:
157+
- uses: actions/checkout@v6
158+
- uses: actions/setup-python@v6
159+
with:
160+
python-version: '3.12'
161+
- name: Install UV
162+
uses: astral-sh/setup-uv@v7
163+
with:
164+
enable-cache: true
165+
- name: Install system dependencies
166+
run: sudo apt-get update && sudo apt-get install -y libkrb5-dev # for kerberos
167+
- name: Install
168+
run: make install
106169
- name: Run gcs integration tests with coverage
107170
run: COVERAGE=1 make test-gcs
108171
- name: Show debug logs
109172
if: ${{ failure() }}
110173
run: docker compose -f dev/docker-compose-gcs-server.yml logs
174+
- name: Upload coverage data
175+
uses: actions/upload-artifact@v4
176+
with:
177+
name: coverage-gcs
178+
path: .coverage*
179+
include-hidden-files: true
111180

181+
integration-coverage-report:
182+
runs-on: ubuntu-latest
183+
needs: [integration-test, integration-test-s3, integration-test-adls, integration-test-gcs]
184+
steps:
185+
- uses: actions/checkout@v6
186+
- uses: actions/setup-python@v6
187+
with:
188+
python-version: '3.12'
189+
- name: Install UV
190+
uses: astral-sh/setup-uv@v7
191+
with:
192+
enable-cache: true
193+
- name: Install dependencies
194+
run: uv sync --group dev
195+
- name: Download all coverage artifacts
196+
uses: actions/download-artifact@v4
197+
with:
198+
pattern: coverage-*
199+
merge-multiple: true
112200
- name: Generate coverage report (75%) # Coverage threshold should only increase over time — never decrease it!
113201
run: COVERAGE_FAIL_UNDER=75 make coverage-report

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ bin/
4141
.mypy_cache/
4242
htmlcov
4343

44+
# Jupyter notebook checkpoints
45+
.ipynb_checkpoints/
46+
4447
pyiceberg/avro/decoder_fast.c
4548
pyiceberg/avro/*.html
4649
pyiceberg/avro/*.so

.pre-commit-config.yaml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@ repos:
3232
- id: ruff
3333
args: [ --fix, --exit-non-zero-on-fix ]
3434
- id: ruff-format
35+
- repo: https://github.com/nbQA-dev/nbQA
36+
rev: 1.9.1
37+
hooks:
38+
- id: nbqa-ruff
39+
args: [ --fix, --exit-non-zero-on-fix ]
3540
- repo: https://github.com/pre-commit/mirrors-mypy
3641
rev: v1.18.2
3742
hooks:
@@ -70,3 +75,11 @@ repos:
7075
rev: v2.4.1
7176
hooks:
7277
- id: codespell
78+
- repo: local
79+
hooks:
80+
- id: uv-lock-check
81+
name: uv lock file check
82+
entry: make uv-lock-check
83+
language: system
84+
pass_filenames: false
85+
files: ^(pyproject\.toml|uv\.lock)$

Makefile

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,10 @@ setup-venv: ## Create virtual environment
7272
install-dependencies: setup-venv ## Install all dependencies including extras
7373
uv sync $(PYTHON_ARG) --all-extras --reinstall
7474

75-
install: install-uv install-dependencies ## Install uv and dependencies
75+
install-hooks: ## Install pre-commit hooks
76+
uv run $(PYTHON_ARG) prek install
77+
78+
install: install-uv install-dependencies install-hooks ## Install uv, dependencies, and pre-commit hooks
7679

7780
# ===============
7881
# Code Validation
@@ -97,7 +100,7 @@ test: ## Run all unit tests (excluding integration)
97100

98101
test-integration: test-integration-setup test-integration-exec test-integration-cleanup ## Run integration tests
99102

100-
test-integration-setup: ## Start Docker services for integration tests
103+
test-integration-setup: install ## Start Docker services for integration tests
101104
docker compose -f dev/docker-compose-integration.yml kill
102105
docker compose -f dev/docker-compose-integration.yml rm -f
103106
docker compose -f dev/docker-compose-integration.yml up -d --build --wait
@@ -153,6 +156,21 @@ docs-serve: ## Serve local docs preview (hot reload)
153156
docs-build: ## Build the static documentation site
154157
uv run $(PYTHON_ARG) mkdocs build -f mkdocs/mkdocs.yml --strict
155158

159+
# ========================
160+
# Experimentation
161+
# ========================
162+
163+
##@ Experimentation
164+
165+
notebook-install: ## Install notebook dependencies
166+
uv sync $(PYTHON_ARG) --all-extras --group notebook
167+
168+
notebook: notebook-install ## Launch notebook for experimentation
169+
uv run jupyter lab --notebook-dir=notebooks
170+
171+
notebook-infra: notebook-install test-integration-setup ## Launch notebook with integration test infra (Spark, Iceberg Rest Catalog, object storage, etc.)
172+
uv run jupyter lab --notebook-dir=notebooks
173+
156174
# ===================
157175
# Project Maintenance
158176
# ===================
@@ -167,4 +185,14 @@ clean: ## Remove build artifacts and caches
167185
@find . -name "__pycache__" -exec echo Deleting {} \; -exec rm -rf {} +
168186
@find . -name "*.pyd" -exec echo Deleting {} \; -delete
169187
@find . -name "*.pyo" -exec echo Deleting {} \; -delete
188+
@echo "Cleaning up Jupyter notebook checkpoints..."
189+
@find . -name ".ipynb_checkpoints" -exec echo Deleting {} \; -exec rm -rf {} +
170190
@echo "Cleanup complete."
191+
192+
uv-lock: ## Regenerate uv.lock file from pyproject.toml
193+
uv lock $(PYTHON_ARG)
194+
195+
uv-lock-check: ## Verify uv.lock is up to date
196+
@command -v uv >/dev/null || \
197+
(echo "uv is required. Run 'make install' or 'make install-uv' first." && exit 1)
198+
uv lock --check $(PYTHON_ARG)

dev/.rat-excludes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ build
55
.gitignore
66
uv.lock
77
mkdocs/*
8+
notebooks/*

dev/docker-compose-integration.yml

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,21 +17,22 @@
1717

1818
services:
1919
spark-iceberg:
20-
container_name: pyiceberg-spark
20+
image: pyiceberg-spark:latest
2121
build: spark/
22+
container_name: pyiceberg-spark
2223
networks:
2324
iceberg_net:
2425
depends_on:
2526
- rest
2627
- hive
2728
- minio
29+
ports:
30+
- 15002:15002 # Spark Connect
31+
- 4040:4040 # Spark UI
2832
environment:
2933
- AWS_ACCESS_KEY_ID=admin
3034
- AWS_SECRET_ACCESS_KEY=password
3135
- AWS_REGION=us-east-1
32-
ports:
33-
- 15002:15002 # Spark Connect
34-
- 4040:4040 # Spark UI
3536
links:
3637
- rest:rest
3738
- hive:hive
@@ -60,25 +61,25 @@ services:
6061
minio:
6162
image: minio/minio
6263
container_name: pyiceberg-minio
63-
environment:
64-
- MINIO_ROOT_USER=admin
65-
- MINIO_ROOT_PASSWORD=password
66-
- MINIO_DOMAIN=minio
6764
networks:
6865
iceberg_net:
6966
aliases:
7067
- warehouse.minio
7168
ports:
7269
- 9001:9001
7370
- 9000:9000
71+
environment:
72+
- MINIO_ROOT_USER=admin
73+
- MINIO_ROOT_PASSWORD=password
74+
- MINIO_DOMAIN=minio
7475
command: ["server", "/data", "--console-address", ":9001"]
7576
mc:
76-
depends_on:
77-
- minio
7877
image: minio/mc
7978
container_name: pyiceberg-mc
8079
networks:
8180
iceberg_net:
81+
depends_on:
82+
- minio
8283
environment:
8384
- AWS_ACCESS_KEY_ID=admin
8485
- AWS_SECRET_ACCESS_KEY=password
@@ -91,6 +92,7 @@ services:
9192
tail -f /dev/null
9293
"
9394
hive:
95+
image: pyiceberg-hive:latest
9496
build: hive/
9597
container_name: pyiceberg-hive
9698
hostname: hive

dev/hive/Dockerfile

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,28 @@
1515

1616
FROM apache/hive:4.0.0
1717

18-
ENV HADOOP_VERSION=3.3.6
19-
ENV AWS_SDK_BUNDLE=1.12.753
18+
# Dependency versions - changing these invalidates the JAR download layer
19+
ARG HADOOP_VERSION=3.3.6
20+
ARG AWS_SDK_BUNDLE=1.12.753
21+
ARG MAVEN_MIRROR=https://repo1.maven.org/maven2
2022

2123
USER root
2224

23-
# Install curl, download JARs, and cleanup in a single layer
24-
RUN apt-get update -qq && apt-get -qq -y install curl && \
25-
curl https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/${HADOOP_VERSION}/hadoop-aws-${HADOOP_VERSION}.jar -Lo /opt/hive/lib/hadoop-aws-${HADOOP_VERSION}.jar && \
26-
curl https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/${AWS_SDK_BUNDLE}/aws-java-sdk-bundle-${AWS_SDK_BUNDLE}.jar -Lo /opt/hive/lib/aws-java-sdk-bundle-${AWS_SDK_BUNDLE}.jar && \
27-
apt-get clean && rm -rf /var/lib/apt/lists/*
25+
# Install curl (separate layer - rarely changes)
26+
RUN apt-get update -qq && \
27+
apt-get -qq -y install --no-install-recommends curl && \
28+
apt-get clean && \
29+
rm -rf /var/lib/apt/lists/*
2830

31+
# Download JARs with retry logic (slow layer - only changes when versions change)
32+
RUN curl -fsSL --retry 3 --retry-delay 5 \
33+
-o /opt/hive/lib/hadoop-aws-${HADOOP_VERSION}.jar \
34+
"${MAVEN_MIRROR}/org/apache/hadoop/hadoop-aws/${HADOOP_VERSION}/hadoop-aws-${HADOOP_VERSION}.jar" && \
35+
curl -fsSL --retry 3 --retry-delay 5 \
36+
-o /opt/hive/lib/aws-java-sdk-bundle-${AWS_SDK_BUNDLE}.jar \
37+
"${MAVEN_MIRROR}/com/amazonaws/aws-java-sdk-bundle/${AWS_SDK_BUNDLE}/aws-java-sdk-bundle-${AWS_SDK_BUNDLE}.jar"
38+
39+
# Copy configuration last (changes more frequently than JARs)
2940
COPY core-site.xml /opt/hadoop/etc/hadoop/core-site.xml
3041

3142
USER hive

0 commit comments

Comments
 (0)