Skip to content

Commit 0c57d0c

Browse files
committed
Adds Docker Compose setup for Trino
Adds a Docker Compose configuration for setting up Trino with Iceberg, including support for Hive Metastore and REST catalog types. This allows for easier testing and development with Trino and Iceberg. closes #2219
1 parent 60ebe93 commit 0c57d0c

File tree

12 files changed

+446
-1
lines changed

12 files changed

+446
-1
lines changed

Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,10 @@ test-integration-rebuild: ## Rebuild integration Docker services from scratch
117117
docker compose -f dev/docker-compose-integration.yml rm -f
118118
docker compose -f dev/docker-compose-integration.yml build --no-cache
119119

120+
test-integration-trino: ## Run tests marked with @pytest.mark.integration_trino
121+
sh ./dev/run-trino.sh
122+
$(TEST_RUNNER) pytest tests/ -m integration_trino $(PYTEST_ARGS)
123+
120124
test-s3: ## Run tests marked with @pytest.mark.s3
121125
sh ./dev/run-minio.sh
122126
$(TEST_RUNNER) pytest tests/ -m s3 $(PYTEST_ARGS)

dev/docker-compose-integration.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,21 @@ services:
5757
- CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
5858
- CATALOG_S3_ENDPOINT=http://minio:9000
5959
- CATALOG_JDBC_STRICT__MODE=true
60+
61+
trino:
62+
image: trinodb/trino:478
63+
container_name: pyiceberg-trino
64+
networks:
65+
iceberg_net:
66+
ports:
67+
- 8082:8080
68+
environment:
69+
- CATALOG_MANAGEMENT=dynamic
70+
depends_on:
71+
- rest
72+
- hive
73+
volumes:
74+
- ./trino/catalog:/etc/trino/catalog
6075
minio:
6176
image: minio/minio
6277
container_name: pyiceberg-minio

dev/docker-compose-trino.yml

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
services:
2+
rest:
3+
image: apache/iceberg-rest-fixture
4+
container_name: pyiceberg-rest
5+
networks:
6+
iceberg_net:
7+
ports:
8+
- 8181:8181
9+
environment:
10+
- AWS_ACCESS_KEY_ID=admin
11+
- AWS_SECRET_ACCESS_KEY=password
12+
- AWS_REGION=us-east-1
13+
- CATALOG_WAREHOUSE=s3://warehouse/
14+
- CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
15+
- CATALOG_S3_ENDPOINT=http://minio:9000
16+
17+
trino:
18+
image: trinodb/trino:478
19+
container_name: pyiceberg-trino
20+
networks:
21+
iceberg_net:
22+
ports:
23+
- 8082:8080
24+
environment:
25+
- CATALOG_MANAGEMENT=dynamic
26+
depends_on:
27+
- rest
28+
- hive
29+
volumes:
30+
- ./trino/catalog:/etc/trino/catalog
31+
- ./trino/config.properties:/etc/trino/config.properties
32+
33+
minio:
34+
image: minio/minio
35+
container_name: pyiceberg-minio
36+
environment:
37+
- MINIO_ROOT_USER=admin
38+
- MINIO_ROOT_PASSWORD=password
39+
- MINIO_DOMAIN=minio
40+
networks:
41+
iceberg_net:
42+
aliases:
43+
- warehouse.minio
44+
ports:
45+
- 9001:9001
46+
- 9000:9000
47+
command: ["server", "/data", "--console-address", ":9001"]
48+
mc:
49+
depends_on:
50+
- minio
51+
image: minio/mc
52+
container_name: pyiceberg-mc
53+
networks:
54+
iceberg_net:
55+
environment:
56+
- AWS_ACCESS_KEY_ID=admin
57+
- AWS_SECRET_ACCESS_KEY=password
58+
- AWS_REGION=us-east-1
59+
entrypoint: >
60+
/bin/sh -c "
61+
until (/usr/bin/mc alias set minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done;
62+
/usr/bin/mc mb minio/warehouse;
63+
/usr/bin/mc policy set public minio/warehouse;
64+
tail -f /dev/null
65+
"
66+
67+
hive:
68+
build: hive/
69+
container_name: hive
70+
hostname: hive
71+
networks:
72+
iceberg_net:
73+
ports:
74+
- 9083:9083
75+
environment:
76+
SERVICE_NAME: "metastore"
77+
SERVICE_OPTS: "-Dmetastore.warehouse.dir=s3a://warehouse/hive/"
78+
79+
networks:
80+
iceberg_net:

dev/run-trino.sh

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#!/bin/bash
2+
#
3+
# Licensed to the Apache Software Foundation (ASF) under one
4+
# or more contributor license agreements. See the NOTICE file
5+
# distributed with this work for additional information
6+
# regarding copyright ownership. The ASF licenses this file
7+
# to you under the Apache License, Version 2.0 (the
8+
# "License"); you may not use this file except in compliance
9+
# with the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing,
14+
# software distributed under the License is distributed on an
15+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
# KIND, either express or implied. See the License for the
17+
# specific language governing permissions and limitations
18+
# under the License.
19+
#
20+
21+
set -ex
22+
23+
if [ -n "$(docker ps -q --filter "name=pyiceberg-trino" --filter "status=running")" ]; then
24+
echo "Trino service running"
25+
else
26+
docker compose -f dev/docker-compose-trino.yml kill
27+
docker compose -f dev/docker-compose-trino.yml up -d
28+
while [ -z "$(docker ps -q --filter "name=pyiceberg-trino" --filter "status=running")" ]
29+
do
30+
echo "Waiting for Trino"
31+
sleep 1
32+
done
33+
fi
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
connector.name=iceberg
2+
iceberg.catalog.type=hive_metastore
3+
iceberg.expire-snapshots.min-retention=0d
4+
iceberg.remove-orphan-files.min-retention=0d
5+
iceberg.register-table-procedure.enabled=true
6+
hive.metastore.uri=thrift://hive:9083
7+
iceberg.hive-catalog-name=hive
8+
fs.native-s3.enabled=true
9+
s3.region=us-east-1
10+
s3.aws-access-key=admin
11+
s3.aws-secret-key=password
12+
s3.endpoint=http://minio:9000
13+
s3.path-style-access=false
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
connector.name=iceberg
2+
iceberg.catalog.type=rest
3+
iceberg.rest-catalog.uri=http://rest:8181
4+
iceberg.rest-catalog.warehouse=s3://warehouse/default
5+
iceberg.rest-catalog.nested-namespace-enabled=true
6+
iceberg.rest-catalog.case-insensitive-name-matching=true
7+
iceberg.expire-snapshots.min-retention=0d
8+
iceberg.remove-orphan-files.min-retention=0d
9+
iceberg.register-table-procedure.enabled=true
10+
fs.native-s3.enabled=true
11+
s3.region=us-east-1
12+
s3.aws-access-key=admin
13+
s3.aws-secret-key=password
14+
s3.endpoint=http://minio:9000
15+
s3.path-style-access=false

dev/trino/config.properties

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#single node install config
2+
coordinator=true
3+
node-scheduler.include-coordinator=true
4+
http-server.http.port=8080
5+
discovery.uri=http://localhost:8080
6+
http-server.process-forwarded=true
7+
http-server.https.enabled=false
8+
catalog.management=${ENV:CATALOG_MANAGEMENT}

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,8 @@ dev = [
119119
"docutils!=0.21.post1",
120120
"mypy-boto3-glue>=1.28.18",
121121
"mypy-boto3-dynamodb>=1.28.18",
122+
"trino[sqlalchemy]>=0.336.0",
123+
"pre-commit>=4.4.0",
122124
]
123125
# for mkdocs
124126
docs = [
@@ -153,6 +155,7 @@ markers = [
153155
"s3: marks a test as requiring access to s3 compliant storage (use with --aws-access-key-id, --aws-secret-access-key, and --endpoint args)",
154156
"adls: marks a test as requiring access to adls compliant storage (use with --adls.account-name, --adls.account-key, and --adls.endpoint args)",
155157
"integration: marks integration tests against Apache Spark",
158+
"integration_trino: marks integration tests against Trino",
156159
"gcs: marks a test as requiring access to gcs compliant storage (use with --gs.token, --gs.project, and --gs.endpoint)",
157160
"benchmark: collection of tests to validate read/write performance before and after a change",
158161
]

tests/conftest.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import pytest
4848
from moto import mock_aws
4949
from pydantic_core import to_json
50+
from sqlalchemy import Connection
5051

5152
from pyiceberg.catalog import Catalog, load_catalog
5253
from pyiceberg.catalog.noop import NoopCatalog
@@ -143,6 +144,18 @@ def pytest_addoption(parser: pytest.Parser) -> None:
143144
"--gcs.oauth2.token", action="store", default="anon", help="The GCS authentication method for tests marked gcs"
144145
)
145146
parser.addoption("--gcs.project-id", action="store", default="test", help="The GCP project for tests marked gcs")
147+
parser.addoption(
148+
"--trino.rest.endpoint",
149+
action="store",
150+
default="trino://test@localhost:8082/warehouse_rest",
151+
help="The Trino REST endpoint URL for tests marked as integration_trino",
152+
)
153+
parser.addoption(
154+
"--trino.hive.endpoint",
155+
action="store",
156+
default="trino://test@localhost:8082/warehouse_hive",
157+
help="The Trino Hive endpoint URL for tests marked as integration_trino",
158+
)
146159

147160

148161
@pytest.fixture(scope="session")
@@ -2574,6 +2587,28 @@ def bound_reference_uuid() -> BoundReference[str]:
25742587
return BoundReference(field=NestedField(1, "field", UUIDType(), required=False), accessor=Accessor(position=0, inner=None))
25752588

25762589

2590+
@pytest.fixture(scope="session")
2591+
def trino_hive_conn(request: pytest.FixtureRequest) -> Generator[Connection, None, None]:
2592+
from sqlalchemy import create_engine
2593+
2594+
trino_endpoint = request.config.getoption("--trino.hive.endpoint")
2595+
engine = create_engine(trino_endpoint)
2596+
connection = engine.connect()
2597+
yield connection
2598+
connection.close()
2599+
2600+
2601+
@pytest.fixture(scope="session")
2602+
def trino_rest_conn(request: pytest.FixtureRequest) -> Generator[Connection, None, None]:
2603+
from sqlalchemy import create_engine
2604+
2605+
trino_endpoint = request.config.getoption("--trino.rest.endpoint")
2606+
engine = create_engine(trino_endpoint)
2607+
connection = engine.connect()
2608+
yield connection
2609+
connection.close()
2610+
2611+
25772612
@pytest.fixture(scope="session")
25782613
def session_catalog() -> Catalog:
25792614
return load_catalog(

tests/integration/test_rest_catalog.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
# pylint:disable=redefined-outer-name
1818

1919
import pytest
20+
from sqlalchemy import Connection, inspect
2021

2122
from pyiceberg.catalog.rest import RestCatalog
2223

@@ -61,3 +62,22 @@ def test_create_namespace_if_already_existing(catalog: RestCatalog) -> None:
6162
catalog.create_namespace_if_not_exists(TEST_NAMESPACE_IDENTIFIER)
6263

6364
assert catalog.namespace_exists(TEST_NAMESPACE_IDENTIFIER)
65+
66+
67+
@pytest.mark.integration
68+
@pytest.mark.integration_trino
69+
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog")])
70+
def test_schema_exists_in_trino(trino_rest_conn: Connection, catalog: RestCatalog) -> None:
71+
"""Verifies that an Iceberg namespace correctly appears as a schema in Trino.
72+
73+
This test ensures the synchronization between Iceberg's namespace concept and
74+
Trino's schema concept, confirming that after creating a namespace in the Iceberg
75+
catalog, it becomes visible as a schema in the Trino environment.
76+
"""
77+
78+
if catalog.namespace_exists(TEST_NAMESPACE_IDENTIFIER):
79+
catalog.drop_namespace(TEST_NAMESPACE_IDENTIFIER)
80+
catalog.create_namespace_if_not_exists(TEST_NAMESPACE_IDENTIFIER)
81+
82+
assert catalog.namespace_exists(TEST_NAMESPACE_IDENTIFIER)
83+
assert TEST_NAMESPACE_IDENTIFIER.lower() in inspect(trino_rest_conn).get_schema_names()

0 commit comments

Comments
 (0)