Skip to content

Commit 0c57d0c

Browse files
committed
Adds Docker Compose setup for Trino
Adds a Docker Compose configuration for setting up Trino with Iceberg, including support for Hive Metastore and REST catalog types. This allows for easier testing and development with Trino and Iceberg. closes apache#2219
1 parent 60ebe93 commit 0c57d0c

File tree

12 files changed

+446
-1
lines changed

12 files changed

+446
-1
lines changed

Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,10 @@ test-integration-rebuild: ## Rebuild integration Docker services from scratch
117117
docker compose -f dev/docker-compose-integration.yml rm -f
118118
docker compose -f dev/docker-compose-integration.yml build --no-cache
119119

120+
test-integration-trino: ## Run Trino integration tests marked with @pytest.mark.integration_trino
121+
sh ./dev/run-trino.sh
122+
$(TEST_RUNNER) pytest tests/ -m integration_trino $(PYTEST_ARGS)
123+
120124
test-s3: ## Run tests marked with @pytest.mark.s3
121125
sh ./dev/run-minio.sh
122126
$(TEST_RUNNER) pytest tests/ -m s3 $(PYTEST_ARGS)

dev/docker-compose-integration.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,21 @@ services:
5757
- CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
5858
- CATALOG_S3_ENDPOINT=http://minio:9000
5959
- CATALOG_JDBC_STRICT__MODE=true
60+
61+
trino:
62+
image: trinodb/trino:478
63+
container_name: pyiceberg-trino
64+
networks:
65+
iceberg_net:
66+
ports:
67+
- "8082:8080"
68+
environment:
69+
- CATALOG_MANAGEMENT=dynamic
70+
depends_on:
71+
- rest
72+
- hive
73+
volumes:
74+
- ./trino/catalog:/etc/trino/catalog
6075
minio:
6176
image: minio/minio
6277
container_name: pyiceberg-minio

dev/docker-compose-trino.yml

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
services:
2+
rest:
3+
image: apache/iceberg-rest-fixture
4+
container_name: pyiceberg-rest
5+
networks:
6+
iceberg_net:
7+
ports:
8+
- "8181:8181"
9+
environment:
10+
- AWS_ACCESS_KEY_ID=admin
11+
- AWS_SECRET_ACCESS_KEY=password
12+
- AWS_REGION=us-east-1
13+
- CATALOG_WAREHOUSE=s3://warehouse/
14+
- CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
15+
- CATALOG_S3_ENDPOINT=http://minio:9000
16+
17+
trino:
18+
image: trinodb/trino:478
19+
container_name: pyiceberg-trino
20+
networks:
21+
iceberg_net:
22+
ports:
23+
- "8082:8080"
24+
environment:
25+
- CATALOG_MANAGEMENT=dynamic
26+
depends_on:
27+
- rest
28+
- hive
29+
volumes:
30+
- ./trino/catalog:/etc/trino/catalog
31+
- ./trino/config.properties:/etc/trino/config.properties
32+
33+
minio:
34+
image: minio/minio
35+
container_name: pyiceberg-minio
36+
environment:
37+
- MINIO_ROOT_USER=admin
38+
- MINIO_ROOT_PASSWORD=password
39+
- MINIO_DOMAIN=minio
40+
networks:
41+
iceberg_net:
42+
aliases:
43+
- warehouse.minio
44+
ports:
45+
- "9001:9001"
46+
- "9000:9000"
47+
command: ["server", "/data", "--console-address", ":9001"]
48+
mc:
49+
depends_on:
50+
- minio
51+
image: minio/mc
52+
container_name: pyiceberg-mc
53+
networks:
54+
iceberg_net:
55+
environment:
56+
- AWS_ACCESS_KEY_ID=admin
57+
- AWS_SECRET_ACCESS_KEY=password
58+
- AWS_REGION=us-east-1
59+
entrypoint: >
60+
/bin/sh -c "
61+
until (/usr/bin/mc alias set minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done;
62+
/usr/bin/mc mb minio/warehouse;
63+
/usr/bin/mc anonymous set public minio/warehouse;
64+
tail -f /dev/null
65+
"
66+
67+
hive:
68+
build: hive/
69+
container_name: hive
70+
hostname: hive
71+
networks:
72+
iceberg_net:
73+
ports:
74+
- "9083:9083"
75+
environment:
76+
SERVICE_NAME: "metastore"
77+
SERVICE_OPTS: "-Dmetastore.warehouse.dir=s3a://warehouse/hive/"
78+
79+
networks:
80+
iceberg_net:

dev/run-trino.sh

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#!/bin/bash
2+
#
3+
# Licensed to the Apache Software Foundation (ASF) under one
4+
# or more contributor license agreements. See the NOTICE file
5+
# distributed with this work for additional information
6+
# regarding copyright ownership. The ASF licenses this file
7+
# to you under the Apache License, Version 2.0 (the
8+
# "License"); you may not use this file except in compliance
9+
# with the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing,
14+
# software distributed under the License is distributed on an
15+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
# KIND, either express or implied. See the License for the
17+
# specific language governing permissions and limitations
18+
# under the License.
19+
#
20+
21+
set -ex
22+
23+
if [ -n "$(docker ps -q --filter "name=pyiceberg-trino" --filter "status=running")" ]; then
24+
echo "Trino service running"
25+
else
26+
docker compose -f dev/docker-compose-trino.yml kill
27+
docker compose -f dev/docker-compose-trino.yml up -d
28+
while [ -z "$(docker ps -q --filter "name=pyiceberg-trino" --filter "status=running")" ]
29+
do
30+
echo "Waiting for Trino"
31+
sleep 1
32+
done
33+
fi
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
connector.name=iceberg
2+
iceberg.catalog.type=hive_metastore
3+
iceberg.expire-snapshots.min-retention=0d
4+
iceberg.remove-orphan-files.min-retention=0d
5+
iceberg.register-table-procedure.enabled=true
6+
hive.metastore.uri=thrift://hive:9083
7+
iceberg.hive-catalog-name=hive
8+
fs.native-s3.enabled=true
9+
s3.region=us-east-1
10+
s3.aws-access-key=admin
11+
s3.aws-secret-key=password
12+
s3.endpoint=http://minio:9000
13+
s3.path-style-access=false
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
connector.name=iceberg
2+
iceberg.catalog.type=rest
3+
iceberg.rest-catalog.uri=http://rest:8181
4+
iceberg.rest-catalog.warehouse=s3://warehouse/default
5+
iceberg.rest-catalog.nested-namespace-enabled=true
6+
iceberg.rest-catalog.case-insensitive-name-matching=true
7+
iceberg.expire-snapshots.min-retention=0d
8+
iceberg.remove-orphan-files.min-retention=0d
9+
iceberg.register-table-procedure.enabled=true
10+
fs.native-s3.enabled=true
11+
s3.region=us-east-1
12+
s3.aws-access-key=admin
13+
s3.aws-secret-key=password
14+
s3.endpoint=http://minio:9000
15+
s3.path-style-access=false

dev/trino/config.properties

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# single-node install config
2+
coordinator=true
3+
node-scheduler.include-coordinator=true
4+
http-server.http.port=8080
5+
discovery.uri=http://localhost:8080
6+
http-server.process-forwarded=true
7+
http-server.https.enabled=false
8+
catalog.management=${ENV:CATALOG_MANAGEMENT}

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,8 @@ dev = [
119119
"docutils!=0.21.post1",
120120
"mypy-boto3-glue>=1.28.18",
121121
"mypy-boto3-dynamodb>=1.28.18",
122+
"trino[sqlalchemy]>=0.336.0",
123+
"pre-commit>=4.4.0",
122124
]
123125
# for mkdocs
124126
docs = [
@@ -153,6 +155,7 @@ markers = [
153155
"s3: marks a test as requiring access to s3 compliant storage (use with --aws-access-key-id, --aws-secret-access-key, and --endpoint args)",
154156
"adls: marks a test as requiring access to adls compliant storage (use with --adls.account-name, --adls.account-key, and --adls.endpoint args)",
155157
"integration: marks integration tests against Apache Spark",
158+
"integration_trino: marks integration tests against Trino",
156159
"gcs: marks a test as requiring access to gcs compliant storage (use with --gs.token, --gs.project, and --gs.endpoint)",
157160
"benchmark: collection of tests to validate read/write performance before and after a change",
158161
]

tests/conftest.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import pytest
4848
from moto import mock_aws
4949
from pydantic_core import to_json
50+
from sqlalchemy import Connection
5051

5152
from pyiceberg.catalog import Catalog, load_catalog
5253
from pyiceberg.catalog.noop import NoopCatalog
@@ -143,6 +144,18 @@ def pytest_addoption(parser: pytest.Parser) -> None:
143144
"--gcs.oauth2.token", action="store", default="anon", help="The GCS authentication method for tests marked gcs"
144145
)
145146
parser.addoption("--gcs.project-id", action="store", default="test", help="The GCP project for tests marked gcs")
147+
parser.addoption(
148+
"--trino.rest.endpoint",
149+
action="store",
150+
default="trino://test@localhost:8082/warehouse_rest",
151+
help="The Trino REST endpoint URL for tests marked as integration_trino",
152+
)
153+
parser.addoption(
154+
"--trino.hive.endpoint",
155+
action="store",
156+
default="trino://test@localhost:8082/warehouse_hive",
157+
help="The Trino Hive endpoint URL for tests marked as integration_trino",
158+
)
146159

147160

148161
@pytest.fixture(scope="session")
@@ -2574,6 +2587,28 @@ def bound_reference_uuid() -> BoundReference[str]:
25742587
return BoundReference(field=NestedField(1, "field", UUIDType(), required=False), accessor=Accessor(position=0, inner=None))
25752588

25762589

2590+
@pytest.fixture(scope="session")
2591+
def trino_hive_conn(request: pytest.FixtureRequest) -> Generator[Connection, None, None]:
2592+
from sqlalchemy import create_engine
2593+
2594+
trino_endpoint = request.config.getoption("--trino.hive.endpoint")
2595+
engine = create_engine(trino_endpoint)
2596+
connection = engine.connect()
2597+
yield connection
2598+
connection.close()
2599+
2600+
2601+
@pytest.fixture(scope="session")
2602+
def trino_rest_conn(request: pytest.FixtureRequest) -> Generator[Connection, None, None]:
2603+
from sqlalchemy import create_engine
2604+
2605+
trino_endpoint = request.config.getoption("--trino.rest.endpoint")
2606+
engine = create_engine(trino_endpoint)
2607+
connection = engine.connect()
2608+
yield connection
2609+
connection.close()
2610+
2611+
25772612
@pytest.fixture(scope="session")
25782613
def session_catalog() -> Catalog:
25792614
return load_catalog(

tests/integration/test_rest_catalog.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
# pylint:disable=redefined-outer-name
1818

1919
import pytest
20+
from sqlalchemy import Connection, inspect
2021

2122
from pyiceberg.catalog.rest import RestCatalog
2223

@@ -61,3 +62,22 @@ def test_create_namespace_if_already_existing(catalog: RestCatalog) -> None:
6162
catalog.create_namespace_if_not_exists(TEST_NAMESPACE_IDENTIFIER)
6263

6364
assert catalog.namespace_exists(TEST_NAMESPACE_IDENTIFIER)
65+
66+
67+
@pytest.mark.integration
68+
@pytest.mark.integration_trino
69+
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog")])
70+
def test_schema_exists_in_trino(trino_rest_conn: Connection, catalog: RestCatalog) -> None:
71+
"""Verifies that an Iceberg namespace correctly appears as a schema in Trino.
72+
73+
This test ensures the synchronization between Iceberg's namespace concept and
74+
Trino's schema concept, confirming that after creating a namespace in the Iceberg
75+
catalog, it becomes visible as a schema in the Trino environment.
76+
"""
77+
78+
if catalog.namespace_exists(TEST_NAMESPACE_IDENTIFIER):
79+
catalog.drop_namespace(TEST_NAMESPACE_IDENTIFIER)
80+
catalog.create_namespace_if_not_exists(TEST_NAMESPACE_IDENTIFIER)
81+
82+
assert catalog.namespace_exists(TEST_NAMESPACE_IDENTIFIER)
83+
assert TEST_NAMESPACE_IDENTIFIER.lower() in inspect(trino_rest_conn).get_schema_names()

0 commit comments

Comments
 (0)