Skip to content

Commit 2149e7e

Browse files
committed
fix polaris
1 parent e692eeb commit 2149e7e

16 files changed

Lines changed: 691 additions & 252 deletions

File tree

.github/workflows/python-integ-polaris.yml

Lines changed: 26 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@ on:
1818
- main
1919
paths:
2020
- python/src/lance_namespace_impls/polaris.py
21+
- python/src/lance_namespace_impls/rest_client.py
2122
- python/tests/test_polaris.py
23+
- python/tests/test_polaris_integration.py
2224
- docker/polaris/**
2325
- .github/workflows/python-integ-polaris.yml
2426
pull_request:
@@ -29,7 +31,9 @@ on:
2931
- reopened
3032
paths:
3133
- python/src/lance_namespace_impls/polaris.py
34+
- python/src/lance_namespace_impls/rest_client.py
3235
- python/tests/test_polaris.py
36+
- python/tests/test_polaris_integration.py
3337
- docker/polaris/**
3438
- .github/workflows/python-integ-polaris.yml
3539
workflow_dispatch:
@@ -53,7 +57,7 @@ jobs:
5357
uses: astral-sh/setup-uv@v4
5458
- name: Start Polaris
5559
run: make docker-up-polaris
56-
- name: Wait for Polaris to be ready
60+
- name: Wait for Polaris and catalog setup
5761
run: |
5862
echo "Waiting for Polaris to be ready..."
5963
timeout 180 bash -c '
@@ -67,30 +71,32 @@ jobs:
6771
exit 1
6872
}
6973
echo "Polaris is ready"
70-
- name: Create test catalog
71-
run: |
74+
75+
# Wait for polaris-setup to complete (creates test_catalog)
76+
echo "Waiting for polaris-setup to complete..."
77+
timeout 60 bash -c '
78+
while docker ps -q -f name=polaris-setup 2>/dev/null | grep -q .; do
79+
echo "Waiting for polaris-setup container to finish..."
80+
sleep 2
81+
done
82+
' || echo "polaris-setup may have already completed"
83+
84+
# Verify catalog exists
85+
echo "Verifying test_catalog exists..."
7286
TOKEN=$(curl -s -X POST http://localhost:8181/api/catalog/v1/oauth/tokens \
7387
-H 'Content-Type: application/x-www-form-urlencoded' \
7488
-d 'grant_type=client_credentials&client_id=root&client_secret=s3cr3t&scope=PRINCIPAL_ROLE:ALL' | \
7589
python3 -c "import sys, json; print(json.load(sys.stdin).get('access_token', ''))")
76-
if [ -z "$TOKEN" ]; then
77-
echo "Failed to get OAuth token"
78-
exit 1
90+
91+
CATALOGS=$(curl -s http://localhost:8181/api/management/v1/catalogs \
92+
-H "Authorization: Bearer $TOKEN")
93+
94+
if echo "$CATALOGS" | grep -q "test_catalog"; then
95+
echo "test_catalog verified"
96+
else
97+
echo "Warning: test_catalog not found in catalogs: $CATALOGS"
98+
docker compose -f docker/polaris/docker-compose.yml logs polaris-setup
7999
fi
80-
curl -s -X POST http://localhost:8181/api/catalog/v1/catalogs \
81-
-H "Authorization: Bearer $TOKEN" \
82-
-H 'Content-Type: application/json' \
83-
-d '{
84-
"name": "test_catalog",
85-
"type": "INTERNAL",
86-
"properties": {
87-
"default-base-location": "file:///data/warehouse/test_catalog"
88-
},
89-
"storageConfigInfo": {
90-
"storageType": "FILE"
91-
}
92-
}' || echo "Catalog may already exist"
93-
echo "Test catalog created/verified"
94100
- name: Install dependencies
95101
working-directory: python
96102
run: make install-polaris

docker/polaris/docker-compose.yml

Lines changed: 34 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
version: '3.8'
2-
31
services:
42
postgres-polaris:
53
image: postgres:16
@@ -20,24 +18,37 @@ services:
2018
networks:
2119
- polaris-network
2220

23-
polaris:
24-
image: apache/polaris:latest
25-
container_name: polaris
21+
polaris-bootstrap:
22+
image: apache/polaris-admin-tool:1.2.0-incubating
23+
container_name: polaris-bootstrap
2624
depends_on:
2725
postgres-polaris:
2826
condition: service_healthy
2927
environment:
30-
# Bootstrap credentials: realm,client_id,client_secret
28+
POLARIS_PERSISTENCE_TYPE: relational-jdbc
29+
QUARKUS_DATASOURCE_JDBC_URL: jdbc:postgresql://postgres-polaris:5432/polaris
30+
QUARKUS_DATASOURCE_USERNAME: polaris
31+
QUARKUS_DATASOURCE_PASSWORD: polaris
32+
command: ["bootstrap", "-r", "POLARIS", "-c", "POLARIS,root,s3cr3t"]
33+
networks:
34+
- polaris-network
35+
36+
polaris:
37+
image: apache/polaris:1.2.0-incubating
38+
container_name: polaris
39+
depends_on:
40+
polaris-bootstrap:
41+
condition: service_completed_successfully
42+
environment:
3143
POLARIS_BOOTSTRAP_CREDENTIALS: "POLARIS,root,s3cr3t"
32-
# Persistence configuration
3344
POLARIS_PERSISTENCE_TYPE: relational-jdbc
3445
QUARKUS_DATASOURCE_JDBC_URL: jdbc:postgresql://postgres-polaris:5432/polaris
3546
QUARKUS_DATASOURCE_USERNAME: polaris
3647
QUARKUS_DATASOURCE_PASSWORD: polaris
37-
# Allow file:// storage for local testing
38-
POLARIS_FEATURES_DEFAULTS_OVERRIDE_ALLOW_INSECURE_STORAGE_TYPES: "true"
39-
POLARIS_FEATURES_DEFAULTS_OVERRIDE_SUPPORTED_CATALOG_STORAGE_TYPES: '["FILE","S3","GCS","AZURE"]'
40-
# Disable OpenTelemetry for testing
48+
polaris.features."ALLOW_INSECURE_STORAGE_TYPES": "true"
49+
polaris.features."SUPPORTED_CATALOG_STORAGE_TYPES": '["FILE","S3","GCS","AZURE"]'
50+
polaris.features."GENERIC_TABLE_ENABLED": "true"
51+
polaris.readiness.ignore-severe-issues: "true"
4152
QUARKUS_OTEL_SDK_DISABLED: "true"
4253
ports:
4354
- "8181:8181"
@@ -53,6 +64,18 @@ services:
5364
retries: 10
5465
start_period: 30s
5566

67+
polaris-setup:
68+
image: alpine:3.20
69+
container_name: polaris-setup
70+
depends_on:
71+
polaris:
72+
condition: service_healthy
73+
volumes:
74+
- ./init-catalog.sh:/init-catalog.sh:ro
75+
command: ["/bin/sh", "/init-catalog.sh"]
76+
networks:
77+
- polaris-network
78+
5679
volumes:
5780
polaris-postgres-data:
5881
polaris-warehouse:

docker/polaris/init-catalog.sh

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
#!/bin/sh
2+
# Wait for Polaris to be healthy
3+
echo "Waiting for Polaris to be ready..."
4+
until wget -q -O /dev/null http://polaris:8182/q/health 2>/dev/null; do
5+
echo "Polaris not ready, waiting..."
6+
sleep 2
7+
done
8+
echo "Polaris is healthy!"
9+
10+
# Get OAuth token
11+
echo "Getting OAuth token..."
12+
TOKEN_RESPONSE=$(wget -q -O - --post-data='grant_type=client_credentials&client_id=root&client_secret=s3cr3t&scope=PRINCIPAL_ROLE:ALL' \
13+
--header='Content-Type: application/x-www-form-urlencoded' \
14+
http://polaris:8181/api/catalog/v1/oauth/tokens)
15+
16+
TOKEN=$(echo "$TOKEN_RESPONSE" | sed -n 's/.*"access_token":"\([^"]*\)".*/\1/p')
17+
18+
if [ -z "$TOKEN" ]; then
19+
echo "Failed to get OAuth token"
20+
echo "Response: $TOKEN_RESPONSE"
21+
exit 1
22+
fi
23+
echo "Token obtained successfully"
24+
25+
# Check if catalog already exists
26+
echo "Checking for existing catalog..."
27+
CATALOGS=$(wget -q -O - --header="Authorization: Bearer $TOKEN" \
28+
http://polaris:8181/api/management/v1/catalogs)
29+
30+
if echo "$CATALOGS" | grep -q '"name":"test_catalog"'; then
31+
echo "Catalog 'test_catalog' already exists"
32+
exit 0
33+
fi
34+
35+
# Create test catalog
36+
echo "Creating test catalog..."
37+
RESULT=$(wget -q -O - --post-data='{
38+
"catalog": {
39+
"name": "test_catalog",
40+
"type": "INTERNAL",
41+
"properties": {
42+
"default-base-location": "file:///data/warehouse/test_catalog"
43+
},
44+
"storageConfigInfo": {
45+
"storageType": "FILE",
46+
"allowedLocations": ["file:///data/warehouse"]
47+
}
48+
}
49+
}' \
50+
--header="Authorization: Bearer $TOKEN" \
51+
--header='Content-Type: application/json' \
52+
http://polaris:8181/api/management/v1/catalogs 2>&1)
53+
54+
echo "Result: $RESULT"
55+
56+
# Verify catalog was created
57+
VERIFY=$(wget -q -O - --header="Authorization: Bearer $TOKEN" \
58+
http://polaris:8181/api/management/v1/catalogs)
59+
60+
if echo "$VERIFY" | grep -q '"name":"test_catalog"'; then
61+
echo "Catalog 'test_catalog' created successfully!"
62+
exit 0
63+
else
64+
echo "Failed to create catalog"
65+
echo "Catalogs: $VERIFY"
66+
exit 1
67+
fi

docs/src/polaris.md

Lines changed: 42 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ This document describes how the Polaris Catalog implements the Lance Namespace c
66

77
Apache Polaris is an open-source catalog implementation for Apache Iceberg that provides a REST API for managing tables and namespaces. Polaris supports the Generic Table API which allows registering non-Iceberg table formats. For details on Polaris Catalog, see the [Polaris Catalog Documentation](https://polaris.apache.org).
88

9+
**Note:** The Generic Table API is available in Polaris 1.2.0-incubating and later versions. Ensure your Polaris deployment is running a compatible version.
10+
911
## Namespace Implementation Configuration Properties
1012

1113
The Lance Polaris namespace implementation accepts the following configuration properties:
@@ -24,11 +26,14 @@ The **max_retries** property is optional and specifies the maximum number of ret
2426

2527
### Namespace
2628

27-
The **root namespace** is represented by the Polaris catalog root, accessed via the `/namespaces` endpoint.
29+
Namespaces follow a hierarchical structure in which the first level is the Polaris **catalog** (warehouse) and subsequent levels are namespaces within that catalog. For example, `my_catalog.my_schema` refers to namespace `my_schema` in catalog `my_catalog`.
2830

29-
A **child namespace** is a nested namespace in Polaris. Polaris supports arbitrary nesting depth, allowing flexible namespace organization. First-level namespaces typically represent catalogs, with subsequent levels representing schemas or other organizational units.
31+
A **child namespace** is a nested namespace in Polaris. Polaris supports arbitrary nesting depth, allowing flexible namespace organization within a catalog.
3032

31-
The **namespace identifier** is constructed by joining namespace levels with the `.` delimiter (e.g., `catalog.schema`). When making API calls, the namespace path is URL-encoded.
33+
The **namespace identifier** is constructed by joining the catalog and namespace levels with the `.` delimiter (e.g., `catalog.schema.subschema`). When making API calls:
34+
- The catalog is extracted as the first level
35+
- Remaining levels form the namespace path within that catalog
36+
- The namespace path is URL-encoded using `.` as the separator
3237

3338
**Namespace properties** are stored in the namespace's properties map, returned by the Polaris namespace API.
3439

@@ -54,10 +59,11 @@ Creates a new namespace in Polaris.
5459

5560
The implementation:
5661

57-
1. Parse the namespace identifier to get the namespace path
58-
2. Construct a CreateNamespaceRequest with the namespace array and properties
59-
3. POST to `/namespaces` endpoint
60-
4. Return the created namespace properties
62+
1. Parse the namespace identifier to extract the catalog (first level) and namespace levels
63+
2. Validate that at least 2 levels are provided (catalog + namespace)
64+
3. Construct a CreateNamespaceRequest with the namespace array and properties
65+
4. POST to `/api/catalog/v1/{catalog}/namespaces` endpoint
66+
5. Return the created namespace properties
6167

6268
**Error Handling:**
6369

@@ -69,10 +75,11 @@ Lists child namespaces under a given parent namespace.
6975

7076
The implementation:
7177

72-
1. Parse the parent namespace identifier
73-
2. For root namespace: GET `/namespaces`
74-
3. For nested namespace: GET `/namespaces/{parent}/namespaces`
75-
4. Convert the response namespace arrays to dot-separated strings
78+
1. Parse the parent namespace identifier to extract the catalog (first level)
79+
2. Validate that at least 1 level (catalog) is provided
80+
3. For catalog-level listing: GET `/api/catalog/v1/{catalog}/namespaces`
81+
4. For nested namespace listing: GET `/api/catalog/v1/{catalog}/namespaces/{parent}/namespaces`
82+
5. Convert each namespace array in the response to a dot-separated string, prefixed with the catalog name
7683

7784
**Error Handling:**
7885

@@ -84,9 +91,10 @@ Retrieves properties and metadata for a namespace.
8491

8592
The implementation:
8693

87-
1. Parse the namespace identifier
88-
2. GET `/namespaces/{namespace}` with URL-encoded namespace path
89-
3. Return the namespace properties
94+
1. Parse the namespace identifier to extract the catalog (first level) and namespace path
95+
2. Validate that at least 2 levels are provided (catalog + namespace)
96+
3. GET `/api/catalog/v1/{catalog}/namespaces/{namespace}` with URL-encoded namespace path
97+
4. Return the namespace properties
9098

9199
**Error Handling:**
92100

@@ -98,8 +106,9 @@ Removes a namespace from Polaris. Only RESTRICT mode is supported; CASCADE mode
98106

99107
The implementation:
100108

101-
1. Parse the namespace identifier
102-
2. DELETE `/namespaces/{namespace}` with URL-encoded namespace path
109+
1. Parse the namespace identifier to extract the catalog (first level) and namespace path
110+
2. Validate that at least 2 levels are provided (catalog + namespace)
111+
3. DELETE `/api/catalog/v1/{catalog}/namespaces/{namespace}` with URL-encoded namespace path
103112

104113
**Error Handling:**
105114

@@ -115,15 +124,16 @@ Declares a new Lance table in Polaris without creating the underlying data.
115124

116125
The implementation:
117126

118-
1. Parse the table identifier to extract namespace and table name
119-
2. Construct a CreateGenericTableRequest with:
127+
1. Parse the table identifier to extract catalog (first level), namespace (middle levels), and table name (last level)
128+
2. Validate that at least 3 levels are provided (catalog + namespace + table)
129+
3. Construct a CreateGenericTableRequest with:
120130
- `name`: the table name
121131
- `format`: `lance`
122132
- `base-location`: the specified location
123133
- `doc`: optional description from properties
124134
- `properties`: table properties including `table_type=lance`
125-
3. POST to `/namespaces/{namespace}/generic-tables`
126-
4. Return the created table location and properties
135+
4. POST to `/api/catalog/polaris/v1/{catalog}/namespaces/{namespace}/generic-tables`
136+
5. Return the created table location and properties
127137

128138
**Error Handling:**
129139

@@ -135,9 +145,10 @@ Lists all Lance tables in a namespace.
135145

136146
The implementation:
137147

138-
1. Parse the namespace identifier
139-
2. GET `/namespaces/{namespace}/generic-tables`
140-
3. Extract table names from the response identifiers
148+
1. Parse the namespace identifier to extract the catalog (first level) and namespace path
149+
2. Validate that at least 2 levels are provided (catalog + namespace)
150+
3. GET `/api/catalog/polaris/v1/{catalog}/namespaces/{namespace}/generic-tables`
151+
4. Extract table names from the response identifiers
141152

142153
**Error Handling:**
143154

@@ -149,10 +160,11 @@ Retrieves metadata for a Lance table. Only `load_detailed_metadata=false` is sup
149160

150161
The implementation:
151162

152-
1. Parse the table identifier to extract namespace and table name
153-
2. GET `/namespaces/{namespace}/generic-tables/{table}`
154-
3. Verify the table format is `lance`
155-
4. Return the table location from `base-location` and storage_options from `properties`
163+
1. Parse the table identifier to extract catalog (first level), namespace (middle levels), and table name (last level)
164+
2. Validate that at least 3 levels are provided (catalog + namespace + table)
165+
3. GET `/api/catalog/polaris/v1/{catalog}/namespaces/{namespace}/generic-tables/{table}`
166+
4. Verify the table format is `lance`
167+
5. Return the table location from `base-location` and storage_options from `properties`
156168

157169
**Error Handling:**
158170

@@ -168,8 +180,9 @@ Removes a Lance table registration from Polaris without deleting the underlying
168180

169181
The implementation:
170182

171-
1. Parse the table identifier to extract namespace and table name
172-
2. DELETE `/namespaces/{namespace}/generic-tables/{table}`
183+
1. Parse the table identifier to extract catalog (first level), namespace (middle levels), and table name (last level)
184+
2. Validate that at least 3 levels are provided (catalog + namespace + table)
185+
3. DELETE `/api/catalog/polaris/v1/{catalog}/namespaces/{namespace}/generic-tables/{table}`
173186

174187
**Error Handling:**
175188

java/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ integ-test-hive3:
151151

152152
.PHONY: integ-test-polaris
153153
integ-test-polaris:
154-
./mvnw test -pl lance-namespace-polaris -Dtest="*IntegrationTest" -DfailIfNoTests=false
154+
./mvnw test -pl lance-namespace-polaris -Dtest="*Integration" -DfailIfNoTests=false
155155

156156
.PHONY: integ-test-unity
157157
integ-test-unity:

0 commit comments

Comments
 (0)