Skip to content

Commit 10ecf46

Browse files
enabling oauth flow in pyiceberg for service users
1 parent 45fec39 commit 10ecf46

28 files changed

Lines changed: 2347 additions & 1939 deletions

docs/features/pyiceberg_testing.md

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -144,20 +144,36 @@ Pangolin supports both **OAuth 2.0** and **Basic Authentication**.
144144

145145
> **Note**: For detailed configuration of OAuth providers (Google, GitHub, etc.), please see [Authentication](../authentication.md).
146146
147-
### Option 1: Using an OAuth Token (Recommended)
147+
### Option 1: Standard OAuth2 (Recommended for Production)
148+
The most secure way to authenticate scripts and services is using the standard OAuth2 client credentials flow with a Service User.
148149

149-
After logging in via the UI/OAuth flow, you can use your JWT token directly.
150+
```python
151+
from pyiceberg.catalog import load_catalog
152+
153+
catalog = load_catalog(
154+
"pangolin",
155+
**{
156+
"uri": "http://localhost:8080/api/v1/iceberg/default",
157+
"type": "rest",
158+
"credential": "<service_user_id>:<service_user_api_key>",
159+
"oauth2-server-uri": "http://localhost:8080/api/v1/iceberg/default/v1/oauth/tokens",
160+
}
161+
)
162+
```
163+
164+
### Option 2: Using an Existing Token (Temporary)
165+
166+
If you have a JWT token from the UI or another login flow, you can use it directly.
150167

151168
```python
152169
from pyiceberg.catalog import load_catalog
153170

154171
catalog = load_catalog(
155172
"pangolin",
156173
**{
157-
"uri": "http://localhost:8080/iceberg/default", # 'default' is the catalog name
174+
"uri": "http://localhost:8080/api/v1/iceberg/default",
158175
"type": "rest",
159-
"token": "YOUR_JWT_TOKEN", # Token from UI or OAuth callback
160-
# No S3 keys needed if Credential Vending is active
176+
"token": "YOUR_JWT_TOKEN", # Token from UI or Login
161177
}
162178
)
163179
```

docs/features/service_users.md

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,19 +38,31 @@ curl -H "X-API-Key: pgl_YOUR_SECRET_KEY" \
3838
https://your-pangolin-api.com/api/v1/catalogs
3939
```
4040

41-
### Integration: PyIceberg
41+
### Integration: PyIceberg (Standard OAuth2)
42+
PyIceberg supports standard OAuth2, which is the recommended way to use Service Users.
43+
4244
```python
4345
from pyiceberg.catalog import load_catalog
4446

4547
catalog = load_catalog(
4648
"pangolin",
4749
**{
48-
"uri": "https://api.pangolin.io",
49-
"header.X-API-Key": "pgl_YOUR_SECRET_KEY",
50+
"uri": "https://api.pangolin.io/api/v1/iceberg/default",
51+
"credential": "<service_user_uuid>:<api_key>",
52+
"oauth2-server-uri": "https://api.pangolin.io/api/v1/iceberg/default/v1/oauth/tokens",
53+
"type": "rest",
5054
}
5155
)
5256
```
5357

58+
### Integration: Custom Clients (X-API-Key)
59+
For simple HTTP clients that don't support OAuth2, send the Service User API key directly in the `X-API-Key` header:
60+
```python
61+
import requests
62+
headers = {"X-API-Key": "pgl_YOUR_SECRET_KEY"}
63+
response = requests.get("https://api.pangolin.io/api/v1/catalogs", headers=headers)
64+
```
65+
5466
---
5567

5668
## 🚦 Best Practices

pangolin/Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pangolin/pangolin_api/src/auth_middleware.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,8 @@ pub async fn auth_middleware(
173173
path == "/v1/config" ||
174174
path.ends_with("/config") ||
175175
path.starts_with("/oauth/authorize/") ||
176-
path.starts_with("/oauth/callback/") {
176+
path.starts_with("/oauth/callback/") ||
177+
path.contains("/oauth/tokens") {
177178
return next.run(req).await;
178179
}
179180

@@ -335,13 +336,16 @@ pub async fn auth_middleware_wrapper(
335336

336337
// Whitelist public endpoints
337338
let path = req.uri().path();
339+
tracing::error!("AUTH CHECK: Checking path '{}'", path);
340+
338341
if path == "/health" ||
339342
path == "/api/v1/users/login" ||
340343
path == "/api/v1/app-config" ||
341344
path == "/v1/config" ||
342345
path.ends_with("/config") ||
343346
path.starts_with("/oauth/authorize/") ||
344-
path.starts_with("/oauth/callback/") {
347+
path.starts_with("/oauth/callback/") ||
348+
path.contains("/oauth/tokens") {
345349
return next.run(req).await;
346350
}
347351

pangolin/pangolin_api/src/iceberg/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use crate::federated_proxy::FederatedCatalogProxy;
1313
pub mod config;
1414
pub mod namespaces;
1515
pub mod tables;
16+
pub mod oauth;
1617
pub mod types;
1718

1819
// Re-export types for convenience
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
use axum::{
2+
extract::{State, Form},
3+
response::{IntoResponse, Json},
4+
http::StatusCode,
5+
};
6+
use serde::{Deserialize, Serialize};
7+
use serde_json::json;
8+
use pangolin_store::CatalogStore;
9+
// Removed Signer import as we implement signing locally
10+
use pangolin_core::user::{User, UserRole, UserSession, ServiceUser};
11+
use chrono::{Utc, Duration};
12+
use anyhow::Context;
13+
use uuid::Uuid;
14+
use bcrypt::verify;
15+
use jsonwebtoken::{encode, EncodingKey, Header};
16+
use crate::auth::Claims;
17+
18+
// Internal imports
19+
use crate::error::ApiError;
20+
use crate::iceberg::AppState;
21+
22+
#[derive(Deserialize)]
23+
pub struct OAuthTokenRequest {
24+
grant_type: String,
25+
client_id: String,
26+
client_secret: String,
27+
scope: Option<String>,
28+
}
29+
30+
#[derive(Serialize)]
31+
pub struct OAuthTokenResponse {
32+
access_token: String,
33+
token_type: String,
34+
expires_in: u64,
35+
issued_token_type: String,
36+
}
37+
38+
/// Handler for the standard OAuth2 client_credentials flow
39+
///
40+
/// This endpoint accepts `application/x-www-form-urlencoded` data
41+
/// to support standard libraries like PyIceberg/REST Catalog.
42+
///
43+
/// It maps:
44+
/// - `client_id` -> Service User ID (UUID)
45+
/// - `client_secret` -> Service User API Key
46+
///
47+
/// If valid, it returns a standard Pangolin JWT signed by the server key.
48+
pub async fn handle_oauth_token(
49+
State(store): State<AppState>,
50+
Form(payload): Form<OAuthTokenRequest>,
51+
) -> Result<impl IntoResponse, ApiError> {
52+
// 1. Validate Grant Type
53+
if payload.grant_type != "client_credentials" {
54+
return Err(ApiError::bad_request("Unsupported grant_type. exact 'client_credentials' required."));
55+
}
56+
57+
// 2. Parse Client ID as UUID
58+
let service_user_id = Uuid::parse_str(&payload.client_id)
59+
.map_err(|_| ApiError::bad_request("Invalid client_id format. Must be a valid UUID."))?;
60+
61+
// 3. Retrieve Service User
62+
let store_ref = &*store;
63+
let service_user_result: anyhow::Result<Option<ServiceUser>> = store_ref.get_service_user(service_user_id).await;
64+
let service_user = service_user_result
65+
.map_err(|e| ApiError::InternalError(e))?
66+
.ok_or_else(|| ApiError::unauthorized("Invalid client_id"))?;
67+
68+
// 4. Verify Active Status
69+
if !service_user.active {
70+
return Err(ApiError::unauthorized("Client is inactive"));
71+
}
72+
73+
// 5. Verify Secret (API Key)
74+
// The stored hash is bcrypt
75+
let valid_secret = verify(&payload.client_secret, &service_user.api_key_hash)
76+
.map_err(|e| ApiError::InternalError(anyhow::anyhow!("Crypto failure: {}", e)))?;
77+
78+
if !valid_secret {
79+
return Err(ApiError::unauthorized("Invalid client_secret"));
80+
}
81+
82+
// 6. Generate Session/Token
83+
// We reuse the standard JWT generation logic used for users
84+
let now = Utc::now();
85+
let expires_in_seconds = 3600; // 1 hour default
86+
let expires_at = now + Duration::seconds(expires_in_seconds as i64);
87+
88+
let token_id = Uuid::new_v4();
89+
let secret = std::env::var("PANGOLIN_JWT_SECRET").unwrap_or_else(|_| "default_secret_for_dev".to_string());
90+
91+
let claims = Claims {
92+
sub: service_user.id.to_string(),
93+
jti: Some(token_id.to_string()),
94+
username: service_user.name.clone(),
95+
tenant_id: Some(service_user.tenant_id.to_string()),
96+
role: service_user.role.clone(),
97+
exp: expires_at.timestamp(),
98+
iat: now.timestamp(),
99+
};
100+
101+
// We need to sign this.
102+
let token = encode(&Header::default(), &claims, &EncodingKey::from_secret(secret.as_bytes()))
103+
.map_err(|e| ApiError::InternalError(anyhow::anyhow!("Token generation failed: {}", e)))?;
104+
105+
// 7. Update Last Used
106+
// Best effort - don't fail auth if this fails
107+
let _ = store_ref.update_service_user_last_used(service_user.id, now).await;
108+
109+
// 8. Return Response
110+
Ok(Json(OAuthTokenResponse {
111+
access_token: token,
112+
token_type: "Bearer".to_string(),
113+
expires_in: expires_in_seconds,
114+
issued_token_type: "urn:ietf:params:oauth:token-type:access_token".to_string(),
115+
}))
116+
}

pangolin/pangolin_api/src/lib.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,11 @@ pub fn app(store: Arc<dyn CatalogStore + Send + Sync>) -> Router {
9797
.route("/v1/:prefix/v1/namespaces/:namespace/tables/:table/maintenance", post(iceberg::tables::perform_maintenance))
9898
.route("/v1/:prefix/v1/namespaces/:namespace/tables/:table/metrics", post(iceberg::tables::report_metrics))
9999
.route("/v1/:prefix/v1/tables/rename", post(iceberg::tables::rename_table))
100+
.route("/v1/:prefix/v1/oauth/tokens", post(iceberg::oauth::handle_oauth_token))
101+
.route("/v1/:prefix/oauth/tokens", post(iceberg::oauth::handle_oauth_token)) // Support non-nested v1 too just in case
102+
// Support /api/v1/iceberg prefix style
103+
.route("/api/v1/iceberg/:prefix/v1/oauth/tokens", post(iceberg::oauth::handle_oauth_token))
104+
.route("/api/v1/iceberg/:prefix/oauth/tokens", post(iceberg::oauth::handle_oauth_token))
100105
// Pangolin Extended APIs
101106
// Branch Operations
102107
.route("/api/v1/branches", post(pangolin_handlers::create_branch).get(pangolin_handlers::list_branches))

pangolin/pangolin_cli_admin/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ log = "0.4"
1717
env_logger = "0.10"
1818
serde_json = "1.0"
1919
urlencoding = "2.1"
20+
chrono = { version = "0.4", features = ["serde"] }
2021

2122
[[bin]]
2223
name = "pangolin-admin"

0 commit comments

Comments
 (0)