-
Notifications
You must be signed in to change notification settings - Fork 58
feat(microsoft): add delegated oauth_token authentication #706
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,83 @@ | ||
| import pytest | ||
| from pydantic import Secret | ||
|
|
||
| from unstructured_ingest.error import ValueError | ||
| from unstructured_ingest.processes.connectors.onedrive import ( | ||
| OnedriveAccessConfig, | ||
| OnedriveConnectionConfig, | ||
| ) | ||
|
|
||
|
|
||
| class TestOnedriveAccessConfig: | ||
| """Tests for OnedriveAccessConfig authentication validation.""" | ||
|
|
||
| def test_client_cred_only(self): | ||
| """Client credential alone should be valid (app-only authentication).""" | ||
| config = OnedriveAccessConfig(client_cred="secret-value") | ||
| assert config.client_cred == "secret-value" | ||
| assert config.oauth_token is None | ||
|
|
||
| def test_client_cred_and_password(self): | ||
| """client_cred + password is the password-grant flow and should be valid.""" | ||
| config = OnedriveAccessConfig(client_cred="secret-value", password="user-password") | ||
| assert config.client_cred == "secret-value" | ||
| assert config.password == "user-password" | ||
| assert config.oauth_token is None | ||
|
|
||
| def test_oauth_token_only(self): | ||
| """OAuth token alone should be valid (delegated authentication).""" | ||
| config = OnedriveAccessConfig(oauth_token="ey.access.token") | ||
| assert config.oauth_token == "ey.access.token" | ||
| assert config.client_cred is None | ||
|
|
||
| def test_no_auth_raises_error(self): | ||
| """No authentication provided should raise ValueError.""" | ||
| with pytest.raises(ValueError, match="must be set"): | ||
| OnedriveAccessConfig() | ||
|
|
||
| def test_oauth_and_client_cred_raises_error(self): | ||
| """Both oauth_token and client_cred provided should raise ValueError.""" | ||
| with pytest.raises(ValueError, match="cannot use both"): | ||
| OnedriveAccessConfig( | ||
| client_cred="secret-value", | ||
| oauth_token="ey.access.token", | ||
| ) | ||
|
|
||
| def test_oauth_and_password_raises_error(self): | ||
| """oauth_token combined with password should raise ValueError.""" | ||
| with pytest.raises(ValueError, match="cannot use both"): | ||
| OnedriveAccessConfig( | ||
| password="user-password", | ||
| oauth_token="ey.access.token", | ||
| ) | ||
|
|
||
| def test_empty_oauth_token_treated_as_missing(self): | ||
| """An empty-string oauth_token (e.g. unset env var) should not satisfy the auth requirement. | ||
|
|
||
| Validator and runtime both use truthiness; this test pins that consistency. | ||
| """ | ||
| with pytest.raises(ValueError, match="must be set"): | ||
| OnedriveAccessConfig(oauth_token="") | ||
|
|
||
|
|
||
| class TestOnedriveConnectionConfig: | ||
| """Tests for OnedriveConnectionConfig cross-field auth validation.""" | ||
|
|
||
| def test_client_cred_without_client_id_raises(self): | ||
| """client_cred-based auth requires client_id; rejecting at config time | ||
| avoids cryptic AADSTS / MSAL errors at runtime.""" | ||
| with pytest.raises(ValueError, match="client_id is required"): | ||
| OnedriveConnectionConfig( | ||
| user_pname="alice@contoso.com", | ||
| tenant="tenant-id", | ||
| access_config=Secret(OnedriveAccessConfig(client_cred="secret-value")), | ||
| ) | ||
|
|
||
| def test_oauth_token_without_client_id_succeeds(self): | ||
| """oauth_token auth doesn't need client_id; this is the delegated path.""" | ||
| config = OnedriveConnectionConfig( | ||
| user_pname="alice@contoso.com", | ||
| tenant="tenant-id", | ||
| access_config=Secret(OnedriveAccessConfig(oauth_token="ey.access.token")), | ||
| ) | ||
| assert config.client_id is None |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,65 @@ | ||
| import pytest | ||
| from pydantic import Secret | ||
|
|
||
| from unstructured_ingest.error import ValueError | ||
| from unstructured_ingest.processes.connectors.outlook import ( | ||
| OutlookAccessConfig, | ||
| OutlookConnectionConfig, | ||
| ) | ||
|
|
||
|
|
||
| class TestOutlookAccessConfig: | ||
| """Tests for OutlookAccessConfig authentication validation.""" | ||
|
|
||
| def test_client_cred_only(self): | ||
| """Client credential alone should be valid (app-only authentication).""" | ||
| config = OutlookAccessConfig(client_cred="secret-value") | ||
| # `client_credential` is the field name; `client_cred` is the alias. | ||
| assert config.client_credential == "secret-value" | ||
| assert config.oauth_token is None | ||
|
|
||
| def test_oauth_token_only(self): | ||
| """OAuth token alone should be valid (delegated authentication).""" | ||
| config = OutlookAccessConfig(oauth_token="ey.access.token") | ||
| assert config.oauth_token == "ey.access.token" | ||
| assert config.client_credential is None | ||
|
|
||
| def test_no_auth_raises_error(self): | ||
| """No authentication provided should raise ValueError.""" | ||
| with pytest.raises(ValueError, match="must be set"): | ||
| OutlookAccessConfig() | ||
|
|
||
| def test_oauth_and_client_cred_raises_error(self): | ||
| """Both oauth_token and client_cred provided should raise ValueError.""" | ||
| with pytest.raises(ValueError, match="cannot use both"): | ||
| OutlookAccessConfig( | ||
| client_cred="secret-value", | ||
| oauth_token="ey.access.token", | ||
| ) | ||
|
|
||
| def test_empty_oauth_token_treated_as_missing(self): | ||
| """An empty-string oauth_token (e.g. unset env var) should not satisfy the auth requirement. | ||
|
|
||
| Validator and runtime both use truthiness; this test pins that consistency. | ||
| """ | ||
| with pytest.raises(ValueError, match="must be set"): | ||
| OutlookAccessConfig(oauth_token="") | ||
|
|
||
|
|
||
| class TestOutlookConnectionConfig: | ||
| """Tests for OutlookConnectionConfig cross-field auth validation.""" | ||
|
|
||
| def test_client_cred_without_client_id_raises(self): | ||
| """client_cred-based auth requires client_id; rejecting at config time | ||
| avoids cryptic AADSTS / MSAL errors at runtime.""" | ||
| with pytest.raises(ValueError, match="client_id is required"): | ||
| OutlookConnectionConfig( | ||
| access_config=Secret(OutlookAccessConfig(client_cred="secret-value")), | ||
| ) | ||
|
|
||
| def test_oauth_token_without_client_id_succeeds(self): | ||
| """oauth_token auth doesn't need client_id; this is the delegated path.""" | ||
| config = OutlookConnectionConfig( | ||
| access_config=Secret(OutlookAccessConfig(oauth_token="ey.access.token")), | ||
| ) | ||
| assert config.client_id is None |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1 +1 @@ | ||
| __version__ = "1.5.1" # pragma: no cover | ||
| __version__ = "1.5.2" # pragma: no cover |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,7 +8,7 @@ | |
| from typing import TYPE_CHECKING, Any, AsyncIterator, Optional | ||
|
|
||
| from dateutil import parser | ||
| from pydantic import Field, Secret | ||
| from pydantic import Field, Secret, model_validator | ||
|
|
||
| from unstructured_ingest.data_types.file_data import ( | ||
| FileData, | ||
|
|
@@ -54,12 +54,39 @@ | |
|
|
||
|
|
||
| class OnedriveAccessConfig(AccessConfig): | ||
| client_cred: str = Field(description="Microsoft App client secret") | ||
| client_cred: Optional[str] = Field(default=None, description="Microsoft App client secret") | ||
| password: Optional[str] = Field(description="Service account password", default=None) | ||
| oauth_token: Optional[str] = Field( | ||
| default=None, | ||
| description=( | ||
| "OAuth 2.0 access token for delegated user authentication. " | ||
| "Tokens typically expire after ~1 hour; this connector does not " | ||
| "refresh tokens." | ||
| ), | ||
| ) | ||
|
|
||
| def model_post_init(self, __context: Any) -> None: | ||
| # Use truthiness so empty strings (e.g. from unset env vars) are treated | ||
| # consistently with the runtime auth-mode check in get_token below. | ||
| has_client_cred = bool(self.client_cred) | ||
| has_oauth_token = bool(self.oauth_token) | ||
| has_password = bool(self.password) | ||
|
|
||
| if not has_client_cred and not has_oauth_token: | ||
| raise ValueError("either client_cred or oauth_token must be set") | ||
|
|
||
| if has_oauth_token and (has_client_cred or has_password): | ||
| raise ValueError("cannot use both oauth_token and client_cred/password authentication") | ||
|
|
||
|
|
||
| class OnedriveConnectionConfig(ConnectionConfig): | ||
| client_id: str = Field(description="Microsoft app client ID") | ||
| client_id: Optional[str] = Field( | ||
| default=None, | ||
| description=( | ||
| "Microsoft app client ID. Required for app-only and password-grant authentication;" | ||
| " not required when using oauth_token." | ||
| ), | ||
| ) | ||
|
cursor[bot] marked this conversation as resolved.
|
||
| user_pname: str = Field( | ||
| description="User principal name or service account, usually your Azure AD email." | ||
| ) | ||
|
|
@@ -74,6 +101,25 @@ class OnedriveConnectionConfig(ConnectionConfig): | |
| ) | ||
| access_config: Secret[OnedriveAccessConfig] | ||
|
|
||
| @model_validator(mode="after") | ||
| def _require_client_id_without_oauth(self) -> "OnedriveConnectionConfig": | ||
| # client_id lives on ConnectionConfig (above) and oauth_token on AccessConfig, | ||
| # so this cross-field rule can't live in either model_post_init alone. | ||
| if not self.access_config.get_secret_value().oauth_token and not self.client_id: | ||
| raise ValueError("client_id is required when oauth_token is not set") | ||
| return self | ||
|
|
||
| def _log_oauth_advisory(self) -> None: | ||
| """Emit a one-shot advisory at precheck time when delegated OAuth is in use. | ||
|
|
||
| Lives on ConnectionConfig so Indexer/Uploader/Downloader prechecks share | ||
| one source of truth instead of each duplicating the message. Called from | ||
| precheck (once per step instance) rather than from get_token (called per | ||
| Graph request) to avoid log spam during normal indexing. | ||
| """ | ||
| if self.access_config.get_secret_value().oauth_token: | ||
| logger.warning("Using OAuth token authentication. Tokens expire after ~1 hour.") | ||
|
|
||
| def get_drive(self) -> "Drive": | ||
| client = self.get_client() | ||
| drive = client.users[self.user_pname].drive | ||
|
|
@@ -84,7 +130,14 @@ def get_token(self): | |
| from msal import ConfidentialClientApplication | ||
| from requests import post | ||
|
|
||
| if self.access_config.get_secret_value().password: | ||
| access_config = self.access_config.get_secret_value() | ||
|
|
||
| if access_config.oauth_token: | ||
| # Delegated user authentication: hand the access token through directly. | ||
| # Tokens typically expire after ~1 hour; refresh is not handled here. | ||
| return {"access_token": access_config.oauth_token, "token_type": "Bearer"} | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oauth_token path unnecessarily requires msal dependencyLow Severity The Additional Locations (1)Reviewed by Cursor Bugbot for commit 7fb41d4. Configure here. |
||
|
|
||
| if access_config.password: | ||
| url = f"https://login.microsoftonline.com/{self.tenant}/oauth2/v2.0/token" | ||
| headers = {"Content-Type": "application/x-www-form-urlencoded"} | ||
| data = { | ||
|
|
@@ -160,6 +213,7 @@ class OnedriveIndexer(Indexer): | |
| connector_type: str = CONNECTOR_TYPE | ||
|
|
||
| def precheck(self) -> None: | ||
| self.connection_config._log_oauth_advisory() | ||
| try: | ||
| token_resp: dict = self.connection_config.get_token() | ||
| if error := token_resp.get("error"): | ||
|
|
@@ -358,6 +412,7 @@ class OnedriveUploader(Uploader): | |
| def precheck(self) -> None: | ||
| from office365.runtime.client_request_exception import ClientRequestException | ||
|
|
||
| self.connection_config._log_oauth_advisory() | ||
| try: | ||
| token_resp: dict = self.connection_config.get_token() | ||
| if error := token_resp.get("error"): | ||
|
|
||


Uh oh!
There was an error while loading. Please reload this page.