88from typing import TYPE_CHECKING , Any , AsyncIterator , Optional
99
1010from dateutil import parser
11- from pydantic import Field , Secret
11+ from pydantic import Field , Secret , model_validator
1212
1313from unstructured_ingest .data_types .file_data import (
1414 FileData ,
5454
5555
class OnedriveAccessConfig(AccessConfig):
    # Secret material for the OneDrive connector. Every field is optional at
    # the type level; model_post_init below enforces which combinations are
    # actually accepted.
    client_cred: Optional[str] = Field(description="Microsoft App client secret", default=None)
    password: Optional[str] = Field(default=None, description="Service account password")
    oauth_token: Optional[str] = Field(
        description=(
            "OAuth 2.0 access token for delegated user authentication. "
            "Tokens typically expire after ~1 hour; this connector does not "
            "refresh tokens."
        ),
        default=None,
    )

    def model_post_init(self, __context: Any) -> None:
        """Reject credential combinations that do not select one auth mode.

        Values are compared by truthiness, so an empty string counts as
        "not provided" exactly like ``None``.

        Raises:
            ValueError: if neither ``client_cred`` nor ``oauth_token`` is
                provided, or if ``oauth_token`` is provided together with
                ``client_cred`` and/or ``password``.
        """
        oauth_supplied = bool(self.oauth_token)
        secret_supplied = bool(self.client_cred)

        # At least one of the two primary credentials has to be present;
        # a password on its own is not accepted.
        if not (secret_supplied or oauth_supplied):
            raise ValueError("either client_cred or oauth_token must be set")

        # The OAuth token is mutually exclusive with the secret/password pair.
        if oauth_supplied and (secret_supplied or self.password):
            raise ValueError("cannot use both oauth_token and client_cred/password authentication")
5980
6081
6182class OnedriveConnectionConfig (ConnectionConfig ):
62- client_id : str = Field (description = "Microsoft app client ID" )
83+ client_id : Optional [str ] = Field (
84+ default = None ,
85+ description = (
86+ "Microsoft app client ID. Required for app-only and password-grant authentication;"
87+ " not required when using oauth_token."
88+ ),
89+ )
6390 user_pname : str = Field (
6491 description = "User principal name or service account, usually your Azure AD email."
6592 )
@@ -74,6 +101,25 @@ class OnedriveConnectionConfig(ConnectionConfig):
74101 )
75102 access_config : Secret [OnedriveAccessConfig ]
76103
@model_validator(mode="after")
def _require_client_id_without_oauth(self) -> "OnedriveConnectionConfig":
    """Ensure ``client_id`` is present unless an OAuth token is configured.

    The rule spans two models: ``client_id`` is a field of this connection
    config, while ``oauth_token`` is read from the wrapped access config, so
    it is checked here after both have been populated.

    Returns:
        The validated instance, unchanged.

    Raises:
        ValueError: if both ``oauth_token`` and ``client_id`` are falsy.
    """
    uses_oauth = self.access_config.get_secret_value().oauth_token
    # Either an OAuth token or a client ID is enough to pass this check.
    if uses_oauth or self.client_id:
        return self
    raise ValueError("client_id is required when oauth_token is not set")
111+
def _log_oauth_advisory(self) -> None:
    """Log a warning about token expiry when an OAuth token is configured.

    Does nothing when the access config carries no (truthy) ``oauth_token``.
    The message is kept here so that the precheck methods that call it
    share a single wording. It is meant to be invoked from precheck rather
    than from get_token, so the warning is not repeated on every token
    lookup.
    """
    secrets = self.access_config.get_secret_value()
    if not secrets.oauth_token:
        return
    logger.warning("Using OAuth token authentication. Tokens expire after ~1 hour.")
122+
77123 def get_drive (self ) -> "Drive" :
78124 client = self .get_client ()
79125 drive = client .users [self .user_pname ].drive
@@ -84,7 +130,14 @@ def get_token(self):
84130 from msal import ConfidentialClientApplication
85131 from requests import post
86132
87- if self .access_config .get_secret_value ().password :
133+ access_config = self .access_config .get_secret_value ()
134+
135+ if access_config .oauth_token :
136+ # Delegated user authentication: hand the access token through directly.
137+ # Tokens typically expire after ~1 hour; refresh is not handled here.
138+ return {"access_token" : access_config .oauth_token , "token_type" : "Bearer" }
139+
140+ if access_config .password :
88141 url = f"https://login.microsoftonline.com/{ self .tenant } /oauth2/v2.0/token"
89142 headers = {"Content-Type" : "application/x-www-form-urlencoded" }
90143 data = {
@@ -160,6 +213,7 @@ class OnedriveIndexer(Indexer):
160213 connector_type : str = CONNECTOR_TYPE
161214
162215 def precheck (self ) -> None :
216+ self .connection_config ._log_oauth_advisory ()
163217 try :
164218 token_resp : dict = self .connection_config .get_token ()
165219 if error := token_resp .get ("error" ):
@@ -358,6 +412,7 @@ class OnedriveUploader(Uploader):
358412 def precheck (self ) -> None :
359413 from office365 .runtime .client_request_exception import ClientRequestException
360414
415+ self .connection_config ._log_oauth_advisory ()
361416 try :
362417 token_resp : dict = self .connection_config .get_token ()
363418 if error := token_resp .get ("error" ):
0 commit comments