Skip to content

Commit c7108dc

Browse files
authored
Merge pull request #41 from UiPath/feature/highspot
Add Highspot connector + fix slack-bot create button silent submit
2 parents b33ff20 + 46c9042 commit c7108dc

13 files changed

Lines changed: 1445 additions & 3 deletions

File tree

backend/danswer/configs/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ class DocumentSource(str, Enum):
106106
R2 = "r2"
107107
GOOGLE_CLOUD_STORAGE = "google_cloud_storage"
108108
OCI_STORAGE = "oci_storage"
109+
HIGHSPOT = "highspot"
109110

110111

111112
class BlobType(str, Enum):

backend/danswer/connectors/factory.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from danswer.connectors.google_drive.connector import GoogleDriveConnector
2323
from danswer.connectors.google_site.connector import GoogleSitesConnector
2424
from danswer.connectors.guru.connector import GuruConnector
25+
from danswer.connectors.highspot.connector import HighspotConnector
2526
from danswer.connectors.hubspot.connector import HubSpotConnector
2627
from danswer.connectors.interfaces import BaseConnector
2728
from danswer.connectors.interfaces import EventConnector
@@ -99,6 +100,7 @@ def identify_connector_class(
99100
DocumentSource.R2: BlobStorageConnector,
100101
DocumentSource.GOOGLE_CLOUD_STORAGE: BlobStorageConnector,
101102
DocumentSource.OCI_STORAGE: BlobStorageConnector,
103+
DocumentSource.HIGHSPOT: HighspotConnector,
102104
}
103105
connector_by_source = connector_map.get(source, {})
104106

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
"""Highspot connector — indexes Spots and Items from a Highspot tenant.
2+
3+
Authenticates via HTTP Basic with an API key/secret pair generated from
4+
Highspot's admin console. Supports an optional `spot_names` allowlist;
5+
when empty, all Spots accessible to the credential are indexed.
6+
"""
Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
"""Thin HTTP client for the Highspot REST API.
2+
3+
Ported from upstream Onyx (`onyx-dot-app/onyx`) with only the logger
4+
import path changed to point at this fork's `danswer.utils.logger`.
5+
The HTTP / auth / retry shape is fork-agnostic and intentionally kept
6+
verbatim so future upstream tweaks can be cherry-picked cleanly.
7+
"""
8+
import base64
9+
from typing import Any
10+
from typing import Dict
11+
from typing import List
12+
from typing import Optional
13+
from urllib.parse import urljoin
14+
15+
import requests
16+
from requests.adapters import HTTPAdapter
17+
from requests.exceptions import HTTPError
18+
from requests.exceptions import RequestException
19+
from requests.exceptions import Timeout
20+
from urllib3.util.retry import Retry
21+
22+
from danswer.utils.logger import setup_logger
23+
24+
logger = setup_logger()
25+
PAGE_SIZE = 100
26+
27+
28+
class HighspotClientError(Exception):
    """Root of the exception hierarchy raised by the Highspot API client.

    Attributes:
        message: Human-readable description of the failure; also used as
            the exception's string form.
        status_code: HTTP status code associated with the failure, or
            ``None`` when no status is available.
    """

    def __init__(self, message: str, status_code: Optional[int] = None):
        # Initialise the base Exception first so ``args`` / ``str()`` carry
        # the message, then attach the extra attributes.
        super().__init__(message)
        self.status_code = status_code
        self.message = message
35+
36+
37+
class HighspotAuthenticationError(HighspotClientError):
    """Signals an authentication failure reported by the Highspot API.

    Adds no behaviour of its own; it exists so callers can tell an
    authentication failure apart from other client errors.
    """
39+
40+
41+
class HighspotRateLimitError(HighspotClientError):
    """Exception raised when the Highspot API rate limit is exceeded.

    Attributes:
        retry_after: Value supplied by the raiser (the client passes the
            raw ``Retry-After`` response header), or ``None`` if unknown.
        status_code: Always ``429``, so handlers that branch on
            ``status_code`` treat this error like any other HTTP failure
            instead of seeing ``None``.
    """

    def __init__(self, message: str, retry_after: Optional[str] = None):
        self.retry_after = retry_after
        # A rate-limit error is by definition an HTTP 429; record it on the
        # base class rather than leaving status_code unset.
        super().__init__(message, status_code=429)
47+
48+
49+
class HighspotClient:
    """Client for interacting with the Highspot REST API.

    Uses HTTP Basic authentication with the provided key (username) and
    secret (password). All requests share one `requests.Session`, which
    gives connection pooling and a urllib3 `Retry` policy for transient
    failures.
    """

    BASE_URL = "https://api-su2.highspot.com/v1.0/"

    def __init__(
        self,
        key: str,
        secret: str,
        base_url: str = BASE_URL,
        timeout: int = 30,
        max_retries: int = 3,
        backoff_factor: float = 0.5,
        status_forcelist: Optional[List[int]] = None,
    ):
        """Create a client and configure its session.

        Args:
            key: API key, used as the Basic-auth username.
            secret: API secret, used as the Basic-auth password.
            base_url: Root URL of the API; normalised to end in one "/".
            timeout: Per-request timeout in seconds.
            max_retries: Total number of retries per request.
            backoff_factor: Backoff factor handed to urllib3's `Retry`.
            status_forcelist: HTTP statuses that trigger a retry. ``None``
                selects the default ``[429, 500, 502, 503, 504]``; an
                explicit empty list disables status-based retries.

        Raises:
            ValueError: If ``key`` or ``secret`` is empty.
        """
        if not key or not secret:
            raise ValueError("API key and secret are required")

        self.key = key
        self.secret = secret
        self.base_url = base_url.rstrip("/") + "/"
        self.timeout = timeout

        # Compare against None (not truthiness) so a caller can pass [] to
        # switch status-based retries off.
        if status_forcelist is None:
            status_forcelist = [429, 500, 502, 503, 504]

        self.session = requests.Session()
        retry_strategy = Retry(
            total=max_retries,
            backoff_factor=backoff_factor,
            status_forcelist=status_forcelist,
            allowed_methods=["GET", "POST", "PUT", "DELETE"],
            # Hand back the final response once retries are exhausted
            # instead of raising RetryError; _make_request can then map a
            # 429 / 5xx to the proper exception type with its status code.
            raise_on_status=False,
        )
        adapter = HTTPAdapter(max_retries=retry_strategy)
        self.session.mount("http://", adapter)
        self.session.mount("https://", adapter)

        self._setup_auth()

    def _setup_auth(self) -> None:
        """Install the Basic-auth and JSON content headers on the session."""
        auth = f"{self.key}:{self.secret}"
        encoded_auth = base64.b64encode(auth.encode()).decode()
        self.session.headers.update(
            {
                "Authorization": f"Basic {encoded_auth}",
                "Content-Type": "application/json",
                "Accept": "application/json",
            }
        )

    def _make_request(
        self,
        method: str,
        endpoint: str,
        params: Optional[Dict[str, Any]] = None,
        data: Optional[Dict[str, Any]] = None,
        json_data: Optional[Dict[str, Any]] = None,
        headers: Optional[Dict[str, str]] = None,
    ) -> Dict[str, Any]:
        """Send one API request and return the decoded JSON body.

        Args:
            method: HTTP method name.
            endpoint: Path relative to ``base_url`` (e.g. ``"spots"``).
            params: Query-string parameters.
            data: Form-encoded request body.
            json_data: JSON request body.
            headers: Extra headers, merged over the session defaults.

        Returns:
            The parsed JSON response, or ``{}`` when the body is empty.

        Raises:
            HighspotAuthenticationError: On HTTP 401.
            HighspotRateLimitError: On HTTP 429; carries ``Retry-After``.
            HighspotClientError: On any other HTTP error status, on
                timeout, or on any other `requests` failure.
        """
        # A leading "/" would make urljoin discard the path of base_url
        # (including the API version prefix), so strip it.
        url = urljoin(self.base_url, endpoint.lstrip("/"))
        request_headers: Dict[str, str] = dict(headers) if headers else {}

        try:
            logger.debug("Making %s request to %s", method, url)
            response = self.session.request(
                method=method,
                url=url,
                params=params,
                data=data,
                json=json_data,
                headers=request_headers,
                timeout=self.timeout,
            )
            response.raise_for_status()

            if response.content and response.content.strip():
                return response.json()
            return {}

        except HTTPError as e:
            status_code = e.response.status_code
            error_msg = str(e)

            # Prefer the API's own "message" field when the error body is
            # a JSON object; otherwise keep the generic HTTPError text.
            try:
                error_data = e.response.json()
            except ValueError:
                error_data = None
            if isinstance(error_data, dict):
                error_msg = error_data.get("message", str(e))

            if status_code == 401:
                raise HighspotAuthenticationError(
                    f"Authentication failed: {error_msg}", status_code=status_code
                ) from e
            if status_code == 429:
                retry_after = e.response.headers.get("Retry-After")
                raise HighspotRateLimitError(
                    f"Rate limit exceeded: {error_msg}", retry_after=retry_after
                ) from e
            raise HighspotClientError(
                f"API error {status_code}: {error_msg}", status_code=status_code
            ) from e

        except Timeout as e:
            raise HighspotClientError("Request timed out") from e
        except RequestException as e:
            raise HighspotClientError(f"Request failed: {str(e)}") from e

    def get_spots(self) -> List[Dict[str, Any]]:
        """Get all available spots, paginated. Returns a flat list.

        Pages through the ``spots`` endpoint ``PAGE_SIZE`` entries at a
        time and stops at the first page shorter than ``PAGE_SIZE``.
        """
        all_spots: List[Dict[str, Any]] = []
        current_offset = 0

        while True:
            params = {"right": "view", "start": current_offset, "limit": PAGE_SIZE}
            response = self._make_request("GET", "spots", params=params)
            found_spots = response.get("collection", [])
            logger.info(
                "Received %s spots at offset %s", len(found_spots), current_offset
            )
            all_spots.extend(found_spots)
            # A short page means the listing is exhausted.
            if len(found_spots) < PAGE_SIZE:
                break
            current_offset += PAGE_SIZE

        logger.info("Total spots retrieved: %s", len(all_spots))
        return all_spots

    def get_spot(self, spot_id: str) -> Dict[str, Any]:
        """Fetch a single spot by id.

        Raises:
            ValueError: If ``spot_id`` is empty.
        """
        if not spot_id:
            raise ValueError("spot_id is required")
        return self._make_request("GET", f"spots/{spot_id}")

    def get_spot_items(
        self, spot_id: str, offset: int = 0, page_size: int = PAGE_SIZE
    ) -> Dict[str, Any]:
        """Fetch one page of items belonging to a spot.

        Args:
            spot_id: Spot whose items are listed.
            offset: Value sent as the ``start`` query parameter.
            page_size: Value sent as the ``limit`` query parameter.

        Returns:
            The raw response dict; the caller drives pagination.

        Raises:
            ValueError: If ``spot_id`` is empty.
        """
        if not spot_id:
            raise ValueError("spot_id is required")
        params = {"spot": spot_id, "start": offset, "limit": page_size}
        return self._make_request("GET", "items", params=params)

    def get_item(self, item_id: str) -> Dict[str, Any]:
        """Fetch a single item by id.

        Raises:
            ValueError: If ``item_id`` is empty.
        """
        if not item_id:
            raise ValueError("item_id is required")
        return self._make_request("GET", f"items/{item_id}")

    def get_item_content(self, item_id: str) -> bytes:
        """Fetch raw item content as bytes (for file extraction).

        This call does not go through ``_make_request``: failures surface
        as `requests` exceptions (for example ``HTTPError`` from
        ``raise_for_status``), not as ``HighspotClientError``.

        Raises:
            ValueError: If ``item_id`` is empty.
        """
        # NOTE(review): kept on raw `requests` exceptions because callers
        # are not visible here and may depend on them -- confirm before
        # unifying with the HighspotClientError hierarchy.
        if not item_id:
            raise ValueError("item_id is required")

        url = urljoin(self.base_url, f"items/{item_id}/content")
        response = self.session.get(url, timeout=self.timeout)
        response.raise_for_status()
        return response.content

    def health_check(self) -> bool:
        """Validate creds + reachability via a 1-row spots fetch.

        Returns:
            ``True`` if the request succeeds, ``False`` if it raises any
            ``HighspotClientError`` (authentication failures included,
            since they are a subclass).
        """
        try:
            self._make_request("GET", "spots", params={"limit": 1})
        except HighspotClientError:
            return False
        return True

0 commit comments

Comments
 (0)