Skip to content

Commit 2ee0abf

Browse files
committed
pending clean_server_url change
1 parent 5c0c940 commit 2ee0abf

File tree

1 file changed

+10
-4
lines changed

1 file changed

+10
-4
lines changed

src/unstructured_client/_hooks/custom/clean_server_url_hook.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99

1010
def clean_server_url(base_url: str) -> str:
11-
"""Fix url scheme and remove the '/general/v0/general' path."""
11+
"""Fix url scheme and remove subpath for URLs under Unstructured domains."""
1212

1313
if not base_url:
1414
return ""
@@ -19,12 +19,18 @@ def clean_server_url(base_url: str) -> str:
1919

2020
parsed_url: ParseResult = urlparse(base_url)
2121

22-
if "api.unstructuredapp.io" in parsed_url.netloc:
22+
if "unstructuredapp.io" in parsed_url.netloc:
2323
if parsed_url.scheme != "https":
2424
parsed_url = parsed_url._replace(scheme="https")
25+
# We only want the base url for Unstructured domains
26+
clean_url = urlunparse(parsed_url._replace(path="", params="", query="", fragment=""))
27+
28+
else:
29+
# For other domains, we want to keep the path
30+
clean_url = urlunparse(parsed_url._replace(params="", query="", fragment=""))
2531

26-
# We only want the base url
27-
return urlunparse(parsed_url._replace(path="", params="", query="", fragment=""))
32+
return clean_url
33+
2834

2935

3036
def choose_server_url(endpoint_url: str | None, client_url: str, default_endpoint_url: str) -> str:

0 commit comments

Comments
 (0)