Skip to content

Commit f42e662

Browse files
authored
Merge pull request #32 from Zipstack/feat/add-whisper-detail-method
feat: add whisper_detail method to LLMWhispererClientV2
2 parents f0a5b15 + 6ccfd6d commit f42e662

File tree

5 files changed

+149
-6
lines changed

5 files changed

+149
-6
lines changed

README.md

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,33 @@ LLMs are powerful, but their output is as good as the input you provide. LLMWhis
99

1010
Refer to the client documentation for more information: [LLMWhisperer Client Documentation](https://docs.unstract.com/llmwhisperer/llm_whisperer/python_client/llm_whisperer_python_client_intro/)
1111

12-
## A note on versions
12+
## Client
1313

14-
There are two versions of the client library available in this package:
14+
This package provides **LLMWhispererClientV2**, the client for LLMWhisperer API v2. It is required for all users on API version 2.0.0 and above.
1515

16-
**LLMWhispererClient**: This is the legacy version of the client library and is recommended for supporting older apps only. This version will be deprecated in the future.
16+
Documentation is available [here](https://docs.unstract.com/llmwhisperer/).
1717

18-
**LLMWhispererClientV2**: This is the latest version of the client library and is recommended for all new users. It is mandatory for all users who are using LLMWhisperer API version 2.0.0 and above (All customers who have signed up after 5th November 2024).
18+
## Running Tests
1919

20-
Documentation for both versions are available [here](https://docs.unstract.com/llmwhisperer/)
20+
Install test dependencies and run all tests:
2121

22+
```bash
23+
uv run --group test pytest
24+
```
25+
26+
To run only unit tests (skipping integration tests):
27+
28+
```bash
29+
uv run --group test pytest tests/unit tests/utils_test.py
30+
```
31+
32+
To run only integration tests:
33+
34+
```bash
35+
uv run --group test pytest tests/integration
36+
```
37+
38+
Integration tests require a valid API key. Copy `sample.env` to `.env` and fill in your credentials before running them.
2239

2340
## Questions and Feedback
2441

sample.env

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
LLMWHISPERER_BASE_URL=https://llmwhisperer-api.unstract.com/v1
21
LLMWHISPERER_BASE_URL_V2=https://llmwhisperer-api.us-central.unstract.com/api/v2
32
LLMWHISPERER_LOG_LEVEL=DEBUG
43
LLMWHISPERER_API_KEY=

src/unstract/llmwhisperer/client_v2.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,48 @@ def get_highlight_data(self, whisper_hash: str, lines: str, extract_all_lines: b
325325
raise LLMWhispererClientException(err)
326326
return json.loads(response.text)
327327

328+
def whisper_detail(self, whisper_hash: str) -> Any:
    """Fetch the metadata recorded for a text extraction job.

    Issues a GET request to the '/whisper-detail' endpoint of the LLMWhisperer API with
    ``whisper_hash`` as a query parameter and returns the JSON-decoded response body.
    Refer to https://docs.unstract.com/llmwhisperer/llm_whisperer/apis/llm_whisperer_text_extraction_detail_api

    Args:
        whisper_hash (str): The identifier returned when starting the extraction process.

    Returns:
        Any: The JSON-decoded response body. For a successful lookup this is expected to be a
            dictionary of extraction details such as completed_at, mode, processed_pages,
            processing_started_at, processing_time_in_seconds, requested_pages, tag,
            total_pages, upload_file_size_in_kb, and whisper_hash.

    Raises:
        LLMWhispererClientException: If the API answers with a status other than 200. The
            exception carries the status code together with the decoded JSON error body, or a
            descriptive message when the body is empty or is not valid JSON.
    """
    self.logger.debug("whisper_detail called")
    url = f"{self.base_url}/whisper-detail"
    self.logger.debug("url: %s", url)
    self.logger.debug("whisper_hash: %s", whisper_hash)

    request = requests.Request("GET", url, headers=self.headers, params={"whisper_hash": whisper_hash})
    response = self._send_request(request.prepare())

    # Happy path first: every non-200 status is converted into a client exception below.
    if response.status_code == 200:
        return json.loads(response.text)

    status = response.status_code
    body = response.text
    if not (body or "").strip():
        raise LLMWhispererClientException("API error: empty response body", status)

    try:
        err = json.loads(body)
    except json.JSONDecodeError as e:
        # Echo at most 500 characters of the body so a large error page does not flood the message.
        response_preview = body
        if len(body) > 500:
            response_preview = body[:500] + "..."
        raise LLMWhispererClientException(f"API error: non-JSON response - {response_preview}", status) from e
    raise LLMWhispererClientException(err, status)
369+
328370
def whisper(
329371
self,
330372
file_path: str = "",

tests/integration/client_v2_test.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,51 @@ def test_webhook(client_v2: LLMWhispererClientV2, url: str, token: str, webhook_
235235
assert e.error_message()["status_code"] == 404
236236

237237

238+
def test_whisper_detail(client_v2: LLMWhispererClientV2, data_dir: str) -> None:
    """Run a native_text extraction, then check that whisper_detail reports its metadata."""
    whisper_result = client_v2.whisper(
        mode="native_text",
        output_mode="text",
        file_path=os.path.join(data_dir, "credit_card.pdf"),
        wait_for_completion=True,
    )
    whisper_hash = whisper_result["whisper_hash"]

    detail = client_v2.whisper_detail(whisper_hash)

    assert isinstance(detail, dict)
    assert detail["whisper_hash"] == whisper_hash

    # Every documented metadata field must be present; extra fields are tolerated.
    required_keys = {
        "completed_at",
        "mode",
        "processed_pages",
        "processing_started_at",
        "processing_time_in_seconds",
        "requested_pages",
        "tag",
        "total_pages",
        "upload_file_size_in_kb",
        "whisper_hash",
    }
    missing_keys = required_keys - set(detail.keys())
    assert not missing_keys, f"whisper_detail is missing expected keys: {missing_keys}"

    assert detail["mode"] == "native_text"
    assert detail["processed_pages"] > 0
    assert detail["total_pages"] > 0
270+
assert detail["total_pages"] > 0
271+
272+
273+
def test_whisper_detail_not_found(client_v2: LLMWhispererClientV2) -> None:
    """An unknown whisper_hash must surface as a client exception with status 400."""
    with pytest.raises(LLMWhispererClientException) as raised:
        client_v2.whisper_detail("nonexistent_hash_12345")

    exception = raised.value
    payload = exception.error_message()
    assert exception.status_code == 400
    assert "message" in payload
281+
282+
238283
def assert_error_message(whisper_result: dict) -> None:
239284
assert isinstance(whisper_result, dict)
240285
assert whisper_result["status"] == "error"

tests/unit/client_v2_test.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,46 @@ def test_get_webhook_details(mocker: MockerFixture, client_v2: LLMWhispererClien
3939
assert response["webhook_details"]["url"] == WEBHOOK_URL
4040

4141

42+
def test_whisper_detail_success(mocker: MockerFixture, client_v2: LLMWhispererClientV2) -> None:
    """A 200 response from the API is decoded and returned by whisper_detail."""
    # Canned API reply: the client only reads status_code and text from the response.
    fake_response = MagicMock(
        status_code=200,
        text=(
            '{"whisper_hash": "abc123", "mode": "high_quality", "processed_pages": 3,'
            ' "requested_pages": 3, "total_pages": 5, "upload_file_size_in_kb": 120.5,'
            ' "processing_time_in_seconds": 4.2, "completed_at": "2025-01-01T00:00:00Z",'
            ' "processing_started_at": "2025-01-01T00:00:00Z", "tag": "default"}'
        ),
    )
    mock_send = mocker.patch("requests.Session.send", return_value=fake_response)

    response = client_v2.whisper_detail("abc123")

    expected_fields = (
        ("whisper_hash", "abc123"),
        ("mode", "high_quality"),
        ("processed_pages", 3),
        ("total_pages", 5),
        ("upload_file_size_in_kb", 120.5),
    )
    for field, value in expected_fields:
        assert response[field] == value
    mock_send.assert_called_once()
63+
64+
65+
def test_whisper_detail_not_found(mocker: MockerFixture, client_v2: LLMWhispererClientV2) -> None:
    """A 400 response with a JSON body is raised as LLMWhispererClientException."""
    fake_response = MagicMock(status_code=400, text='{"message": "Record not found"}')
    mock_send = mocker.patch("requests.Session.send", return_value=fake_response)

    with pytest.raises(LLMWhispererClientException) as raised:
        client_v2.whisper_detail("nonexistent_hash")

    exception = raised.value
    payload = exception.error_message()
    assert payload["message"] == "Record not found"
    assert exception.status_code == 400
    mock_send.assert_called_once()
80+
81+
4282
def test_whisper_json_string_response_error(mocker: MockerFixture, client_v2: LLMWhispererClientV2) -> None:
4383
"""Test whisper method handles JSON string responses correctly for error
4484
cases."""

0 commit comments

Comments
 (0)