1212import logging
1313import time
1414from pathlib import Path
15+ from typing import Optional
1516
1617import requests
1718from requests .models import Response
@@ -294,20 +295,32 @@ def begin_analyze(self, analyzer_id: str, file_location: str):
294295
295296 def get_image_from_analyze_operation (
296297 self , analyze_response : Response , image_id : str
297- ):
298- """Retrieves a generated file (e.g., a rendered page image) from a
299- completed analyze operation by its file id / path.
300-
301- In Content Understanding GA the file-retrieval URL changed from
302- ``{operationLocation}/images/{imageId}`` to
303- ``{operationLocation}/files/{fileId}`` (where ``operationLocation`` now
304- ends in ``/analyzerResults/{operationId}``).
298+ ) -> Optional [bytes ]:
299+ """Retrieve a rendered page image (JPEG) generated by a completed
300+ analyze operation, by its file id / path.
301+
302+ Although the GA file-retrieval endpoint is generic
303+ (``{operationLocation}/files/{fileId}``, replacing the legacy
304+ ``{operationLocation}/images/{imageId}``), this helper is intentionally
305+ image-specific: it asserts that the returned ``Content-Type`` is
306+ ``image/jpeg`` and is only intended for use with JPEG page images
307+ produced by the analyzer. Use a different helper if you need to fetch
308+ non-image generated files.
305309
306310 Args:
307- analyze_response (Response): The response object from the analyze operation.
308- image_id (str): The id (or path) of the file to retrieve.
311+ analyze_response (Response): The response object from the analyze
312+ operation (used only to read its ``operation-location`` header).
313+ image_id (str): The id (or path) of the image file to retrieve.
314+
309315 Returns:
310- bytes: The file content as a byte string.
316+ Optional[bytes]: The JPEG image bytes on success, or ``None`` if
317+ the HTTP request fails (the underlying :class:`RequestException`
318+ is logged but not re-raised).
319+
320+ Raises:
321+ ValueError: If the analyze response does not contain an
322+ ``operation-location`` header.
323+ AssertionError: If the retrieved file is not ``image/jpeg``.
311324 """
312325 operation_location = analyze_response .headers .get ("operation-location" , "" )
313326 if not operation_location :
@@ -326,7 +339,7 @@ def get_image_from_analyze_operation(
326339
327340 return response .content
328341 except requests .exceptions .RequestException as e :
329- print ( f "HTTP request failed: { e } " )
342+ self . _logger . error ( "HTTP request failed while retrieving image: %s" , e )
330343 return None
331344
332345 def poll_result (
0 commit comments