diff --git a/python-sdk/folders.mdx b/python-sdk/folders.mdx index c922fc9..c172a07 100644 --- a/python-sdk/folders.mdx +++ b/python-sdk/folders.mdx @@ -96,7 +96,7 @@ All the core document operations available on the main Morphik client are also a - `ingest_file` - Ingest a file into this folder - `ingest_files` - Ingest multiple files into this folder - `ingest_directory` - Ingest all files from a directory into this folder -- `retrieve_chunks` - Retrieve chunks matching a query from this folder +- `retrieve_chunks` - Retrieve chunks matching a query from this folder (supports [reverse image search](/python-sdk/retrieve_chunks#reverse-image-search)) - `retrieve_docs` - Retrieve documents matching a query from this folder - `query` - Generate a completion using context from this folder (supports `llm_config` parameter for custom LLM configuration) - `list_documents` - List all documents in this folder diff --git a/python-sdk/retrieve_chunks.mdx b/python-sdk/retrieve_chunks.mdx index 6db6100..1d82100 100644 --- a/python-sdk/retrieve_chunks.mdx +++ b/python-sdk/retrieve_chunks.mdx @@ -7,7 +7,7 @@ description: "Retrieve relevant chunks from Morphik" ```python def retrieve_chunks( - query: str, + query: Optional[str] = None, filters: Optional[Dict[str, Any]] = None, k: int = 4, min_score: float = 0.0, @@ -15,13 +15,14 @@ description: "Retrieve relevant chunks from Morphik" folder_name: Optional[Union[str, List[str]]] = None, padding: int = 0, output_format: Optional[str] = None, + query_image: Optional[str] = None, ) -> List[FinalChunkResult] ``` ```python async def retrieve_chunks( - query: str, + query: Optional[str] = None, filters: Optional[Dict[str, Any]] = None, k: int = 4, min_score: float = 0.0, @@ -29,6 +30,7 @@ description: "Retrieve relevant chunks from Morphik" folder_name: Optional[Union[str, List[str]]] = None, padding: int = 0, output_format: Optional[str] = None, + query_image: Optional[str] = None, ) -> List[FinalChunkResult] ``` @@ -36,7 +38,7 @@ 
description: "Retrieve relevant chunks from Morphik" ## Parameters -- `query` (str): Search query text +- `query` (str, optional): Search query text. Mutually exclusive with `query_image`; exactly one of the two must be provided. - `filters` (Dict[str, Any], optional): Optional metadata filters - `k` (int, optional): Number of results. Defaults to 4. - `min_score` (float, optional): Minimum similarity threshold. Defaults to 0.0. @@ -44,6 +46,7 @@ description: "Retrieve relevant chunks from Morphik" - `folder_name` (str | List[str], optional): Optional folder scope. Accepts a single folder name or a list of folder names. - `padding` (int, optional): Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only). Defaults to 0. - `output_format` (str, optional): Controls how image chunks are returned. Set to `"url"` to receive presigned URLs; omit or set to `"base64"` (default) to receive base64 content. +- `query_image` (str, optional): Base64-encoded image for reverse image search. Mutually exclusive with `query`. Requires `use_colpali=True`. ## Metadata Filters @@ -140,3 +143,61 @@ The `FinalChunkResult` objects returned by this method have the following proper - The `download_url` field may be populated for image chunks. When using `output_format="url"`, it will typically match `content` for those chunks. Tip: To download the original raw file for a document, use [`get_document_download_url`](./get_document_download_url). + +## Reverse Image Search + +You can search using an image instead of text by providing `query_image` with a base64-encoded image. This lets you find visually similar content in your documents. 
+ + + + ```python + import base64 + from morphik import Morphik + + db = Morphik() + + # Load and encode your query image + with open("query_image.png", "rb") as f: + image_b64 = base64.b64encode(f.read()).decode("utf-8") + + # Search using the image + chunks = db.retrieve_chunks( + query_image=image_b64, + use_colpali=True, # Required for image queries + k=5, + ) + + for chunk in chunks: + print(f"Score: {chunk.score}") + print(f"Document ID: {chunk.document_id}") + print("---") + ``` + + + ```python + import base64 + from morphik import AsyncMorphik + + async with AsyncMorphik() as db: + # Load and encode your query image + with open("query_image.png", "rb") as f: + image_b64 = base64.b64encode(f.read()).decode("utf-8") + + # Search using the image + chunks = await db.retrieve_chunks( + query_image=image_b64, + use_colpali=True, # Required for image queries + k=5, + ) + + for chunk in chunks: + print(f"Score: {chunk.score}") + print(f"Document ID: {chunk.document_id}") + print("---") + ``` + + + + + Reverse image search requires documents to be ingested with `use_colpali=True`. You must provide either `query` or `query_image`, but not both. 
+ diff --git a/python-sdk/retrieve_chunks_grouped.mdx b/python-sdk/retrieve_chunks_grouped.mdx index deab563..23fd81b 100644 --- a/python-sdk/retrieve_chunks_grouped.mdx +++ b/python-sdk/retrieve_chunks_grouped.mdx @@ -7,7 +7,7 @@ description: "Retrieve relevant chunks with grouping for UI display" ```python def retrieve_chunks_grouped( - query: str, + query: Optional[str] = None, filters: Optional[Dict[str, Any]] = None, k: int = 4, min_score: float = 0.0, @@ -20,13 +20,14 @@ description: "Retrieve relevant chunks with grouping for UI display" graph_name: Optional[str] = None, hop_depth: int = 1, include_paths: bool = False, + query_image: Optional[str] = None, ) -> GroupedChunkResponse ``` ```python async def retrieve_chunks_grouped( - query: str, + query: Optional[str] = None, filters: Optional[Dict[str, Any]] = None, k: int = 4, min_score: float = 0.0, @@ -39,6 +40,7 @@ description: "Retrieve relevant chunks with grouping for UI display" graph_name: Optional[str] = None, hop_depth: int = 1, include_paths: bool = False, + query_image: Optional[str] = None, ) -> GroupedChunkResponse ``` @@ -46,7 +48,7 @@ description: "Retrieve relevant chunks with grouping for UI display" ## Parameters -- `query` (str): Search query text +- `query` (str, optional): Search query text. Mutually exclusive with `query_image`; exactly one of the two must be provided. - `filters` (Dict[str, Any], optional): Optional metadata filters - `k` (int, optional): Number of results. Defaults to 4. - `min_score` (float, optional): Minimum similarity threshold. Defaults to 0.0. @@ -59,6 +61,7 @@ description: "Retrieve relevant chunks with grouping for UI display" - `graph_name` (str, optional): Name of the graph to use for knowledge graph-enhanced retrieval - `hop_depth` (int, optional): Number of relationship hops to traverse in the graph. Defaults to 1. - `include_paths` (bool, optional): Whether to include relationship paths in the response. Defaults to False. 
+- `query_image` (str, optional): Base64-encoded image for reverse image search. Mutually exclusive with `query`. Requires `use_colpali=True`. ## Returns @@ -182,3 +185,63 @@ Each `ChunkGroup` in `groups` has: - The `groups` list organizes results with their padding context, ideal for building search result UIs. - When `padding` is specified, surrounding chunks are included in `padding_chunks` for each group. - Knowledge graph parameters (`graph_name`, `hop_depth`, `include_paths`) enable graph-enhanced retrieval. + +## Reverse Image Search + +You can search using an image instead of text by providing `query_image` with a base64-encoded image: + + + + ```python + import base64 + from morphik import Morphik + + db = Morphik() + + # Load and encode your query image + with open("query_image.png", "rb") as f: + image_b64 = base64.b64encode(f.read()).decode("utf-8") + + # Search using the image with grouped results + response = db.retrieve_chunks_grouped( + query_image=image_b64, + use_colpali=True, # Required for image queries + k=5, + padding=1, + ) + + for group in response.groups: + print(f"Main chunk score: {group.main_chunk.score}") + print(f"Document: {group.main_chunk.document_id}") + print("---") + ``` + + + ```python + import base64 + from morphik import AsyncMorphik + + async with AsyncMorphik() as db: + # Load and encode your query image + with open("query_image.png", "rb") as f: + image_b64 = base64.b64encode(f.read()).decode("utf-8") + + # Search using the image with grouped results + response = await db.retrieve_chunks_grouped( + query_image=image_b64, + use_colpali=True, # Required for image queries + k=5, + padding=1, + ) + + for group in response.groups: + print(f"Main chunk score: {group.main_chunk.score}") + print(f"Document: {group.main_chunk.document_id}") + print("---") + ``` + + + + + Reverse image search requires documents to be ingested with `use_colpali=True`. You must provide either `query` or `query_image`, but not both. 
+ diff --git a/python-sdk/users.mdx b/python-sdk/users.mdx index 98ba9b6..317d357 100644 --- a/python-sdk/users.mdx +++ b/python-sdk/users.mdx @@ -90,7 +90,7 @@ The UserScope class provides the same document operations as the main Morphik cl - `ingest_file` - Ingest a file for this user - `ingest_files` - Ingest multiple files for this user - `ingest_directory` - Ingest all files from a directory for this user -- `retrieve_chunks` - Retrieve chunks matching a query from this user's documents +- `retrieve_chunks` - Retrieve chunks matching a query from this user's documents (supports [reverse image search](/python-sdk/retrieve_chunks#reverse-image-search)) - `retrieve_docs` - Retrieve documents matching a query from this user's documents - `query` - Generate a completion using context from this user's documents (supports `llm_config` parameter for custom LLM configuration) - `list_documents` - List all documents owned by this user