diff --git a/python-sdk/folders.mdx b/python-sdk/folders.mdx
index c922fc9..c172a07 100644
--- a/python-sdk/folders.mdx
+++ b/python-sdk/folders.mdx
@@ -96,7 +96,7 @@ All the core document operations available on the main Morphik client are also a
- `ingest_file` - Ingest a file into this folder
- `ingest_files` - Ingest multiple files into this folder
- `ingest_directory` - Ingest all files from a directory into this folder
-- `retrieve_chunks` - Retrieve chunks matching a query from this folder
+- `retrieve_chunks` - Retrieve chunks matching a query from this folder (supports [reverse image search](/python-sdk/retrieve_chunks#reverse-image-search))
- `retrieve_docs` - Retrieve documents matching a query from this folder
- `query` - Generate a completion using context from this folder (supports `llm_config` parameter for custom LLM configuration)
- `list_documents` - List all documents in this folder
diff --git a/python-sdk/retrieve_chunks.mdx b/python-sdk/retrieve_chunks.mdx
index 6db6100..1d82100 100644
--- a/python-sdk/retrieve_chunks.mdx
+++ b/python-sdk/retrieve_chunks.mdx
@@ -7,7 +7,7 @@ description: "Retrieve relevant chunks from Morphik"
```python
def retrieve_chunks(
- query: str,
+ query: Optional[str] = None,
filters: Optional[Dict[str, Any]] = None,
k: int = 4,
min_score: float = 0.0,
@@ -15,13 +15,14 @@ description: "Retrieve relevant chunks from Morphik"
folder_name: Optional[Union[str, List[str]]] = None,
padding: int = 0,
output_format: Optional[str] = None,
+ query_image: Optional[str] = None,
) -> List[FinalChunkResult]
```
```python
async def retrieve_chunks(
- query: str,
+ query: Optional[str] = None,
filters: Optional[Dict[str, Any]] = None,
k: int = 4,
min_score: float = 0.0,
@@ -29,6 +30,7 @@ description: "Retrieve relevant chunks from Morphik"
folder_name: Optional[Union[str, List[str]]] = None,
padding: int = 0,
output_format: Optional[str] = None,
+ query_image: Optional[str] = None,
) -> List[FinalChunkResult]
```
@@ -36,7 +38,7 @@ description: "Retrieve relevant chunks from Morphik"
## Parameters
-- `query` (str): Search query text
+- `query` (str, optional): Search query text. Mutually exclusive with `query_image`; exactly one of the two must be provided.
- `filters` (Dict[str, Any], optional): Optional metadata filters
- `k` (int, optional): Number of results. Defaults to 4.
- `min_score` (float, optional): Minimum similarity threshold. Defaults to 0.0.
@@ -44,6 +46,7 @@ description: "Retrieve relevant chunks from Morphik"
- `folder_name` (str | List[str], optional): Optional folder scope. Accepts a single folder name or a list of folder names.
- `padding` (int, optional): Number of additional chunks/pages to retrieve before and after matched chunks (ColPali only). Defaults to 0.
- `output_format` (str, optional): Controls how image chunks are returned. Set to `"url"` to receive presigned URLs; omit or set to `"base64"` (default) to receive base64 content.
+- `query_image` (str, optional): Base64-encoded image to use as the query for reverse image search. Mutually exclusive with `query`. Requires `use_colpali=True` on the call and documents that were ingested with `use_colpali=True`.
## Metadata Filters
@@ -140,3 +143,61 @@ The `FinalChunkResult` objects returned by this method have the following proper
- The `download_url` field may be populated for image chunks. When using `output_format="url"`, it will typically match `content` for those chunks.
Tip: To download the original raw file for a document, use [`get_document_download_url`](./get_document_download_url).
+
+## Reverse Image Search
+
+You can search using an image instead of text by providing `query_image` with a base64-encoded image. This enables finding visually similar content in your documents.
+
+
+
+ ```python
+ import base64
+ from morphik import Morphik
+
+ db = Morphik()
+
+ # Load and encode your query image
+ with open("query_image.png", "rb") as f:
+ image_b64 = base64.b64encode(f.read()).decode("utf-8")
+
+ # Search using the image
+ chunks = db.retrieve_chunks(
+ query_image=image_b64,
+ use_colpali=True, # Required for image queries
+ k=5,
+ )
+
+ for chunk in chunks:
+ print(f"Score: {chunk.score}")
+ print(f"Document ID: {chunk.document_id}")
+ print("---")
+ ```
+
+
+ ```python
+ import base64
+ from morphik import AsyncMorphik
+
+ async with AsyncMorphik() as db:
+ # Load and encode your query image
+ with open("query_image.png", "rb") as f:
+ image_b64 = base64.b64encode(f.read()).decode("utf-8")
+
+ # Search using the image
+ chunks = await db.retrieve_chunks(
+ query_image=image_b64,
+ use_colpali=True, # Required for image queries
+ k=5,
+ )
+
+ for chunk in chunks:
+ print(f"Score: {chunk.score}")
+ print(f"Document ID: {chunk.document_id}")
+ print("---")
+ ```
+
+
+
+
+ Reverse image search only works on documents that were ingested with `use_colpali=True`, and the retrieval call itself must also pass `use_colpali=True`. Provide exactly one of `query` or `query_image`, never both.
+
diff --git a/python-sdk/retrieve_chunks_grouped.mdx b/python-sdk/retrieve_chunks_grouped.mdx
index deab563..23fd81b 100644
--- a/python-sdk/retrieve_chunks_grouped.mdx
+++ b/python-sdk/retrieve_chunks_grouped.mdx
@@ -7,7 +7,7 @@ description: "Retrieve relevant chunks with grouping for UI display"
```python
def retrieve_chunks_grouped(
- query: str,
+ query: Optional[str] = None,
filters: Optional[Dict[str, Any]] = None,
k: int = 4,
min_score: float = 0.0,
@@ -20,13 +20,14 @@ description: "Retrieve relevant chunks with grouping for UI display"
graph_name: Optional[str] = None,
hop_depth: int = 1,
include_paths: bool = False,
+ query_image: Optional[str] = None,
) -> GroupedChunkResponse
```
```python
async def retrieve_chunks_grouped(
- query: str,
+ query: Optional[str] = None,
filters: Optional[Dict[str, Any]] = None,
k: int = 4,
min_score: float = 0.0,
@@ -39,6 +40,7 @@ description: "Retrieve relevant chunks with grouping for UI display"
graph_name: Optional[str] = None,
hop_depth: int = 1,
include_paths: bool = False,
+ query_image: Optional[str] = None,
) -> GroupedChunkResponse
```
@@ -46,7 +48,7 @@ description: "Retrieve relevant chunks with grouping for UI display"
## Parameters
-- `query` (str): Search query text
+- `query` (str, optional): Search query text. Mutually exclusive with `query_image`; exactly one of the two must be provided.
- `filters` (Dict[str, Any], optional): Optional metadata filters
- `k` (int, optional): Number of results. Defaults to 4.
- `min_score` (float, optional): Minimum similarity threshold. Defaults to 0.0.
@@ -59,6 +61,7 @@ description: "Retrieve relevant chunks with grouping for UI display"
- `graph_name` (str, optional): Name of the graph to use for knowledge graph-enhanced retrieval
- `hop_depth` (int, optional): Number of relationship hops to traverse in the graph. Defaults to 1.
- `include_paths` (bool, optional): Whether to include relationship paths in the response. Defaults to False.
+- `query_image` (str, optional): Base64-encoded image to use as the query for reverse image search. Mutually exclusive with `query`. Requires `use_colpali=True` on the call and documents that were ingested with `use_colpali=True`.
## Returns
@@ -182,3 +185,63 @@ Each `ChunkGroup` in `groups` has:
- The `groups` list organizes results with their padding context, ideal for building search result UIs.
- When `padding` is specified, surrounding chunks are included in `padding_chunks` for each group.
- Knowledge graph parameters (`graph_name`, `hop_depth`, `include_paths`) enable graph-enhanced retrieval.
+
+## Reverse Image Search
+
+You can search using an image instead of text by providing `query_image` with a base64-encoded image:
+
+
+
+ ```python
+ import base64
+ from morphik import Morphik
+
+ db = Morphik()
+
+ # Load and encode your query image
+ with open("query_image.png", "rb") as f:
+ image_b64 = base64.b64encode(f.read()).decode("utf-8")
+
+ # Search using the image with grouped results
+ response = db.retrieve_chunks_grouped(
+ query_image=image_b64,
+ use_colpali=True, # Required for image queries
+ k=5,
+ padding=1,
+ )
+
+ for group in response.groups:
+ print(f"Main chunk score: {group.main_chunk.score}")
+ print(f"Document: {group.main_chunk.document_id}")
+ print("---")
+ ```
+
+
+ ```python
+ import base64
+ from morphik import AsyncMorphik
+
+ async with AsyncMorphik() as db:
+ # Load and encode your query image
+ with open("query_image.png", "rb") as f:
+ image_b64 = base64.b64encode(f.read()).decode("utf-8")
+
+ # Search using the image with grouped results
+ response = await db.retrieve_chunks_grouped(
+ query_image=image_b64,
+ use_colpali=True, # Required for image queries
+ k=5,
+ padding=1,
+ )
+
+ for group in response.groups:
+ print(f"Main chunk score: {group.main_chunk.score}")
+ print(f"Document: {group.main_chunk.document_id}")
+ print("---")
+ ```
+
+
+
+
+ Reverse image search only works on documents that were ingested with `use_colpali=True`, and the retrieval call itself must also pass `use_colpali=True`. Provide exactly one of `query` or `query_image`, never both.
+
diff --git a/python-sdk/users.mdx b/python-sdk/users.mdx
index 98ba9b6..317d357 100644
--- a/python-sdk/users.mdx
+++ b/python-sdk/users.mdx
@@ -90,7 +90,7 @@ The UserScope class provides the same document operations as the main Morphik cl
- `ingest_file` - Ingest a file for this user
- `ingest_files` - Ingest multiple files for this user
- `ingest_directory` - Ingest all files from a directory for this user
-- `retrieve_chunks` - Retrieve chunks matching a query from this user's documents
+- `retrieve_chunks` - Retrieve chunks matching a query from this user's documents (supports [reverse image search](/python-sdk/retrieve_chunks#reverse-image-search))
- `retrieve_docs` - Retrieve documents matching a query from this user's documents
- `query` - Generate a completion using context from this user's documents (supports `llm_config` parameter for custom LLM configuration)
- `list_documents` - List all documents owned by this user