morphik-org · Adityav369 · Nov 27, 2025 · Nov 27, 2025
diff --git a/docs.json b/docs.json
@@ -162,9 +162,11 @@
             "group": "Document Retrieval",
             "pages": [
               "python-sdk/retrieve_chunks",
+              "python-sdk/retrieve_chunks_grouped",
               "python-sdk/retrieve_docs",
               "python-sdk/query",
               "python-sdk/list_documents",
+              "python-sdk/search_documents",
               "python-sdk/get_document",
               "python-sdk/get_document_by_filename"
             ]
@@ -176,6 +178,8 @@
             "python-sdk/create_folder",
             "python-sdk/list_folders",
             "python-sdk/get_folder",
+            "python-sdk/get_folders_summary",
+            "python-sdk/get_folders_details",
             "python-sdk/add_document_to_folder",
             "python-sdk/remove_document_from_folder",
             "python-sdk/delete_folder",
@@ -204,9 +208,12 @@
             "group": "Knowledge Graph Operations",
             "pages": [
               "python-sdk/create_graph",
+              "python-sdk/update_graph",
               "python-sdk/get_graph",
               "python-sdk/list_graphs",
               "python-sdk/get_graph_visualization",
+              "python-sdk/get_graph_status",
+              "python-sdk/wait_for_graph_completion",
               "python-sdk/check_workflow_status"
             ]
           },
@@ -220,7 +227,12 @@
           {
             "group": "Document Management",
             "pages": [
-              "python-sdk/get_document_download_url"
+              "python-sdk/get_document_file",
+              "python-sdk/extract_document_pages",
+              "python-sdk/get_document_download_url",
+              "python-sdk/get_document_status",
+              "python-sdk/delete_document",
+              "python-sdk/delete_document_by_filename"
             ]
           },
           {

diff --git a/python-sdk/batch_get_chunks.mdx b/python-sdk/batch_get_chunks.mdx
@@ -1,24 +1,37 @@
 ---
 title: "batch_get_chunks"
 description: "Retrieve specific chunks by their document ID and chunk number"
 ---

 <Tabs>
   <Tab title="Sync">
     ```python
-    def batch_get_chunks(sources: List[Union[ChunkSource, Dict[str, Any]]]) -> List[FinalChunkResult]
+    def batch_get_chunks(
+        sources: List[Union[ChunkSource, Dict[str, Any]]],
+        folder_name: Optional[Union[str, List[str]]] = None,
+        use_colpali: bool = True,
+        output_format: Optional[str] = None,
+    ) -> List[FinalChunkResult]
     ```
   </Tab>
   <Tab title="Async">
     ```python
-    async def batch_get_chunks(sources: List[Union[ChunkSource, Dict[str, Any]]]) -> List[FinalChunkResult]
+    async def batch_get_chunks(
+        sources: List[Union[ChunkSource, Dict[str, Any]]],
+        folder_name: Optional[Union[str, List[str]]] = None,
+        use_colpali: bool = True,
+        output_format: Optional[str] = None,
+    ) -> List[FinalChunkResult]
     ```
   </Tab>
 </Tabs>
 
 ## Parameters
 
 - `sources` (List[Union[ChunkSource, Dict[str, Any]]]): List of ChunkSource objects or dictionaries with document_id and chunk_number
+- `folder_name` (str | List[str], optional): Folder scope for the call. Accepts a single folder name or a list of folder names.
+- `use_colpali` (bool, optional): Whether to request multimodal chunks when available. Defaults to True.
+- `output_format` (str, optional): Controls how image chunks are returned. Set to `"url"` to receive presigned URLs; omit or set to `"base64"` (default) to receive base64 content.
 
 ## Returns
 
@@ -80,7 +93,7 @@

 Each `FinalChunkResult` object in the returned list has the following properties:

 - `content` (str | PILImage): Chunk content (text or image)
 - `score` (float): Relevance score
 - `document_id` (str): Parent document ID
 - `chunk_number` (int): Chunk sequence number

diff --git a/python-sdk/batch_get_documents.mdx b/python-sdk/batch_get_documents.mdx
@@ -1,24 +1,31 @@
 ---
 title: "batch_get_documents"
 description: "Retrieve multiple documents by their IDs in a single batch operation"
 ---

 <Tabs>
   <Tab title="Sync">
     ```python
-    def batch_get_documents(document_ids: List[str]) -> List[Document]
+    def batch_get_documents(
+        document_ids: List[str],
+        folder_name: Optional[Union[str, List[str]]] = None,
+    ) -> List[Document]
     ```
   </Tab>
   <Tab title="Async">
     ```python
-    async def batch_get_documents(document_ids: List[str]) -> List[Document]
+    async def batch_get_documents(
+        document_ids: List[str],
+        folder_name: Optional[Union[str, List[str]]] = None,
+    ) -> List[Document]
     ```
   </Tab>
 </Tabs>
 
 ## Parameters
 
 - `document_ids` (List[str]): List of document IDs to retrieve
+- `folder_name` (str | List[str], optional): Folder scope for the call. Accepts a single folder name or a list of folder names.
 
 ## Returns
 

diff --git a/python-sdk/extract_document_pages.mdx b/python-sdk/extract_document_pages.mdx
@@ -0,0 +1,101 @@
+---
+title: "extract_document_pages"
+description: "Extract specific pages from a document"
+---
+
+<Tabs>
+  <Tab title="Sync">
+    ```python
+    def extract_document_pages(
+        document_id: str,
+        start_page: int,
+        end_page: int,
+    ) -> DocumentPagesResponse
+    ```
+  </Tab>
+  <Tab title="Async">
+    ```python
+    async def extract_document_pages(
+        document_id: str,
+        start_page: int,
+        end_page: int,
+    ) -> DocumentPagesResponse
+    ```
+  </Tab>
+</Tabs>
+
+## Parameters
+
+- `document_id` (str): ID of the document to extract pages from
+- `start_page` (int): Starting page number (1-indexed)
+- `end_page` (int): Ending page number (1-indexed)
+
+## Returns
+
+- `DocumentPagesResponse`: Object containing extracted pages with metadata
+
+## Examples
+
+<Tabs>
+  <Tab title="Sync">
+    ```python
+    from morphik import Morphik
+
+    db = Morphik()
+
+    # Extract pages 1-3 from a document
+    response = db.extract_document_pages(
+        document_id="doc_123abc",
+        start_page=1,
+        end_page=3,
+    )
+
+    print(f"Document ID: {response.document_id}")
+    print(f"Extracted pages {response.start_page}-{response.end_page}")
+    print(f"Total pages in document: {response.total_pages}")
+    print(f"Number of pages extracted: {len(response.pages)}")
+
+    # Pages are base64 encoded
+    for i, page_content in enumerate(response.pages):
+        print(f"Page {response.start_page + i}: {len(page_content)} chars")
+    ```
+  </Tab>
+  <Tab title="Async">
+    ```python
+    from morphik import AsyncMorphik
+
+    async with AsyncMorphik() as db:
+        # Extract pages 1-3 from a document
+        response = await db.extract_document_pages(
+            document_id="doc_123abc",
+            start_page=1,
+            end_page=3,
+        )
+
+        print(f"Document ID: {response.document_id}")
+        print(f"Extracted pages {response.start_page}-{response.end_page}")
+        print(f"Total pages in document: {response.total_pages}")
+        print(f"Number of pages extracted: {len(response.pages)}")
+
+        # Pages are base64 encoded
+        for i, page_content in enumerate(response.pages):
+            print(f"Page {response.start_page + i}: {len(page_content)} chars")
+    ```
+  </Tab>
+</Tabs>
+
+## DocumentPagesResponse Properties
+
+The `DocumentPagesResponse` object has the following properties:
+
+- `document_id` (str): ID of the document
+- `pages` (List[str]): List of page contents as base64 encoded strings
+- `start_page` (int): Start page number (1-indexed)
+- `end_page` (int): End page number (1-indexed)
+- `total_pages` (int): Total number of pages in the document
+
+## Notes
+
+- Page numbers are 1-indexed (first page is 1, not 0).
+- The `pages` list contains base64 encoded representations of each page.
+- Useful for extracting specific sections of large documents.
diff --git a/python-sdk/get_document_file.mdx b/python-sdk/get_document_file.mdx
@@ -0,0 +1,74 @@
+---
+title: "get_document_file"
+description: "Download the raw file content of a document"
+---
+
+<Tabs>
+  <Tab title="Sync">
+    ```python
+    def get_document_file(
+        document_id: str,
+    ) -> bytes
+    ```
+  </Tab>
+  <Tab title="Async">
+    ```python
+    async def get_document_file(
+        document_id: str,
+    ) -> bytes
+    ```
+  </Tab>
+</Tabs>
+
+## Parameters
+
+- `document_id` (str): ID of the document to download
+
+## Returns
+
+- `bytes`: Raw file content as bytes
+
+## Examples
+
+<Tabs>
+  <Tab title="Sync">
+    ```python
+    from morphik import Morphik
+
+    db = Morphik()
+
+    # Download a document's raw file
+    doc_id = "doc_123abc"
+    file_content = db.get_document_file(doc_id)
+
+    # Save to local file
+    with open("downloaded_file.pdf", "wb") as f:
+        f.write(file_content)
+
+    print(f"Downloaded {len(file_content)} bytes")
+    ```
+  </Tab>
+  <Tab title="Async">
+    ```python
+    from morphik import AsyncMorphik
+    import aiofiles  # third-party package: pip install aiofiles
+
+    async with AsyncMorphik() as db:
+        # Download a document's raw file
+        doc_id = "doc_123abc"
+        file_content = await db.get_document_file(doc_id)
+
+        # Save to local file
+        async with aiofiles.open("downloaded_file.pdf", "wb") as f:
+            await f.write(file_content)
+
+        print(f"Downloaded {len(file_content)} bytes")
+    ```
+  </Tab>
+</Tabs>
+
+## Notes
+
+- This method returns the raw file bytes, which you can save to disk or process in memory.
+- To get a downloadable URL instead of raw bytes, use [`get_document_download_url`](./get_document_download_url).
+- The returned bytes match the original file that was uploaded/ingested.
diff --git a/python-sdk/get_document_status.mdx b/python-sdk/get_document_status.mdx
@@ -0,0 +1,92 @@
+---
+title: "get_document_status"
+description: "Get the current processing status of a document"
+---
+
+<Tabs>
+  <Tab title="Sync">
+    ```python
+    def get_document_status(
+        document_id: str,
+    ) -> Dict[str, Any]
+    ```
+  </Tab>
+  <Tab title="Async">
+    ```python
+    async def get_document_status(
+        document_id: str,
+    ) -> Dict[str, Any]
+    ```
+  </Tab>
+</Tabs>
+
+## Parameters
+
+- `document_id` (str): ID of the document to check
+
+## Returns
+
+- `Dict[str, Any]`: Status information including current status, potential errors, and other metadata
+
+## Examples
+
+<Tabs>
+  <Tab title="Sync">
+    ```python
+    from morphik import Morphik
+
+    db = Morphik()
+
+    # Check document processing status
+    status = db.get_document_status("doc_123abc")
+
+    print(f"Status: {status.get('status')}")
+    if status.get('error'):
+        print(f"Error: {status.get('error')}")
+
+    # Use in a polling loop
+    import time
+
+    while True:
+        status = db.get_document_status("doc_123abc")
+        if status.get('status') == 'completed':
+            print("Document processing complete!")
+            break
+        elif status.get('status') == 'failed':
+            print(f"Document processing failed: {status.get('error')}")
+            break
+        time.sleep(2)
+    ```
+  </Tab>
+  <Tab title="Async">
+    ```python
+    from morphik import AsyncMorphik
+    import asyncio
+
+    async with AsyncMorphik() as db:
+        # Check document processing status
+        status = await db.get_document_status("doc_123abc")
+
+        print(f"Status: {status.get('status')}")
+        if status.get('error'):
+            print(f"Error: {status.get('error')}")
+
+        # Use in a polling loop
+        while True:
+            status = await db.get_document_status("doc_123abc")
+            if status.get('status') == 'completed':
+                print("Document processing complete!")
+                break
+            elif status.get('status') == 'failed':
+                print(f"Document processing failed: {status.get('error')}")
+                break
+            await asyncio.sleep(2)
+    ```
+  </Tab>
+</Tabs>
+
+## Notes
+
+- Common status values include: `"processing"`, `"completed"`, `"failed"`.
+- This is a lightweight endpoint useful for checking progress without fetching the full document.
+- The SDK also provides helpers for polling: see the document ingestion methods, which can wait for completion automatically.