|
| 1 | +--- |
| 2 | +title: "Batch Get Chunks" |
| 3 | +description: "Retrieve specific chunks by document ID and chunk number" |
| 4 | +--- |
| 5 | + |
| 6 | +Retrieve specific chunks by their document ID and chunk number in a single batch operation. This is useful for fetching exact chunks after a retrieval step or for building custom pipelines. |
| 7 | + |
| 8 | +<Tabs> |
| 9 | + <Tab title="Python"> |
| 10 | + ```python |
| 11 | + from morphik import Morphik |
| 12 | + |
| 13 | + db = Morphik("your-uri") |
| 14 | + |
| 15 | + chunks = db.batch_get_chunks( |
| 16 | + sources=[ |
| 17 | + {"document_id": "doc_abc123", "chunk_number": 0}, |
| 18 | + {"document_id": "doc_abc123", "chunk_number": 1}, |
| 19 | + {"document_id": "doc_xyz789", "chunk_number": 5} |
| 20 | + ], |
| 21 | + folder_name="/reports", |
| 22 | + use_colpali=True, |
| 23 | + output_format="url" |
| 24 | + ) |
| 25 | + |
| 26 | + for chunk in chunks: |
| 27 | + print(f"Doc {chunk.document_id}, Chunk {chunk.chunk_number}") |
| 28 | + print(f"Content: {chunk.content[:200]}...") |
| 29 | + ``` |
| 30 | + </Tab> |
| 31 | + <Tab title="TypeScript"> |
| 32 | + ```typescript |
| 33 | + import Morphik from 'morphik'; |
| 34 | + |
| 35 | + // For Teams/Enterprise, use your dedicated host: https://companyname-api.morphik.ai |
| 36 | + const client = new Morphik({ |
| 37 | + apiKey: process.env.MORPHIK_API_KEY, |
| 38 | + baseURL: 'https://api.morphik.ai' |
| 39 | + }); |
| 40 | + |
| 41 | + const chunks = await client.batch.retrieveChunks({ |
| 42 | + sources: [ |
| 43 | + { document_id: 'doc_abc123', chunk_number: 0 }, |
| 44 | + { document_id: 'doc_abc123', chunk_number: 1 }, |
| 45 | + { document_id: 'doc_xyz789', chunk_number: 5 } |
| 46 | + ], |
| 47 | + folder_name: '/reports', |
| 48 | + use_colpali: true, |
| 49 | + output_format: 'url' |
| 50 | + }); |
| 51 | + |
| 52 | + chunks.forEach(chunk => { |
| 53 | + console.log(`Doc ${chunk.document_id}, Chunk ${chunk.chunk_number}`); |
| 54 | + console.log(`Content: ${chunk.content.slice(0, 200)}...`); |
| 55 | + }); |
| 56 | + ``` |
| 57 | + </Tab> |
| 58 | + <Tab title="cURL"> |
| 59 | + ```bash |
| 60 | + curl -X POST "https://api.morphik.ai/batch/chunks" \ |
| 61 | + -H "Authorization: Bearer $MORPHIK_API_KEY" \ |
| 62 | + -H "Content-Type: application/json" \ |
| 63 | + -d '{ |
| 64 | + "sources": [ |
| 65 | + {"document_id": "doc_abc123", "chunk_number": 0}, |
| 66 | + {"document_id": "doc_abc123", "chunk_number": 1}, |
| 67 | + {"document_id": "doc_xyz789", "chunk_number": 5} |
| 68 | + ], |
| 69 | + "folder_name": "/reports", |
| 70 | + "use_colpali": true, |
| 71 | + "output_format": "url" |
| 72 | + }' |
| 73 | + ``` |
| 74 | + </Tab> |
| 75 | +</Tabs> |
| 76 | + |
| 77 | +## Parameters |
| 78 | + |
| 79 | +| Parameter | Type | Default | Description | |
| 80 | +|-----------|------|---------|-------------| |
| 81 | +| `sources` | array | required | List of `{document_id, chunk_number}` objects | |
| 82 | +| `use_colpali` | boolean | `true` | Use Morphik multimodal embeddings when available | |
| 83 | +| `output_format` | string | `"base64"` | How image chunk content is returned: `base64`, `url`, or `text` | |
| 84 | +| `folder_name` | string | `null` | Optional folder scope | |
| 85 | + |
| 86 | +## Response |
| 87 | + |
| 88 | +```json |
| 89 | +[ |
| 90 | + { |
| 91 | + "document_id": "doc_abc123", |
| 92 | + "chunk_number": 0, |
| 93 | + "content": "Introduction to the quarterly report...", |
| 94 | + "content_type": "text/plain", |
| 95 | + "score": 1.0, |
| 96 | + "metadata": { "department": "sales" } |
| 97 | + }, |
| 98 | + { |
| 99 | + "document_id": "doc_abc123", |
| 100 | + "chunk_number": 1, |
| 101 | + "content": "Revenue highlights for Q4...", |
| 102 | + "content_type": "text/plain", |
| 103 | + "score": 1.0, |
| 104 | + "metadata": { "department": "sales" } |
| 105 | + } |
| 106 | +] |
| 107 | +``` |
| 108 | + |
| 109 | +<Note> |
| 110 | +Use this endpoint when you already know which chunks you need (e.g., from a previous retrieval result) and want to fetch their full content in a single request. |
| 111 | +</Note> |
0 commit comments