diff --git a/components-mdx/datasets-create-dataset-item.mdx b/components-mdx/datasets-create-dataset-item.mdx
index e3c11dd81..cf425cdf5 100644
--- a/components-mdx/datasets-create-dataset-item.mdx
+++ b/components-mdx/datasets-create-dataset-item.mdx
@@ -19,6 +19,24 @@ langfuse.create_dataset_item(
 )
 ```
 
+You can also add media to dataset item `input`, `expected_output`, or `metadata`:
+
+```python
+from langfuse.media import LangfuseMedia
+
+langfuse.create_dataset_item(
+    dataset_name="visual-qa",
+    input={
+        "question": "What is shown in this image?",
+        "image": LangfuseMedia(
+            file_path="./example.jpg",
+            content_type="image/jpeg",
+        ),
+    },
+    expected_output={"label": "invoice"},
+)
+```
+
 _See [Python SDK](/docs/sdk/python/sdk-v3) docs for details on how to initialize the Python client._
 
 </Tab>
@@ -29,7 +47,7 @@ import { LangfuseClient } from "@langfuse/client";
 
 const langfuse = new LangfuseClient();
 
-await langfuse.api.datasetItems.create({
+await langfuse.dataset.createItem({
   datasetName: "<dataset_name>",
   // any JS object or value
   input: {
@@ -46,6 +64,28 @@ await langfuse.api.datasetItems.create({
 });
 ```
 
+You can also add media to dataset item `input`, `expectedOutput`, or `metadata`:
+
+```ts
+import { LangfuseClient, LangfuseMedia } from "@langfuse/client";
+import fs from "node:fs";
+
+const langfuse = new LangfuseClient();
+
+await langfuse.dataset.createItem({
+  datasetName: "visual-qa",
+  input: {
+    question: "What is shown in this image?",
+    image: new LangfuseMedia({
+      source: "bytes",
+      contentBytes: fs.readFileSync("./example.jpg"),
+      contentType: "image/jpeg",
+    }),
+  },
+  expectedOutput: { label: "invoice" },
+});
+```
+
 _See [JS/TS SDK](/docs/sdk/typescript/guide) docs for details on how to initialize the JS/TS client._
 
 </Tab>
diff --git a/content/changelog/2026-02-11-versioned-dataset-experiments.mdx b/content/changelog/2026-02-11-versioned-dataset-experiments.mdx
index 028eb4a64..d5e23ce94 100644
--- a/content/changelog/2026-02-11-versioned-dataset-experiments.mdx
+++ b/content/changelog/2026-02-11-versioned-dataset-experiments.mdx
@@ -124,7 +124,7 @@ const versionedDataset = await langfuse.dataset.get("qa-dataset", {
 const result = await versionedDataset.runExperiment({
   name: "Baseline Experiment v1",
   description: "Testing against dataset from Dec 15",
-  task: async ({ item }) => {
+  task: async (item) => {
     const response = await observeOpenAI(new OpenAI()).chat.completions.create({
       model: "gpt-4.1",
       messages: [{ role: "user", content: item.input }]
diff --git a/content/changelog/2026-06-23-multi-modal-datasets.mdx b/content/changelog/2026-06-23-multi-modal-datasets.mdx
new file mode 100644
index 000000000..5495b75b3
--- /dev/null
+++ b/content/changelog/2026-06-23-multi-modal-datasets.mdx
@@ -0,0 +1,45 @@
+---
+date: 2026-06-23
+title: Multi-modal datasets
+description: Create Langfuse dataset items with images, audio, video, documents, and other attachments for SDK-based multi-modal experiments.
+author: Tobias Wochinger
+canonical: /docs/evaluation/experiments/datasets
+---
+
+import { ChangelogHeader } from "@/components/changelog/ChangelogHeader";
+import { Book, FlaskConical } from "lucide-react";
+
+<ChangelogHeader />
+
+<Video
+  src="https://static.langfuse.com/docs-videos/2026-06-17-create-multimodal-dataset-item.mp4"
+  aspectRatio={16 / 9}
+  gifStyle
+/>
+
+You can now add media attachments to Langfuse dataset items and use them in SDK-based multi-modal experiments. Dataset item `input`, `expectedOutput`, and `metadata` can include media uploaded from the UI or via the Python and JS/TS SDKs.
+
+Use this to build visual QA datasets, compare generated images against reference files, or run evaluations over audio, documents, and other multi-modal inputs. In SDK-based experiments, dataset media is resolved into media references by default, with helpers to fetch them as bytes, base64, or data URIs depending on the format your model provider expects.
+
+<Callout type="info">
+  Multi-modal datasets are supported for SDK-based experiments with Python SDK
+  `>= 4.10.0` and JS/TS SDK `@langfuse/client >= 5.6.0`. UI-based
+  experiments do not yet support dataset items with media attachments.
+</Callout>
+
+## Get started
+
+<Cards num={2}>
+  <Card
+    title="Datasets"
+    href="/docs/evaluation/experiments/datasets"
+    icon={<Book />}
+    arrow
+  />
+  <Card
+    title="Experiments via SDK"
+    href="/docs/evaluation/experiments/experiments-via-sdk"
+    icon={<FlaskConical />}
+    arrow
+  />
+</Cards>
diff --git a/content/docs/evaluation/experiments/data-model.mdx b/content/docs/evaluation/experiments/data-model.mdx
index 3e212458a..2881d9630 100644
--- a/content/docs/evaluation/experiments/data-model.mdx
+++ b/content/docs/evaluation/experiments/data-model.mdx
@@ -58,16 +58,30 @@ direction LR
 
 #### DatasetItem object [#datasetitem-object]
 
-| Attribute             | Type          | Required | Description                                                                                                                                                 |
-| --------------------- | ------------- | -------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `id`                  | string        | Yes      | Unique identifier for the dataset item. Dataset items are upserted on their id. Id needs to be unique (project-level) and cannot be reused across datasets. |
-| `datasetId`           | string        | Yes      | ID of the dataset this item belongs to                                                                                                                      |
-| `input`               | object        | No       | Input data for the dataset item                                                                                                                             |
-| `expectedOutput`      | object        | No       | Expected output data for the dataset item                                                                                                                   |
-| `metadata`            | object        | No       | Additional metadata for the dataset item                                                                                                                    |
-| `sourceTraceId`       | string        | No       | ID of the source trace to link this dataset item to                                                                                                         |
-| `sourceObservationId` | string        | No       | ID of the source observation to link this dataset item to                                                                                                   |
-| `status`              | DatasetStatus | No       | Status of the dataset item. Defaults to ACTIVE for newly created items. Possible values: `ACTIVE`, `ARCHIVED`                                               |
+| Attribute             | Type          | Required | Description                                                                                                                                                          |
+| --------------------- | ------------- | -------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `id`                  | string        | Yes      | Unique identifier for the dataset item. Dataset items are upserted on their id. Id needs to be unique (project-level) and cannot be reused across datasets.          |
+| `datasetId`           | string        | Yes      | ID of the dataset this item belongs to                                                                                                                               |
+| `input`               | object        | No       | Input data for the dataset item                                                                                                                                      |
+| `expectedOutput`      | object        | No       | Expected output data for the dataset item                                                                                                                            |
+| `metadata`            | object        | No       | Additional metadata for the dataset item                                                                                                                             |
+| `mediaReferences`     | object[]      | No       | Resolved media references found in `input`, `expectedOutput`, and `metadata`. Included on SDK dataset fetches and API responses that include resolved dataset media. |
+| `sourceTraceId`       | string        | No       | ID of the source trace to link this dataset item to                                                                                                                  |
+| `sourceObservationId` | string        | No       | ID of the source observation to link this dataset item to                                                                                                            |
+| `status`              | DatasetStatus | No       | Status of the dataset item. Defaults to ACTIVE for newly created items. Possible values: `ACTIVE`, `ARCHIVED`                                                        |
+
+#### DatasetItemMediaReference object [#datasetitemmediareference-object]
+
+Dataset item media references point from a stored media token in `input`, `expectedOutput`, or `metadata` to a signed media download URL.
+
+| Attribute         | Type   | Required       | Description                                                                                                                                 |
+| ----------------- | ------ | -------------- | ------------------------------------------------------------------------------------------------------------------------------------------- |
+| `field`           | string | Yes            | Field enum for the dataset item property containing the reference. One of `input`, `expected_output` (for `expectedOutput`), or `metadata`. |
+| `referenceString` | string | Yes            | Original Langfuse media reference string stored in the dataset item.                                                                        |
+| `jsonPath`        | string | Yes            | JSONPath of the string holding the reference inside the field, for example `$['image']`.                                                    |
+| `media`           | object | Yes (nullable) | Resolved media metadata. `null` if the referenced media does not exist or has not been uploaded successfully.                               |
+
+The nested `media` object contains `mediaId`, `contentType`, `contentLength`, `url`, and `urlExpiry`. The `url` is a signed download URL and must be used before it expires (see `urlExpiry`). To refresh the signed URL, refetch the dataset.
 
 ### DatasetRun (Experiment Run) [#datasetrun-experiment-run]
 
diff --git a/content/docs/evaluation/experiments/datasets.mdx b/content/docs/evaluation/experiments/datasets.mdx
index f64410710..736ea77c8 100644
--- a/content/docs/evaluation/experiments/datasets.mdx
+++ b/content/docs/evaluation/experiments/datasets.mdx
@@ -39,6 +39,85 @@ import CreateDatasetItem from "@/components-mdx/datasets-create-dataset-item.mdx
 <CreateDatasetItem />
 </Steps>
 
+## Multi-modal dataset items
+
+Dataset item `input`, `expectedOutput`, and `metadata` fields can include media attachments such as images, audio, video, documents, and other files. You can add media from the Langfuse UI when creating or editing an item, or upload media through the Python and JS/TS SDKs with `LangfuseMedia`.
+
+<Callout type="info">
+  Multi-modal datasets are supported for SDK-based experiments with Python SDK
+  `>= 4.10.0` and JS/TS SDK `@langfuse/client >= 5.6.0`. UI-based
+  experiments do not yet support dataset items with media attachments.
+</Callout>
+
+In the UI, open a dataset item and use the attach button, drag-and-drop, or paste files into the `input`, `expectedOutput`, or `metadata` editor.
+
+<Video
+  src="https://static.langfuse.com/docs-videos/2026-06-17-create-multimodal-dataset-item.mp4"
+  aspectRatio={16 / 9}
+  gifStyle
+/>
+
+In the SDKs, wrap media in `LangfuseMedia` before creating the dataset item. The SDK uploads the media and stores a reference in the dataset item, and the Langfuse UI then renders the attachment preview.
+
+<LangTabs items={["Python SDK", "JS/TS SDK"]}>
+<Tab>
+
+```python
+from langfuse import get_client
+from langfuse.media import LangfuseMedia
+
+langfuse = get_client()
+
+langfuse.create_dataset_item(
+    dataset_name="visual-qa",
+    input={
+        "question": "What is shown in this image?",
+        "image": LangfuseMedia(
+            file_path="./example.jpg",
+            content_type="image/jpeg",
+        ),
+    },
+    expected_output={"label": "invoice"},
+)
+
+dataset = langfuse.get_dataset("visual-qa")
+```
+
+</Tab>
+<Tab>
+
+```ts
+import { LangfuseClient, LangfuseMedia } from "@langfuse/client";
+import fs from "node:fs";
+
+const langfuse = new LangfuseClient();
+
+await langfuse.dataset.createItem({
+  datasetName: "visual-qa",
+  input: {
+    question: "What is shown in this image?",
+    image: new LangfuseMedia({
+      source: "bytes",
+      contentBytes: fs.readFileSync("./example.jpg"),
+      contentType: "image/jpeg",
+    }),
+  },
+  expectedOutput: { label: "invoice" },
+});
+
+const dataset = await langfuse.dataset.get("visual-qa");
+```
+
+</Tab>
+</LangTabs>
+
+See [Experiments via SDK](/docs/evaluation/experiments/experiments-via-sdk#multimodal-experiments) for using multi-modal items in experiments.
+
+<Callout type="info">
+  CSV imports are intended for text and structured JSON dataset items. Use the
+  UI item editor or SDKs for multi-modal dataset items.
+</Callout>
+
 ## Dataset Folders
 
 Datasets can be organized into virtual folders to group datasets serving similar use cases.
@@ -218,7 +297,7 @@ const versionedDataset = await langfuse.dataset.get("qa-dataset", {
 const result = await versionedDataset.runExperiment({
   name: "Baseline Experiment v1",
   description: "Running on dataset v1",
-  task: async ({ item }) => {
+  task: async (item) => {
     // Your LLM application logic here
     // For this example, we'll just return the expected output
     return item.expectedOutput;
@@ -369,7 +448,7 @@ import { LangfuseClient } from "@langfuse/client";
 
 const langfuse = new LangfuseClient();
 
-await langfuse.api.datasetItems.create({
+await langfuse.dataset.createItem({
   datasetName: "<dataset_name>",
   input: { text: "hello world" },
   expectedOutput: { text: "hello world" },
@@ -421,6 +500,7 @@ You can upsert items by providing the `id` of the item you want to update.
 
 ```python
 langfuse.create_dataset_item(
+    dataset_name="<dataset_name>",
     id="<item_id>",
     # example: update status to "ARCHIVED"
     status="ARCHIVED"
@@ -437,7 +517,8 @@ import { LangfuseClient } from "@langfuse/client";
 
 const langfuse = new LangfuseClient();
 
-await langfuse.api.datasetItems.create({
+await langfuse.dataset.createItem({
+  datasetName: "<dataset_name>",
   id: "<item_id>",
   // example: update status to "ARCHIVED"
   status: "ARCHIVED",
diff --git a/content/docs/evaluation/experiments/experiments-via-sdk.mdx b/content/docs/evaluation/experiments/experiments-via-sdk.mdx
index e879695cd..840453144 100644
--- a/content/docs/evaluation/experiments/experiments-via-sdk.mdx
+++ b/content/docs/evaluation/experiments/experiments-via-sdk.mdx
@@ -211,6 +211,90 @@ When using Langfuse datasets, dataset runs are automatically created in Langfuse
 
 Experiments always run on the latest dataset version at experiment time. Support for running experiments on specific dataset versions will be added to the SDK shortly.
 
+### Multi-modal experiments [#multimodal-experiments]
+
+SDK-based experiments can run on datasets that include media attachments in `input`, `expectedOutput`, or `metadata`. When you fetch the dataset via the SDK, each media token is hydrated into a signed `LangfuseMediaReference` by default.
+
+<Callout type="info">
+  Multi-modal datasets are supported for SDK-based experiments with Python SDK
+  `>= 4.10.0` and JS/TS SDK `@langfuse/client >= 5.6.0`. UI-based
+  experiments do not yet support dataset items with media attachments.
+</Callout>
+
+<LangTabs items={["Python SDK", "JS/TS SDK"]}>
+<Tab>
+{/* PYTHON SDK */}
+
+```python
+from langfuse import get_client
+from langfuse.media import LangfuseMediaReference
+
+langfuse = get_client()
+
+dataset = langfuse.get_dataset("visual-qa")
+
+def my_multi_modal_task(*, item, **kwargs):
+    image = item.input["image"]
+    assert isinstance(image, LangfuseMediaReference)
+
+    # Use the format expected by your model provider.
+    image_data_uri = image.fetch_data_uri()
+
+    # Call your multi-modal application here.
+    return run_visual_qa(
+        question=item.input["question"],
+        image=image_data_uri,
+    )
+
+result = dataset.run_experiment(
+    name="Visual QA",
+    task=my_multi_modal_task,
+)
+```
+
+</Tab>
+<Tab>
+{/* JS/TS SDK */}
+
+```typescript
+import {
+  LangfuseClient,
+  LangfuseMediaReference,
+} from "@langfuse/client";
+
+const langfuse = new LangfuseClient();
+
+const dataset = await langfuse.dataset.get("visual-qa");
+
+const result = await dataset.runExperiment({
+  name: "Visual QA",
+  task: async (item) => {
+    const image = item.input.image as LangfuseMediaReference;
+
+    // Use the format expected by your model provider.
+    const imageDataUri = await image.fetchDataUri();
+
+    // Call your multi-modal application here.
+    return runVisualQa({
+      question: item.input.question,
+      image: imageDataUri,
+    });
+  },
+});
+```
+
+</Tab>
+</LangTabs>
+
+`LangfuseMediaReference` exposes helpers to fetch the media as raw bytes, raw base64, or a data URI:
+
+| SDK    | Bytes           | Base64           | Data URI           |
+| ------ | --------------- | ---------------- | ------------------ |
+| Python | `fetch_bytes()` | `fetch_base64()` | `fetch_data_uri()` |
+| JS/TS  | `fetchBytes()`  | `fetchBase64()`  | `fetchDataUri()`   |
+
+The resolved URLs are signed and expire. If a URL expires before your experiment uses it, fetch the dataset again to receive fresh media references.
+
 ### Advanced Features
 
 Enhance your experiments with evaluators and advanced configuration options.
diff --git a/content/docs/observability/features/multi-modality.mdx b/content/docs/observability/features/multi-modality.mdx
index b7c0715f5..ffe391467 100644
--- a/content/docs/observability/features/multi-modality.mdx
+++ b/content/docs/observability/features/multi-modality.mdx
@@ -101,7 +101,7 @@ Supported formats:
 
 ### Custom attachments
 
-If you want to have more control or your media is not base64 encoded, you can upload arbitrary media attachments to Langfuse via the SDKs using the new `LangfuseMedia` class. Wrap media with LangfuseMedia before including it in trace inputs, outputs, or metadata. See the multi-modal documentation for examples.
+If you want to have more control or your media is not base64 encoded, you can upload arbitrary media attachments to Langfuse via the SDKs using the new `LangfuseMedia` class. Wrap media with `LangfuseMedia` before including it in trace inputs, outputs, metadata, or dataset items. See the multi-modal documentation for examples.
 
 <LangTabs items={["Python SDK", "JS/TS SDK"]}>
 <Tab title="Python SDK">
@@ -248,6 +248,8 @@ The base64 data URIs and the wrapped `LangfuseMedia` objects in Langfuse traces
 
 Based on this token, the Langfuse UI can automatically detect the `mediaId` and render the media file inline. The `LangfuseMedia` class provides utility functions to extract the `mediaId` from the reference string.
 
+For multi-modal datasets, see [Experiments via SDK](/docs/evaluation/experiments/experiments-via-sdk#multimodal-experiments) to learn how to fetch dataset items with resolved `LangfuseMediaReference` objects and pass the media to your model provider.
+
 ### 3. Resolving Media References
 
 When dealing with traces, observations, or dataset items that include media references, you can convert them back to their base64 data URI format using the `resolve_media_references` utility method provided by the Langfuse client. This is particularly useful for reinserting the original content during fine-tuning, dataset runs, or replaying a generation. The utility method traverses the parsed object and returns a deep copy with all media reference strings replaced by the corresponding base64 data URI representations.
diff --git a/content/self-hosting/deployment/infrastructure/blobstorage.mdx b/content/self-hosting/deployment/infrastructure/blobstorage.mdx
index aa65beedc..960058e18 100644
--- a/content/self-hosting/deployment/infrastructure/blobstorage.mdx
+++ b/content/self-hosting/deployment/infrastructure/blobstorage.mdx
@@ -42,12 +42,12 @@ They need to be provided for the Langfuse Web and Langfuse Worker containers.
 
 ### Optional Configuration
 
-Langfuse also uses S3 for batch exports and for multi-modal tracing.
+Langfuse also uses S3 for batch exports, multi-modal tracing, and multi-modal datasets.
 Those use-cases are opt-in and can be configured separately.
 Use the following information to enable them.
 Langfuse uses the credentials to generate short-lived, pre-signed URLs that allow SDKs to upload media assets or to download batch exports.
 
-#### Multi-Modal Tracing
+#### Multi-Modal Tracing and Datasets
 
 | Variable                                        | Required / Default | Description                                                                                                          |
 | ----------------------------------------------- | ------------------ | -------------------------------------------------------------------------------------------------------------------- |
@@ -204,7 +204,7 @@ Please follow the MinIO documentation or use a cloud provider managed blob store
 
 #### Configuring Minio for Media Uploads [#minio-media-uploads]
 
-To enable multimodal tracing, presigned URLs allow SDK clients and browsers outside the Docker network to directly upload and download media assets. Therefore, the `LANGFUSE_S3_MEDIA_UPLOAD_ENDPOINT` must resolve to the Docker host's address.
+Multi-modal tracing and multi-modal datasets rely on presigned URLs that allow SDK clients and browsers outside the Docker network to upload and download media assets directly. Therefore, the `LANGFUSE_S3_MEDIA_UPLOAD_ENDPOINT` must resolve to the Docker host's address.
 
 **Development Environment:** When running `docker compose` locally, set `LANGFUSE_S3_MEDIA_UPLOAD_ENDPOINT` to `http://localhost:9090` to ensure presigned URLs correctly loop back to the local instance.