Skip to content

Commit 3841b13

Browse files
authored
Merge pull request #91 from dreadnode/feature/multimodal-extras-remove-pydub
feat: add multimodal extras and remove pydub for Python 3.13 compatibility
2 parents 5ddbb7f + ba90a3c commit 3841b13

11 files changed

Lines changed: 224 additions & 218 deletions

File tree

README.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,33 @@ pip install -U dreadnode
8585
```
8686

8787
If you want to build from source:
88+
8889
```bash
90+
8991
poetry install
92+
# Install with multimodal extras
93+
poetry install --extras multimodal
94+
95+
# Install with training extras
96+
poetry install --extras training
97+
98+
# Install with all extras
99+
poetry install --all-extras
100+
```
101+
102+
## Installation from PyPI with Optional Features
103+
104+
For advanced media processing capabilities (audio, video, images), install the multimodal extras:
105+
106+
```bash
107+
# Multimodal support (audio, video, and image processing)
108+
pip install -U dreadnode[multimodal]
109+
110+
# Training support (ML model integration)
111+
pip install -U dreadnode[training]
112+
113+
# All optional features
114+
pip install -U dreadnode[all]
90115
```
91116

92117
See our **[installation guide](https://docs.dreadnode.io/strikes/install)** for more options.

docs/sdk/data_types.mdx

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ Supports:
2424
- Local file paths (str or Path)
2525
- Numpy arrays with sample rate
2626
- Raw bytes
27-
- Pydub AudioSegment object
2827

2928
Initialize an Audio object.
3029

@@ -36,7 +35,6 @@ Initialize an Audio object.
3635
- A path to a local audio file (str or Path)
3736
- A numpy array (requires sample\_rate)
3837
- Raw bytes
39-
- A pydub AudioSegment
4038
* **`sample_rate`**
4139
(`int | None`, default:
4240
`None`
@@ -70,7 +68,6 @@ def __init__(
7068
- A path to a local audio file (str or Path)
7169
- A numpy array (requires sample_rate)
7270
- Raw bytes
73-
- A pydub AudioSegment
7471
sample_rate: Required when using numpy arrays
7572
caption: Optional caption for the audio
7673
format: Optional format to use (default is wav for numpy arrays)

docs/usage/rich-objects.mdx

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -76,17 +76,6 @@ with dn.run("audio-example-numpy"):
7676
dn.log_input("my-audio", dn.Audio(audio_data, sample_rate=sample_rate))
7777
```
7878

79-
```python AudioSegment
80-
import dreadnode as dn
81-
from pydub import AudioSegment
82-
83-
# Load audio with pydub
84-
audio_segment = AudioSegment.from_file("path/to/audio.mp3")
85-
86-
with dn.run("audio-example-segment"):
87-
dn.log_input("my-audio", dn.Audio(audio_segment))
88-
```
89-
9079
```python Raw Bytes
9180
import dreadnode as dn
9281

dreadnode/data_types/audio.py

Lines changed: 11 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,15 @@
33
from pathlib import Path
44

55
import numpy as np
6-
import soundfile as sf # type: ignore # noqa: PGH003
7-
from pydub import AudioSegment # type: ignore # noqa: PGH003
6+
7+
try:
8+
import soundfile as sf # type: ignore # noqa: PGH003
9+
except ImportError:
10+
sf = None
811

912
from dreadnode.data_types.base_data_type import BaseDataType
1013

11-
AudioDataType: t.TypeAlias = str | Path | np.ndarray[t.Any, t.Any] | bytes | AudioSegment
14+
AudioDataType: t.TypeAlias = str | Path | np.ndarray[t.Any, t.Any] | bytes
1215

1316

1417
class Audio(BaseDataType):
@@ -19,7 +22,6 @@ class Audio(BaseDataType):
1922
- Local file paths (str or Path)
2023
- Numpy arrays with sample rate
2124
- Raw bytes
22-
- Pydub AudioSegment object
2325
"""
2426

2527
def __init__(
@@ -37,11 +39,15 @@ def __init__(
3739
- A path to a local audio file (str or Path)
3840
- A numpy array (requires sample_rate)
3941
- Raw bytes
40-
- A pydub AudioSegment
4142
sample_rate: Required when using numpy arrays
4243
caption: Optional caption for the audio
4344
format: Optional format to use (default is wav for numpy arrays)
4445
"""
46+
if sf is None:
47+
raise ImportError(
48+
"Audio processing requires optional dependencies. "
49+
"Install with: pip install dreadnode[multimodal]"
50+
)
4551
self._data = data
4652
self._sample_rate = sample_rate
4753
self._caption = caption
@@ -69,8 +75,6 @@ def _process_audio_data(self) -> tuple[bytes, str, int | None, float | None]:
6975
return self._process_numpy_array()
7076
if isinstance(self._data, bytes):
7177
return self._process_raw_bytes()
72-
if isinstance(self._data, AudioSegment):
73-
return self._process_pydub_audio_segment()
7478
raise TypeError(f"Unsupported audio data type: {type(self._data)}")
7579

7680
def _process_file_path(self) -> tuple[bytes, str, int | None, float | None]:
@@ -123,29 +127,6 @@ def _process_raw_bytes(self) -> tuple[bytes, str, int | None, float | None]:
123127
raise TypeError("Raw bytes are expected for this processing method.")
124128
return self._data, format_name, self._sample_rate, None
125129

126-
def _process_pydub_audio_segment(self) -> tuple[bytes, str, int | None, float | None]:
127-
"""
128-
Process pydub AudioSegment to bytes.
129-
Returns:
130-
A tuple of (audio_bytes, format_name, sample_rate, duration)
131-
"""
132-
133-
if not isinstance(self._data, AudioSegment):
134-
raise TypeError("AudioSegment is expected for this processing method.")
135-
136-
sample_rate = self._data.frame_rate
137-
138-
buffer = io.BytesIO()
139-
format_name = self._format or "wav"
140-
self._data.export(buffer, format=format_name)
141-
buffer.seek(0)
142-
audio_bytes = buffer.read()
143-
144-
# PyDUB provides duration in milliseconds, convert to seconds for consistency
145-
duration = len(self._data) / 1000.0
146-
147-
return audio_bytes, format_name, sample_rate, duration
148-
149130
def _generate_metadata(
150131
self, format_name: str, sample_rate: int | None, duration: float | None
151132
) -> dict[str, str | int | float | None]:
@@ -166,20 +147,13 @@ def _generate_metadata(
166147
metadata["source-type"] = "numpy.ndarray"
167148
elif isinstance(self._data, bytes):
168149
metadata["source-type"] = "bytes"
169-
elif isinstance(self._data, AudioSegment):
170-
metadata["source-type"] = "pydub.AudioSegment"
171150

172151
if sample_rate is not None:
173152
metadata["sample-rate"] = sample_rate
174153

175154
if duration is not None:
176155
metadata["duration"] = duration
177156

178-
# Add pydub-specific metadata if available
179-
if isinstance(self._data, AudioSegment):
180-
metadata["channels"] = self._data.channels
181-
metadata["sample-width"] = self._data.sample_width
182-
183157
if self._caption:
184158
metadata["caption"] = self._caption
185159

dreadnode/data_types/image.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,15 @@
44
from pathlib import Path
55

66
import numpy as np
7-
from PIL import Image as PILImage
87

98
from dreadnode.data_types.base_data_type import BaseDataType
109

11-
ImageDataType = PILImage.Image | np.ndarray[t.Any, t.Any]
10+
try:
11+
from PIL import Image as PILImage
12+
except ImportError:
13+
PILImage = None # type: ignore[assignment]
14+
15+
ImageDataType = t.Any | np.ndarray[t.Any, t.Any]
1216
ImageDataOrPathType = str | Path | bytes | ImageDataType
1317

1418

@@ -44,6 +48,10 @@ def __init__(
4448
caption: Optional caption for the image
4549
format: Optional format to use when saving (png, jpg, etc.)
4650
"""
51+
if PILImage is None:
52+
raise ImportError(
53+
"Image processing requires PIL (Pillow). Install with: pip install dreadnode[multimodal]"
54+
)
4755
self._data = data
4856
self._mode = mode
4957
self._caption = caption

dreadnode/data_types/video.py

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,19 @@
44
from pathlib import Path
55

66
import numpy as np
7-
from moviepy.video.io.ImageSequenceClip import ImageSequenceClip # type: ignore # noqa: PGH003
8-
from moviepy.video.VideoClip import VideoClip # type: ignore # noqa: PGH003
97
from numpy.typing import NDArray
108

119
from dreadnode.data_types.base_data_type import BaseDataType
1210

13-
VideoDataType: t.TypeAlias = str | Path | NDArray[t.Any] | bytes | list[NDArray[t.Any]] | VideoClip
11+
try:
12+
from moviepy.video.io.ImageSequenceClip import ImageSequenceClip # type: ignore # noqa: PGH003
13+
from moviepy.video.VideoClip import VideoClip # type: ignore # noqa: PGH003
14+
except ImportError:
15+
ImageSequenceClip = None
16+
VideoClip = None
17+
18+
19+
VideoDataType: t.TypeAlias = str | Path | NDArray[t.Any] | bytes | list[NDArray[t.Any]] | t.Any
1420

1521

1622
class Video(BaseDataType):
@@ -70,8 +76,13 @@ def to_serializable(self) -> tuple[bytes, dict[str, t.Any]]:
7076
return self._process_bytes()
7177
if isinstance(self._data, (np.ndarray, list)):
7278
return self._process_numpy_array()
73-
if isinstance(self._data, VideoClip):
79+
if VideoClip is not None and isinstance(self._data, VideoClip):
7480
return self._process_moviepy_clip()
81+
if VideoClip is None and hasattr(self._data, "write_videofile"):
82+
raise ImportError(
83+
"MoviePy VideoClip detected but moviepy not installed. "
84+
"Install with: pip install dreadnode[multimodal]"
85+
)
7586
raise TypeError(f"Unsupported video data type: {type(self._data)}")
7687

7788
def _process_file_path(self) -> tuple[bytes, dict[str, t.Any]]:
@@ -110,13 +121,31 @@ def _process_numpy_array(self) -> tuple[bytes, dict[str, t.Any]]:
110121
Returns:
111122
A tuple of (video_bytes, metadata_dict)
112123
"""
124+
if ImageSequenceClip is None:
125+
raise ImportError(
126+
"Video processing from numpy arrays requires moviepy. "
127+
"Install with: pip install dreadnode[multimodal]"
128+
)
113129
if not self._fps:
114130
raise ValueError("fps is required for numpy array video frames")
115131
if not isinstance(self._data, (np.ndarray, list)):
116132
raise TypeError("data must be a numpy array or list of numpy arrays")
133+
134+
# Type guard for mypy
135+
assert ImageSequenceClip is not None # noqa: S101
136+
137+
frames = self._extract_frames_from_data()
138+
if not frames:
139+
raise ValueError("No frames found in input data")
140+
141+
return self._create_video_from_frames_data(frames)
142+
143+
def _extract_frames_from_data(self) -> list[NDArray[t.Any]]:
144+
"""Extract frames from numpy array or list data."""
117145
frames = []
118146
rgb_dim = 3
119147
rgba_dim = 4
148+
120149
if isinstance(self._data, np.ndarray):
121150
if self._data.ndim == rgb_dim: # Single frame
122151
frames = [self._data]
@@ -127,23 +156,23 @@ def _process_numpy_array(self) -> tuple[bytes, dict[str, t.Any]]:
127156
elif isinstance(self._data, list):
128157
frames = self._data
129158

130-
if not frames:
131-
raise ValueError("No frames found in input data")
159+
return frames
132160

161+
def _create_video_from_frames_data(
162+
self, frames: list[NDArray[t.Any]]
163+
) -> tuple[bytes, dict[str, t.Any]]:
164+
"""Create video file from frames."""
133165
frame_height, frame_width = frames[0].shape[:2]
134-
135166
temp_fd, temp_path = tempfile.mkstemp(suffix=f".{self._format}")
136167
os.close(temp_fd)
137168

138169
try:
139170
# Create clip and write to file
140171
clip = ImageSequenceClip(frames, fps=self._fps)
141-
142172
clip.write_videofile(
143173
temp_path,
144174
fps=self._fps,
145175
)
146-
147176
video_bytes = Path(temp_path).read_bytes()
148177

149178
metadata = self._generate_metadata(self._format)

examples/log_object/audio.ipynb

Lines changed: 6 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,16 @@
66
"source": [
77
"# Dreadnode Audio Logging\n",
88
"\n",
9-
"This notebook demonstrates how to log audio data using Dreadnode's `Audio` data type. The examples cover various audio formats and sources including file paths, numpy arrays, and pydub AudioSegment objects.\n",
9+
"This notebook demonstrates how to log audio data using Dreadnode's `Audio` data type. The examples cover various audio formats and sources, including file paths and numpy arrays.\n",
1010
"\n",
1111
"## Features\n",
1212
"\n",
1313
"- Log audio files directly from disk (WAV, MP3, etc.)\n",
1414
"- Convert and log numpy arrays as audio\n",
15-
"- Process and log pydub AudioSegment objects\n",
16-
"- Add captions and metadata to audio logs"
15+
"- Add captions and metadata to audio logs\n",
16+
"\n",
17+
"⚠️ Note: Ensure you have installed the multimodal extras to use the Audio data type:\n",
18+
"`pip install dreadnode[multimodal]`"
1719
]
1820
},
1921
{
@@ -128,32 +130,6 @@
128130
" dn.log_input(\"stereo_sine\", Audio(stereo, sample_rate=sample_rate, caption=\"Stereo audio (440 Hz left, 880 Hz right)\"))"
129131
]
130132
},
131-
{
132-
"cell_type": "markdown",
133-
"metadata": {},
134-
"source": [
135-
"## 3. Pydub AudioSegment Examples\n",
136-
"\n",
137-
"Pydub is a popular library for audio manipulation in Python. Dreadnode supports logging AudioSegment objects directly, which enables powerful audio processing before logging."
138-
]
139-
},
140-
{
141-
"cell_type": "code",
142-
"execution_count": null,
143-
"metadata": {},
144-
"outputs": [],
145-
"source": [
146-
"from pydub import AudioSegment\n",
147-
"from pydub.generators import Sine\n",
148-
"\n",
149-
"with dn.run(\"audio_pydub_examples\") as r:\n",
150-
" # Load the file with pydub\n",
151-
" audio_segment = AudioSegment.from_file(audio_file_path)\n",
152-
" \n",
153-
" # Log the original AudioSegment\n",
154-
" dn.log_input(\"pydub_original\", Audio(audio_segment, caption=\"Original audio with pydub\"))\n"
155-
]
156-
},
157133
{
158134
"cell_type": "markdown",
159135
"metadata": {},
@@ -181,7 +157,6 @@
181157
"\n",
182158
"1. Audio files (e.g., WAV, MP3 files)\n",
183159
"2. Numpy arrays with sample rate\n",
184-
"3. Pydub AudioSegment objects\n",
185160
"3. Audio with custom metadata and captions\n",
186161
"\n",
187162
"We also showed more advanced audio processing techniques including:\n",
@@ -194,7 +169,7 @@
194169
],
195170
"metadata": {
196171
"kernelspec": {
197-
"display_name": ".venv",
172+
"display_name": "dreadnode-py3.12",
198173
"language": "python",
199174
"name": "python3"
200175
},

examples/log_object/image.ipynb

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,10 @@
1414
"- Convert and log PIL Image objects\n",
1515
"- Transform numpy arrays into images\n",
1616
"- Handle raw bytes and base64 encoded images\n",
17-
"- Convert between image modes (RGB, RGBA, grayscale)"
17+
"- Convert between image modes (RGB, RGBA, grayscale)\n",
18+
"\n",
19+
"⚠️ Note: Ensure you have installed the multimodal extras to use the Image data type:\n",
20+
"`pip install dreadnode[multimodal]`"
1821
]
1922
},
2023
{
@@ -265,7 +268,7 @@
265268
],
266269
"metadata": {
267270
"kernelspec": {
268-
"display_name": ".venv",
271+
"display_name": "Python 3",
269272
"language": "python",
270273
"name": "python3"
271274
},

0 commit comments

Comments
 (0)