Skip to content

Commit c038ff2

Browse files
committed
Merge branch 'mergeability-pr-45549' into all-defects
2 parents 64c68c1 + ccade7f commit c038ff2

5 files changed

Lines changed: 15 additions & 15 deletions

File tree

src/transformers/models/cohere_asr/feature_extraction_cohere_asr.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -284,17 +284,17 @@ def __call__(
284284
f"Only mono-channel audio is supported for input to {self.__class__.__name__}. "
285285
"We will take the mean of the channels to convert to mono."
286286
)
287-
raw_speech = raw_speech.mean(-1)
287+
raw_speech = raw_speech.mean(1)
288288

289289
is_batched_sequence = isinstance(raw_speech, (list, tuple))
290290
if is_batched_sequence:
291-
for speech in raw_speech:
291+
for index, speech in enumerate(raw_speech):
292292
if len(speech.shape) > 1:
293293
logger.warning(
294294
f"Only mono-channel audio is supported for input to {self.__class__.__name__}. "
295295
"We will take the mean of the channels to convert to mono."
296296
)
297-
speech = speech.mean(-1)
297+
raw_speech[index] = speech.mean(0)
298298

299299
if is_batched_torch or is_batched_sequence:
300300
raw_speech = [speech.to(torch.float32) for speech in raw_speech]

src/transformers/models/lasr/feature_extraction_lasr.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -232,17 +232,17 @@ def __call__(
232232
f"Only mono-channel audio is supported for input to {self.__class__.__name__}. "
233233
"We will take the mean of the channels to convert to mono."
234234
)
235-
raw_speech = raw_speech.mean(-1)
235+
raw_speech = raw_speech.mean(1)
236236

237237
is_batched_sequence = isinstance(raw_speech, (list, tuple))
238238
if is_batched_sequence:
239-
for speech in raw_speech:
239+
for index, speech in enumerate(raw_speech):
240240
if len(speech.shape) > 1:
241241
logger.warning(
242242
f"Only mono-channel audio is supported for input to {self.__class__.__name__}. "
243243
"We will take the mean of the channels to convert to mono."
244244
)
245-
speech = speech.mean(-1)
245+
raw_speech[index] = speech.mean(0)
246246

247247
if is_batched_torch or is_batched_sequence:
248248
raw_speech = [speech[:, None].to(torch.float32) for speech in raw_speech]

src/transformers/models/parakeet/feature_extraction_parakeet.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -217,17 +217,17 @@ def __call__(
217217
f"Only mono-channel audio is supported for input to {self.__class__.__name__}. "
218218
"We will take the mean of the channels to convert to mono."
219219
)
220-
raw_speech = raw_speech.mean(-1)
220+
raw_speech = raw_speech.mean(1)
221221

222222
is_batched_sequence = isinstance(raw_speech, (list, tuple))
223223
if is_batched_sequence:
224-
for speech in raw_speech:
224+
for index, speech in enumerate(raw_speech):
225225
if len(speech.shape) > 1:
226226
logger.warning(
227227
f"Only mono-channel audio is supported for input to {self.__class__.__name__}. "
228228
"We will take the mean of the channels to convert to mono."
229229
)
230-
speech = speech.mean(-1)
230+
raw_speech[index] = speech.mean(0)
231231

232232
if is_batched_torch or is_batched_sequence:
233233
raw_speech = [speech[:, None].to(torch.float32) for speech in raw_speech]

src/transformers/models/phi4_multimodal/feature_extraction_phi4_multimodal.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -145,17 +145,17 @@ def __call__(
145145
f"Only mono-channel audio is supported for input to {self.__class__.__name__}. "
146146
"We will take the mean of the channels to convert to mono."
147147
)
148-
raw_speech = raw_speech.mean(-1)
148+
raw_speech = raw_speech.mean(1)
149149

150150
is_batched_sequence = isinstance(raw_speech, (list, tuple))
151151
if is_batched_sequence:
152-
for speech in raw_speech:
152+
for index, speech in enumerate(raw_speech):
153153
if len(speech.shape) > 1:
154154
logger.warning(
155155
f"Only mono-channel audio is supported for input to {self.__class__.__name__}. "
156156
"We will take the mean of the channels to convert to mono."
157157
)
158-
speech = speech.mean(-1)
158+
raw_speech[index] = speech.mean(0)
159159

160160
if is_batched_torch or is_batched_sequence:
161161
raw_speech = [speech[:, None].to(torch.float32) for speech in raw_speech]

src/transformers/models/voxtral_realtime/feature_extraction_voxtral_realtime.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -203,17 +203,17 @@ def __call__(
203203
f"Only mono-channel audio is supported for input to {self.__class__.__name__}. "
204204
"We will take the mean of the channels to convert to mono."
205205
)
206-
raw_speech = raw_speech.mean(-1)
206+
raw_speech = raw_speech.mean(1)
207207

208208
is_batched_sequence = isinstance(raw_speech, (list, tuple))
209209
if is_batched_sequence:
210-
for speech in raw_speech:
210+
for index, speech in enumerate(raw_speech):
211211
if len(speech.shape) > 1:
212212
logger.warning(
213213
f"Only mono-channel audio is supported for input to {self.__class__.__name__}. "
214214
"We will take the mean of the channels to convert to mono."
215215
)
216-
speech = speech.mean(-1)
216+
raw_speech[index] = speech.mean(0)
217217

218218
if is_batched_torch or is_batched_sequence:
219219
raw_speech = [speech[:, None].to(torch.float32) for speech in raw_speech]

0 commit comments

Comments
 (0)