Skip to content

Commit 920526d

Browse files
authored
Merge pull request #889 from ftnext/feature/cohere-transcribe-api
feat: Add Cohere Transcribe API support (recognize_cohere_api)
2 parents 2e55772 + 3424d9d commit 920526d

7 files changed

Lines changed: 118 additions & 4 deletions

File tree

.github/workflows/unittests.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,16 +46,16 @@ jobs:
4646
- name: Install Python dependencies (Ubuntu, <=3.12)
4747
if: matrix.os == 'ubuntu-latest' && matrix.python-version != '3.13'
4848
run: |
49-
python -m pip install .[dev,audio,pocketsphinx,google-cloud,whisper-local,faster-whisper,openai,groq,vosk]
49+
python -m pip install .[dev,audio,pocketsphinx,google-cloud,whisper-local,faster-whisper,openai,groq,vosk,cohere-api]
5050
- name: Install Python dependencies (Ubuntu, 3.13)
5151
if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.13'
5252
run: |
5353
python -m pip install standard-aifc setuptools
54-
python -m pip install .[dev,audio,pocketsphinx,google-cloud,openai,groq,vosk]
54+
python -m pip install .[dev,audio,pocketsphinx,google-cloud,openai,groq,vosk,cohere-api]
5555
- name: Install Python dependencies (Windows)
5656
if: matrix.os == 'windows-latest'
5757
run: |
58-
python -m pip install .[dev,whisper-local,faster-whisper,google-cloud,openai,groq,vosk]
58+
python -m pip install .[dev,whisper-local,faster-whisper,google-cloud,openai,groq,vosk,cohere-api]
5959
- name: Set up vosk model
6060
run: python -m speech_recognition.cli download vosk
6161
- name: Test with unittest

README.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ Speech recognition engine/API support:
6565
* `OpenAI Whisper API <https://platform.openai.com/docs/guides/speech-to-text>`__
6666
* OpenAI compatible self-hosted endpoints (e.g. vLLM, Ollama)
6767
* `Groq Whisper API <https://console.groq.com/docs/speech-to-text>`__
68+
* `Cohere Transcribe API <https://docs.cohere.com/docs/transcribe>`__
6869

6970
**Quickstart:** ``pip install SpeechRecognition``. See the "Installing" section for more details.
7071

@@ -123,6 +124,7 @@ To use all of the functionality of the library, you should have:
123124
* **openai** (required only if you need to use OpenAI Whisper API speech recognition ``recognizer_instance.recognize_openai``)
124125
* includes OpenAI compatible self-hosted endpoints (e.g. vLLM, Ollama)
125126
* **groq** (required only if you need to use Groq Whisper API speech recognition ``recognizer_instance.recognize_groq``)
127+
* **cohere** (required only if you need to use Cohere Transcribe API speech recognition ``recognizer_instance.recognize_cohere_api``; install with ``pip install SpeechRecognition[cohere-api]``. Set ``CO_API_KEY`` as documented by the Cohere SDK.)
126128

127129
The following requirements are optional, but can improve or extend functionality in some situations:
128130

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,9 @@ groq = [
7575
"groq",
7676
"httpx < 0.28",
7777
]
78+
cohere-api = [
79+
"cohere>=5.21.0",
80+
]
7881
assemblyai = ["requests"]
7982
vosk = ["vosk"]
8083

reference/library-reference.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,11 @@ Raises a ``speech_recognition.UnknownValueError`` exception if the speech is uni
291291

292292
.. autofunction:: speech_recognition.recognizers.whisper_api.groq.recognize
293293

294+
``recognizer_instance.recognize_cohere_api(audio_data: AudioData, *, language: str, model = "cohere-transcribe-03-2026")``
295+
--------------------------------------------------------------------------------------------------------------------------
296+
297+
.. autofunction:: speech_recognition.recognizers.cohere_api.recognize
298+
294299
``AudioSource``
295300
---------------
296301

speech_recognition/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1281,7 +1281,7 @@ def flush(self, *args, **kwargs):
12811281
# At this time, the dependencies are not yet installed, resulting in a ModuleNotFoundError.
12821282
# This is a workaround to resolve this issue
12831283
try:
1284-
from .recognizers import google, google_cloud, pocketsphinx, vosk
1284+
from .recognizers import cohere_api, google, google_cloud, pocketsphinx, vosk
12851285
from .recognizers.whisper_api import groq, openai
12861286
from .recognizers.whisper_local import faster_whisper, whisper
12871287
except (ModuleNotFoundError, ImportError):
@@ -1293,6 +1293,7 @@ def flush(self, *args, **kwargs):
12931293
Recognizer.recognize_faster_whisper = faster_whisper.recognize # type: ignore[attr-defined]
12941294
Recognizer.recognize_openai = openai.recognize # type: ignore[attr-defined]
12951295
Recognizer.recognize_groq = groq.recognize # type: ignore[attr-defined]
1296+
Recognizer.recognize_cohere_api = cohere_api.recognize # type: ignore[attr-defined]
12961297
Recognizer.recognize_sphinx = pocketsphinx.recognize # type: ignore[attr-defined]
12971298
Recognizer.recognize_vosk = vosk.recognize # type: ignore[attr-defined]
12981299

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
from __future__ import annotations
2+
3+
import logging
4+
from io import BytesIO
5+
6+
from speech_recognition.audio import AudioData
7+
from speech_recognition.exceptions import SetupError
8+
9+
logger = logging.getLogger(__name__)
10+
11+
12+
def recognize(
    recognizer,
    audio_data: AudioData,
    *,
    language: str,
    model: str = "cohere-transcribe-03-2026",
) -> str:
    """Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the `Cohere Transcribe <https://docs.cohere.com/docs/transcribe>`__ API via the official Python SDK.

    Requires the ``cohere`` package (install with ``pip install SpeechRecognition[cohere-api]``).
    Set environment variable ``CO_API_KEY`` as documented by Cohere; this library does not read or override it in code.

    ``language`` is required by the Cohere transcription API (e.g. ``"en"``, ``"ja"``).

    ``model`` is the name of the transcription model, passed to the API unchanged.

    Returns the ``text`` attribute of the SDK response.

    Raises a ``speech_recognition.exceptions.SetupError`` exception if the ``cohere`` package cannot be imported, and a ``ValueError`` if ``audio_data`` is not an ``AudioData`` instance.

    Detail: https://docs.cohere.com/reference/create-audio-transcription
    """
    try:
        import cohere
    except ImportError as exc:
        # Chain the original error so the traceback still shows *why* the
        # import failed (e.g. a broken transitive dependency), not only that
        # the optional extra is unavailable.
        raise SetupError(
            "missing cohere module: ensure that cohere is set up correctly "
            "(e.g. pip install SpeechRecognition[cohere-api])."
        ) from exc

    if not isinstance(audio_data, AudioData):
        raise ValueError("``audio_data`` must be an ``AudioData`` instance")

    # Wrap the WAV bytes in a file-like object and give it a ``name`` so the
    # upload carries a filename with a ``.wav`` extension.
    wav_data = BytesIO(audio_data.get_wav_data())
    wav_data.name = "SpeechRecognition_audio.wav"

    # The client is constructed without arguments; credentials are left to
    # the SDK (see the ``CO_API_KEY`` note in the docstring).
    client = cohere.ClientV2()
    logger.debug(
        "cohere audio.transcriptions.create: model=%r language=%r",
        model,
        language,
    )
    response = client.audio.transcriptions.create(
        model=model,
        file=wav_data,
        language=language,
    )
    return response.text
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
from unittest.mock import MagicMock, patch
2+
3+
from speech_recognition import AudioData, Recognizer
4+
from speech_recognition.recognizers import cohere_api
5+
6+
7+
@patch("cohere.ClientV2")
def test_transcribe_default_model(mock_client_cls):
    """With no ``model`` argument, the default model and the WAV payload are sent."""
    mock_response = MagicMock()
    mock_response.text = "Transcription by Cohere"
    mock_client = MagicMock()
    mock_client.audio.transcriptions.create.return_value = mock_response
    mock_client_cls.return_value = mock_client

    # ``spec=AudioData`` makes the mock pass the recognizer's isinstance check.
    audio_data = MagicMock(spec=AudioData)
    audio_data.get_wav_data.return_value = b"fake_wav"

    actual = cohere_api.recognize(
        MagicMock(spec=Recognizer), audio_data, language="en"
    )

    assert actual == "Transcription by Cohere"
    audio_data.get_wav_data.assert_called_once()
    # The client must be built without arguments (no key passed in code).
    mock_client_cls.assert_called_once_with()
    mock_client.audio.transcriptions.create.assert_called_once()
    call_kw = mock_client.audio.transcriptions.create.call_args.kwargs
    assert call_kw["model"] == "cohere-transcribe-03-2026"
    assert call_kw["language"] == "en"
    # Check the uploaded file itself, not merely that a ``file`` kwarg exists:
    # it must carry the WAV bytes and the filename set by the recognizer.
    assert "file" in call_kw
    uploaded = call_kw["file"]
    assert uploaded.getvalue() == b"fake_wav"
    assert uploaded.name == "SpeechRecognition_audio.wav"
30+
31+
32+
@patch("cohere.ClientV2")
def test_transcribe_with_language(mock_client_cls):
    """The ``language`` keyword is forwarded to the Cohere client unchanged."""
    # Stub the SDK: ClientV2() -> client whose create() yields a canned reply.
    fake_reply = MagicMock()
    fake_reply.text = "Japanese transcription"
    fake_client = MagicMock()
    create_call = fake_client.audio.transcriptions.create
    create_call.return_value = fake_reply
    mock_client_cls.return_value = fake_client

    # Audio stand-in that satisfies the AudioData isinstance check.
    fake_audio = MagicMock(spec=AudioData)
    fake_audio.get_wav_data.return_value = b"fake_wav"

    transcript = cohere_api.recognize(
        MagicMock(spec=Recognizer),
        fake_audio,
        language="ja",
    )

    assert transcript == "Japanese transcription"
    sent = create_call.call_args.kwargs
    assert sent["model"] == "cohere-transcribe-03-2026"
    assert sent["language"] == "ja"

0 commit comments

Comments
 (0)