Skip to content

Commit 1039f44

Browse files
Merge pull request #540 from watson-developer-cloud/ws
new(WS): websocket-client library for STT websocket
2 parents 6882608 + 7038c0f commit 1039f44

11 files changed

Lines changed: 392 additions & 216 deletions

appveyor.yml

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
environment:
2+
3+
matrix:
4+
5+
- PYTHON: "C:\\Python27"
6+
- PYTHON: "C:\\Python35"
7+
- PYTHON: "C:\\Python27-x64"
8+
- PYTHON: "C:\\Python35-x64"
9+
- PYTHON: "C:\\Python36-x64"
10+
11+
install:
12+
13+
# Install Python (from the official .msi of https://python.org) and pip when
14+
# not already installed.
15+
- ps: if (-not(Test-Path($env:PYTHON))) { & appveyor\install.ps1 }
16+
17+
# Prepend newly installed Python to the PATH of this build
18+
- "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%"
19+
20+
- "python -m pip install --upgrade pip"
21+
22+
- "pip install --editable ."
23+
24+
- "pip install -r requirements-dev.txt"
25+
26+
build: off
27+
28+
test_script:
29+
30+
- ps: py.test --reruns 3 --cov=watson_developer_cloud
31+
32+
deploy: off
33+
34+
matrix:
35+
fast_finish: true
Lines changed: 81 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,46 @@
11
# You need to install pyaudio to run this example
22
# pip install pyaudio
33

4-
# Note that you need to record just once. You will not be able to send
5-
# more audio after the initial recording.
4+
# When using a microphone, the AudioSource `input` parameter would be
5+
# initialised as a queue. The pyaudio stream would be continuously adding
6+
# recordings to the queue, and the websocket client would be sending the
7+
# recordings to the speech to text service
68

79
from __future__ import print_function
810
import pyaudio
9-
import tempfile
1011
from watson_developer_cloud import SpeechToTextV1
11-
from watson_developer_cloud.websocket import RecognizeCallback
12+
from watson_developer_cloud.websocket import RecognizeCallback, AudioSource
13+
from threading import Thread
1214

15+
try:
16+
from Queue import Queue, Full
17+
except ImportError:
18+
from queue import Queue, Full
19+
20+
###############################################
21+
#### Initialize queue to store the recordings #
22+
###############################################
23+
CHUNK = 1024
24+
# Note: Audio will be discarded if the websocket client can't consume it fast enough
25+
# So, increase the max size as per your choice
26+
BUF_MAX_SIZE = CHUNK * 10
27+
# Buffer to store audio
28+
q = Queue(maxsize=int(round(BUF_MAX_SIZE / CHUNK)))
29+
30+
# Create an instance of AudioSource
31+
audio_source = AudioSource(q, True, True)
32+
33+
###############################################
34+
#### Prepare Speech to Text Service ########
35+
###############################################
36+
37+
# initialize speech to text service
1338
speech_to_text = SpeechToTextV1(
1439
username='YOUR SERVICE USERNAME',
1540
password='YOUR SERVICE PASSWORD',
1641
url='https://stream.watsonplatform.net/speech-to-text/api')
1742

18-
19-
# Example using websockets
43+
# define callback for the speech to text service
2044
class MyRecognizeCallback(RecognizeCallback):
2145
def __init__(self):
2246
RecognizeCallback.__init__(self)
@@ -36,41 +60,69 @@ def on_inactivity_timeout(self, error):
3660
def on_listening(self):
3761
print('Service is listening')
3862

39-
def on_transcription_complete(self):
40-
print('Transcription completed')
41-
4263
def on_hypothesis(self, hypothesis):
4364
print(hypothesis)
4465

66+
def on_data(self, data):
67+
print(data)
68+
69+
def on_close(self):
70+
print("Connection closed")
71+
72+
# this function will initiate the recognize service and pass in the AudioSource
73+
def recognize_using_weboscket(*args):
74+
mycallback = MyRecognizeCallback()
75+
speech_to_text.recognize_using_websocket(audio=audio_source,
76+
content_type='audio/l16; rate=44100',
77+
recognize_callback=mycallback)
4578

46-
mycallback = MyRecognizeCallback()
47-
tmp = tempfile.NamedTemporaryFile()
79+
###############################################
80+
#### Prepare for recording using PyAudio ######
81+
###############################################
4882

83+
# Variables for recording the speech
4984
FORMAT = pyaudio.paInt16
5085
CHANNELS = 1
5186
RATE = 44100
52-
CHUNK = 1024
53-
RECORD_SECONDS = 5
5487

88+
# define callback for pyaudio to store the recording in queue
89+
def pyaudio_callback(in_data, frame_count, time_info, status):
90+
try:
91+
q.put(in_data)
92+
except Full:
93+
pass # discard
94+
return (None, pyaudio.paContinue)
95+
96+
# instantiate pyaudio
5597
audio = pyaudio.PyAudio()
98+
99+
# open stream using callback
56100
stream = audio.open(
57101
format=FORMAT,
58102
channels=CHANNELS,
59103
rate=RATE,
60104
input=True,
61-
frames_per_buffer=CHUNK)
62-
63-
print('recording....')
64-
with open(tmp.name, 'w') as f:
65-
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
66-
data = stream.read(CHUNK)
67-
f.write(data)
68-
69-
stream.stop_stream()
70-
stream.close()
71-
audio.terminate()
72-
print('Done recording...')
73-
74-
with open(tmp.name) as f:
75-
speech_to_text.recognize_with_websocket(
76-
audio=f, recognize_callback=mycallback)
105+
frames_per_buffer=CHUNK,
106+
stream_callback=pyaudio_callback,
107+
start=False
108+
)
109+
110+
#########################################################################
111+
#### Start the recording and start service to recognize the stream ######
112+
#########################################################################
113+
114+
print("Enter CTRL+C to end recording...")
115+
stream.start_stream()
116+
117+
try:
118+
recognize_thread = Thread(target=recognize_using_weboscket, args=())
119+
recognize_thread.start()
120+
121+
while True:
122+
pass
123+
except KeyboardInterrupt:
124+
# stop recording
125+
audio_source.completed_recording()
126+
stream.stop_stream()
127+
stream.close()
128+
audio.terminate()

examples/speech_to_text_v1.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,6 @@ def on_inactivity_timeout(self, error):
5050
def on_listening(self):
5151
print('Service is listening')
5252

53-
def on_transcription_complete(self):
54-
print('Transcription completed')
55-
5653
def on_hypothesis(self, hypothesis):
5754
print(hypothesis)
5855

requirements-dev.txt

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,4 @@ Sphinx>=1.3.1
1717
bumpversion>=0.5.3
1818

1919
# Web sockets
20-
autobahn>=0.10.9
21-
Twisted>=13.2.0
22-
pyOpenSSL>=16.2.0
23-
service-identity>=17.0.0
20+
websocket-client==0.48.0

requirements.txt

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
11
requests>=2.0,<3.0
22
python_dateutil>=2.5.3
3-
autobahn>=0.10.9
4-
Twisted>=13.2.0
5-
pyOpenSSL>=16.2.0
6-
service-identity>=17.0.0
3+
websocket-client==0.48.0

test/integration/test_speech_to_text_v1.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
# coding: utf-8
21
from unittest import TestCase
32
import os
3+
from watson_developer_cloud.websocket import RecognizeCallback, AudioSource
44
import watson_developer_cloud
55
import pytest
6-
6+
import threading
77

88
@pytest.mark.skipif(
99
os.getenv('VCAP_SERVICES') is None, reason='requires VCAP_SERVICES')
@@ -83,3 +83,26 @@ def test_acoustic_model(self):
8383

8484
self.speech_to_text.delete_acoustic_model(
8585
get_acoustic_model['customization_id'])
86+
87+
def test_recognize_using_websocket(self):
88+
class MyRecognizeCallback(RecognizeCallback):
89+
def __init__(self):
90+
RecognizeCallback.__init__(self)
91+
self.error = None
92+
self.transcript = None
93+
94+
def on_error(self, error):
95+
self.error = error
96+
97+
def on_transcription(self, transcript):
98+
self.transcript = transcript
99+
100+
testCallback = MyRecognizeCallback()
101+
with open(os.path.join(os.path.dirname(__file__), '../../resources/speech.wav'), 'rb') as audio_file:
102+
audio_source = AudioSource(audio_file, False)
103+
t = threading.Thread(target=self.speech_to_text.recognize_using_websocket, args=(audio_source, "audio/l16; rate=44100", testCallback))
104+
t.start()
105+
t.join()
106+
assert testCallback.error is None
107+
assert testCallback.transcript is not None
108+
assert testCallback.transcript[0]['transcript'] == 'thunderstorms could produce large hail isolated tornadoes and heavy rain '

watson_developer_cloud/websocket/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,5 @@
1515
# limitations under the License.
1616

1717
from .recognize_abstract_callback import RecognizeCallback
18-
from .speech_to_text_websocket_listener import RecognizeListener
18+
from .recognize_listener import RecognizeListener
19+
from .audio_source import AudioSource
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# coding: utf-8
2+
3+
# Copyright 2018 IBM All Rights Reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
class AudioSource(object):
18+
""""Audio source for the speech to text recognize using websocket"""
19+
20+
def __init__(self, input, is_recording=False, is_buffer=False):
21+
"""
22+
:param bytes/Queue input: The audio to transcribe in the format specified by the
23+
`Content-Type` header.
24+
:param bool is_recording: Used to represent if audio recording is in progress
25+
:param bool is_buffer: `True` if audio is a Queue
26+
"""
27+
self.input = input
28+
self.is_recording = is_recording
29+
self.is_buffer = is_buffer
30+
31+
def completed_recording(self):
32+
"""
33+
Sets the `is_recording` to False
34+
"""
35+
self.is_recording = False

watson_developer_cloud/websocket/recognize_abstract_callback.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,19 +21,19 @@ def __init__(self):
2121

2222
def on_transcription(self, transcript):
2323
"""
24-
Called when an interim result is received
24+
Called after the service returns the final result for the transcription.
2525
"""
2626
pass
2727

2828
def on_connected(self):
2929
"""
30-
Called when a WebSocket connection was made
30+
Called when a Websocket connection was made
3131
"""
3232
pass
3333

3434
def on_error(self, error):
3535
"""
36-
Called when there is an error in the Web Socket connection.
36+
Called when there is an error in the Websocket connection.
3737
"""
3838
pass
3939

@@ -49,20 +49,20 @@ def on_listening(self):
4949
"""
5050
pass
5151

52-
def on_transcription_complete(self):
52+
def on_hypothesis(self, hypothesis):
5353
"""
54-
Called after the service returns the final result for the transcription.
54+
Called when an interim result is received.
5555
"""
5656
pass
5757

58-
def on_hypothesis(self, hypothesis):
58+
def on_data(self, data):
5959
"""
60-
Called when the service returns the final hypothesis
60+
Called when the service returns results. The data is returned unparsed.
6161
"""
6262
pass
6363

64-
def on_data(self, data):
64+
def on_close(self):
6565
"""
66-
Called when the service returns results. The data is returned unparsed.
66+
Called when the Websocket connection is closed
6767
"""
6868
pass

0 commit comments

Comments
 (0)