Merge pull request #540 from watson-developer-cloud/ws

germanattanasio · web-flow · commit 1039f44ea339 · 2018-08-23T20:07:33.000-04:00
new(WS): web socket-client library for STT websocket
diff --git a/appveyor.yml b/appveyor.yml
@@ -0,0 +1,35 @@
+environment:
+
+  matrix:
+
+    - PYTHON: "C:\\Python27"
+    - PYTHON: "C:\\Python35"
+    - PYTHON: "C:\\Python27-x64"
+    - PYTHON: "C:\\Python35-x64"
+    - PYTHON: "C:\\Python36-x64"
+
+install:
+
+  # Install Python (from the official .msi of https://python.org) and pip when
+  # not already installed.
+  - ps: if (-not(Test-Path($env:PYTHON))) { & appveyor\install.ps1 }
+
+  # Prepend newly installed Python to the PATH of this build
+  - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%"
+
+  - "python -m pip install --upgrade pip"
+
+  - "pip install --editable ."
+
+  - "pip install -r requirements-dev.txt"
+
+build: off
+
+test_script:
+
+  - ps: py.test --reruns 3 --cov=watson_developer_cloud
+
+deploy: off
+
+matrix:
+  fast_finish: true
diff --git a/examples/microphone-speech-to-text.py b/examples/microphone-speech-to-text.py
@@ -1,22 +1,46 @@
 # You need to install pyaudio to run this example
 # pip install pyaudio
 
-# Note that you need to record just once. You will not be able to send
-# more audio after the initial recording.
+# When using a microphone, the AudioSource `input` parameter would be
+# initialised as a queue. The pyaudio stream would be continuously adding
+# recordings to the queue, and the websocket client would be sending the
+# recordings to the speech to text service
 
 from __future__ import print_function
 import pyaudio
-import tempfile
 from watson_developer_cloud import SpeechToTextV1
-from watson_developer_cloud.websocket import RecognizeCallback
+from watson_developer_cloud.websocket import RecognizeCallback, AudioSource
+from threading import Thread
 
+try:
+    from Queue import Queue, Full
+except ImportError:
+    from queue import Queue, Full
+
+###############################################
+#### Initialize queue to store the recordings #
+###############################################
+CHUNK = 1024
+# Note: It will discard if the websocket client can't consume fast enough
+# So, increase the max size as per your choice
+BUF_MAX_SIZE = CHUNK * 10
+# Buffer to store audio
+q = Queue(maxsize=int(round(BUF_MAX_SIZE / CHUNK)))
+
+# Create an instance of AudioSource
+audio_source = AudioSource(q, True, True)
+
+###############################################
+#### Prepare Speech to Text Service ########
+###############################################
+
+# initialize speech to text service
 speech_to_text = SpeechToTextV1(
     username='YOUR SERVICE USERNAME',
     password='YOUR SERVICE PASSWORD',
     url='https://stream.watsonplatform.net/speech-to-text/api')
 
-
-# Example using websockets
+# define callback for the speech to text service
 class MyRecognizeCallback(RecognizeCallback):
     def __init__(self):
         RecognizeCallback.__init__(self)
@@ -36,41 +60,69 @@ def on_inactivity_timeout(self, error):
     def on_listening(self):
         print('Service is listening')
 
-    def on_transcription_complete(self):
-        print('Transcription completed')
-
     def on_hypothesis(self, hypothesis):
         print(hypothesis)
 
+    def on_data(self, data):
+        print(data)
+
+    def on_close(self):
+        print("Connection closed")
+
+# this function will initiate the recognize service and pass in the AudioSource
+def recognize_using_weboscket(*args):
+    mycallback = MyRecognizeCallback()
+    speech_to_text.recognize_using_websocket(audio=audio_source,
+                                             content_type='audio/l16; rate=44100',
+                                             recognize_callback=mycallback)
 
-mycallback = MyRecognizeCallback()
-tmp = tempfile.NamedTemporaryFile()
+###############################################
+#### Prepare for recording using Pyaudio ######
+###############################################
 
+# Variables for recording the speech
 FORMAT = pyaudio.paInt16
 CHANNELS = 1
 RATE = 44100
-CHUNK = 1024
-RECORD_SECONDS = 5
 
+# define callback for pyaudio to store the recording in queue
+def pyaudio_callback(in_data, frame_count, time_info, status):
+    try:
+        q.put(in_data)
+    except Full:
+        pass # discard
+    return (None, pyaudio.paContinue)
+
+# instantiate pyaudio
 audio = pyaudio.PyAudio()
+
+# open stream using callback
 stream = audio.open(
     format=FORMAT,
     channels=CHANNELS,
     rate=RATE,
     input=True,
-    frames_per_buffer=CHUNK)
-
-print('recording....')
-with open(tmp.name, 'w') as f:
-    for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
-        data = stream.read(CHUNK)
-        f.write(data)
-
-stream.stop_stream()
-stream.close()
-audio.terminate()
-print('Done recording...')
-
-with open(tmp.name) as f:
-    speech_to_text.recognize_with_websocket(
-        audio=f, recognize_callback=mycallback)
+    frames_per_buffer=CHUNK,
+    stream_callback=pyaudio_callback,
+    start=False
+)
+
+#########################################################################
+#### Start the recording and start service to recognize the stream ######
+#########################################################################
+
+print("Enter CTRL+C to end recording...")
+stream.start_stream()
+
+try:
+    recognize_thread = Thread(target=recognize_using_weboscket, args=())
+    recognize_thread.start()
+
+    while True:
+        pass
+except KeyboardInterrupt:
+    # stop recording
+    audio_source.completed_recording()
+    stream.stop_stream()
+    stream.close()
+    audio.terminate()
diff --git a/examples/speech_to_text_v1.py b/examples/speech_to_text_v1.py
@@ -50,9 +50,6 @@ def on_inactivity_timeout(self, error):
     def on_listening(self):
         print('Service is listening')
 
-    def on_transcription_complete(self):
-        print('Transcription completed')
-
     def on_hypothesis(self, hypothesis):
         print(hypothesis)
 
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -17,7 +17,4 @@ Sphinx>=1.3.1
 bumpversion>=0.5.3
 
 # Web sockets
-autobahn>=0.10.9
-Twisted>=13.2.0
-pyOpenSSL>=16.2.0
-service-identity>=17.0.0
+websocket-client==0.48.0
diff --git a/requirements.txt b/requirements.txt
@@ -1,6 +1,3 @@
 requests>=2.0,<3.0
 python_dateutil>=2.5.3
-autobahn>=0.10.9
-Twisted>=13.2.0
-pyOpenSSL>=16.2.0
-service-identity>=17.0.0
+websocket-client==0.48.0
diff --git a/test/integration/test_speech_to_text_v1.py b/test/integration/test_speech_to_text_v1.py
@@ -1,9 +1,9 @@
-# coding: utf-8
 from unittest import TestCase
 import os
+from watson_developer_cloud.websocket import RecognizeCallback, AudioSource
 import watson_developer_cloud
 import pytest
-
+import threading
 
 @pytest.mark.skipif(
     os.getenv('VCAP_SERVICES') is None, reason='requires VCAP_SERVICES')
@@ -83,3 +83,26 @@ def test_acoustic_model(self):
 
         self.speech_to_text.delete_acoustic_model(
             get_acoustic_model['customization_id'])
+
+    def test_recognize_using_websocket(self):
+        class MyRecognizeCallback(RecognizeCallback):
+            def __init__(self):
+                RecognizeCallback.__init__(self)
+                self.error = None
+                self.transcript = None
+
+            def on_error(self, error):
+                self.error = error
+
+            def on_transcription(self, transcript):
+                self.transcript = transcript
+
+        testCallback = MyRecognizeCallback()
+        with open(os.path.join(os.path.dirname(__file__), '../../resources/speech.wav'), 'rb') as audio_file:
+            audio_source = AudioSource(audio_file, False)
+            t = threading.Thread(target=self.speech_to_text.recognize_using_websocket, args=(audio_source, "audio/l16; rate=44100", testCallback))
+            t.start()
+            t.join()
+        assert testCallback.error is None
+        assert testCallback.transcript is not None
+        assert testCallback.transcript[0]['transcript'] == 'thunderstorms could produce large hail isolated tornadoes and heavy rain '
diff --git a/watson_developer_cloud/websocket/__init__.py b/watson_developer_cloud/websocket/__init__.py
@@ -15,4 +15,5 @@
 # limitations under the License.
 
 from .recognize_abstract_callback import RecognizeCallback
-from .speech_to_text_websocket_listener import RecognizeListener
+from .recognize_listener import RecognizeListener
+from .audio_source import AudioSource
diff --git a/watson_developer_cloud/websocket/audio_source.py b/watson_developer_cloud/websocket/audio_source.py
@@ -0,0 +1,35 @@
+# coding: utf-8
+
+# Copyright 2018 IBM All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+class AudioSource(object):
+    """Audio source for the speech to text recognize using websocket"""
+
+    def __init__(self, input, is_recording=False, is_buffer=False):
+        """
+        :param bytes/Queue input: The audio to transcribe in the format specified by the
+        `Content-Type` header.
+        :param bool is_recording: Used to represent if audio recording is in progress
+        :param bool is_buffer: `True` if audio is a Queue
+        """
+        self.input = input
+        self.is_recording = is_recording
+        self.is_buffer = is_buffer
+
+    def completed_recording(self):
+        """
+        Sets the `is_recording` to False
+        """
+        self.is_recording = False
diff --git a/watson_developer_cloud/websocket/recognize_abstract_callback.py b/watson_developer_cloud/websocket/recognize_abstract_callback.py
@@ -21,19 +21,19 @@ def __init__(self):
 
     def on_transcription(self, transcript):
         """
-    Called when an interim result is received
+    Called after the service returns the final result for the transcription.
     """
         pass
 
     def on_connected(self):
         """
-    Called when a WebSocket connection was made
+    Called when a Websocket connection was made
     """
         pass
 
     def on_error(self, error):
         """
-    Called when there is an error in the Web Socket connection.
+    Called when there is an error in the Websocket connection.
     """
         pass
 
@@ -49,20 +49,20 @@ def on_listening(self):
     """
         pass
 
-    def on_transcription_complete(self):
+    def on_hypothesis(self, hypothesis):
         """
-    Called after the service returns the final result for the transcription.
+    Called when an interim result is received.
     """
         pass
 
-    def on_hypothesis(self, hypothesis):
+    def on_data(self, data):
         """
-    Called when the service returns the final hypothesis
+    Called when the service returns results. The data is returned unparsed.
     """
         pass
 
-    def on_data(self, data):
+    def on_close(self):
         """
-    Called when the service returns results. The data is returned unparsed.
+    Called when the Websocket connection is closed
     """
         pass
diff --git a/watson_developer_cloud/websocket/recognize_listener.py b/watson_developer_cloud/websocket/recognize_listener.py
diff --git a/watson_developer_cloud/websocket/speech_to_text_websocket_listener.py b/watson_developer_cloud/websocket/speech_to_text_websocket_listener.py