Skip to content

Commit a24103d

Browse files
committed
fix: correct websocket function
1 parent a3a518e commit a24103d

2 files changed

Lines changed: 57 additions & 31 deletions

File tree

lib/ibm_watson/speech_to_text_v1.rb

Lines changed: 55 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ module IBMWatson
   # The Speech to Text V1 service.
   class SpeechToTextV1 < IBMCloudSdkCore::BaseService
     include Concurrent::Async
+    DEFAULT_SERVICE_NAME = "speech_to_text"
+    DEFAULT_SERVICE_URL = "https://stream.watsonplatform.net/speech-to-text/api"
     ##
     # @!method initialize(args)
     # Construct a new client for the Speech to Text service.
@@ -58,15 +60,19 @@ class SpeechToTextV1 < IBMCloudSdkCore::BaseService
     # @option args service_url [String] The base service URL to use when contacting the service.
     #   The base service_url may differ between IBM Cloud regions.
     # @option args authenticator [Object] The Authenticator instance to be configured for this service.
+    # @option args service_name [String] The name of the service to configure. Will be used as the key to load
+    #   any external configuration, if applicable.
     def initialize(args = {})
       @__async_initialized__ = false
       defaults = {}
-      defaults[:service_url] = "https://stream.watsonplatform.net/speech-to-text/api"
+      defaults[:service_url] = DEFAULT_SERVICE_URL
+      defaults[:service_name] = DEFAULT_SERVICE_NAME
       defaults[:authenticator] = nil
+      user_service_url = args[:service_url] unless args[:service_url].nil?
       args = defaults.merge(args)
-      args[:service_name] = "speech_to_text"
       args[:authenticator] = IBMCloudSdkCore::ConfigBasedAuthenticatorFactory.new.get_authenticator(service_name: args[:service_name]) if args[:authenticator].nil?
       super
+      @service_url = user_service_url unless user_service_url.nil?
     end
7177

7278
#########################
@@ -277,8 +283,14 @@ def get_model(model_id:)
277283
# @param keywords [Array[String]] An array of keyword strings to spot in the audio. Each keyword string can include
278284
# one or more string tokens. Keywords are spotted only in the final results, not in
279285
# interim hypotheses. If you specify any keywords, you must also specify a keywords
280-
# threshold. You can spot a maximum of 1000 keywords. Omit the parameter or specify
281-
# an empty array if you do not need to spot keywords. See [Keyword
286+
# threshold. Omit the parameter or specify an empty array if you do not need to spot
287+
# keywords.
288+
#
289+
# You can spot a maximum of 1000 keywords with a single request. A single keyword
290+
# can have a maximum length of 1024 characters, though the maximum effective length
291+
# for double-byte languages might be shorter. Keywords are case-insensitive.
292+
#
293+
# See [Keyword
282294
# spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
283295
# @param keywords_threshold [Float] A confidence value that is the lower bound for spotting a keyword. A word is
284296
# considered to match a keyword if its confidence is greater than or equal to the
@@ -568,6 +580,7 @@ def recognize_using_websocket(
       require_relative("./websocket/speech_to_text_websocket_listener.rb")
       headers = {}
       headers = conn.default_options.headers.to_hash unless conn.default_options.headers.to_hash.empty?
+      @authenticator.authenticate(headers)
       service_url = @service_url.gsub("https:", "wss:")
       params = {
         "model" => model,
@@ -902,8 +915,14 @@ def unregister_callback(callback_url:)
902915
# @param keywords [Array[String]] An array of keyword strings to spot in the audio. Each keyword string can include
903916
# one or more string tokens. Keywords are spotted only in the final results, not in
904917
# interim hypotheses. If you specify any keywords, you must also specify a keywords
905-
# threshold. You can spot a maximum of 1000 keywords. Omit the parameter or specify
906-
# an empty array if you do not need to spot keywords. See [Keyword
918+
# threshold. Omit the parameter or specify an empty array if you do not need to spot
919+
# keywords.
920+
#
921+
# You can spot a maximum of 1000 keywords with a single request. A single keyword
922+
# can have a maximum length of 1024 characters, though the maximum effective length
923+
# for double-byte languages might be shorter. Keywords are case-insensitive.
924+
#
925+
# See [Keyword
907926
# spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
908927
# @param keywords_threshold [Float] A confidence value that is the lower bound for spotting a keyword. A word is
909928
# considered to match a keyword if its confidence is greater than or equal to the
@@ -1599,10 +1618,10 @@ def list_corpora(customization_id:)
15991618
#
16001619
# The call returns an HTTP 201 response code if the corpus is valid. The service
16011620
# then asynchronously processes the contents of the corpus and automatically
1602-
# extracts new words that it finds. This can take on the order of a minute or two to
1603-
# complete depending on the total number of words and the number of new words in the
1604-
# corpus, as well as the current load on the service. You cannot submit requests to
1605-
# add additional resources to the custom model or to train the model until the
1621+
# extracts new words that it finds. This operation can take on the order of minutes
1622+
# to complete depending on the total number of words and the number of new words in
1623+
# the corpus, as well as the current load on the service. You cannot submit requests
1624+
# to add additional resources to the custom model or to train the model until the
16061625
# service's analysis of the corpus for the current request completes. Use the **List
16071626
# a corpus** method to check the status of the analysis.
16081627
#
@@ -2147,12 +2166,12 @@ def list_grammars(customization_id:)
21472166
#
21482167
# The call returns an HTTP 201 response code if the grammar is valid. The service
21492168
# then asynchronously processes the contents of the grammar and automatically
2150-
# extracts new words that it finds. This can take a few seconds to complete
2151-
# depending on the size and complexity of the grammar, as well as the current load
2152-
# on the service. You cannot submit requests to add additional resources to the
2153-
# custom model or to train the model until the service's analysis of the grammar for
2154-
# the current request completes. Use the **Get a grammar** method to check the
2155-
# status of the analysis.
2169+
# extracts new words that it finds. This operation can take a few seconds or minutes
2170+
# to complete depending on the size and complexity of the grammar, as well as the
2171+
# current load on the service. You cannot submit requests to add additional
2172+
# resources to the custom model or to train the model until the service's analysis
2173+
# of the grammar for the current request completes. Use the **Get a grammar** method
2174+
# to check the status of the analysis.
21562175
#
21572176
# The service populates the model's words resource with any word that is recognized
21582177
# by the grammar that is not found in the model's base vocabulary. These are
@@ -2499,7 +2518,7 @@ def delete_acoustic_model(customization_id:)
24992518
# to complete depending on the total amount of audio data on which the custom
25002519
# acoustic model is being trained and the current load on the service. Typically,
25012520
# training a custom acoustic model takes approximately two to four times the length
2502-
# of its audio data. The range of time depends on the model being trained and the
2521+
# of its audio data. The actual time depends on the model being trained and the
25032522
# nature of the audio, such as whether the audio is clean or noisy. The method
25042523
# returns an HTTP 200 response code to indicate that the training process has begun.
25052524
#
@@ -2518,8 +2537,9 @@ def delete_acoustic_model(customization_id:)
25182537
# Train with a custom language model if you have verbatim transcriptions of the
25192538
# audio files that you have added to the custom model or you have either corpora
25202539
# (text files) or a list of words that are relevant to the contents of the audio
2521-
# files. Both of the custom models must be based on the same version of the same
2522-
# base model for training to succeed.
2540+
# files. For training to succeed, both of the custom models must be based on the
2541+
# same version of the same base model, and the custom language model must be fully
2542+
# trained and available.
25232543
#
25242544
# **See also:**
25252545
# * [Train the custom acoustic
@@ -2535,6 +2555,9 @@ def delete_acoustic_model(customization_id:)
25352555
# another training request or a request to add audio resources to the model.
25362556
# * The custom model contains less than 10 minutes or more than 200 hours of audio
25372557
# data.
2558+
# * You passed a custom language model with the `custom_language_model_id` query
2559+
# parameter that is not in the available state. A custom language model must be
2560+
# fully trained and available to be used to train a custom acoustic model.
25382561
# * You passed an incompatible custom language model with the
25392562
# `custom_language_model_id` query parameter. Both custom models must be based on
25402563
# the same version of the same base model.
@@ -2550,8 +2573,8 @@ def delete_acoustic_model(customization_id:)
25502573
# been trained with verbatim transcriptions of the audio resources or that contains
25512574
# words that are relevant to the contents of the audio resources. The custom
25522575
# language model must be based on the same version of the same base model as the
2553-
# custom acoustic model. The credentials specified with the request must own both
2554-
# custom models.
2576+
# custom acoustic model, and the custom language model must be fully trained and
2577+
# available. The credentials specified with the request must own both custom models.
25552578
# @return [IBMCloudSdkCore::DetailedResponse] A `IBMCloudSdkCore::DetailedResponse` object representing the response.
25562579
def train_acoustic_model(customization_id:, custom_language_model_id: nil)
25572580
raise ArgumentError.new("customization_id must be provided") if customization_id.nil?
@@ -2649,8 +2672,9 @@ def reset_acoustic_model(customization_id:)
26492672
# service that owns the custom model.
26502673
# @param custom_language_model_id [String] If the custom acoustic model was trained with a custom language model, the
26512674
# customization ID (GUID) of that custom language model. The custom language model
2652-
# must be upgraded before the custom acoustic model can be upgraded. The credentials
2653-
# specified with the request must own both custom models.
2675+
# must be upgraded before the custom acoustic model can be upgraded. The custom
2676+
# language model must be fully trained and available. The credentials specified with
2677+
# the request must own both custom models.
26542678
# @param force [Boolean] If `true`, forces the upgrade of a custom acoustic model for which no input data
26552679
# has been modified since it was last trained. Use this parameter only to force the
26562680
# upgrade of a custom acoustic model that is trained with a custom language model,
@@ -2745,14 +2769,14 @@ def list_audio(customization_id:)
27452769
# same name as an existing audio resource, set the `allow_overwrite` parameter to
27462770
# `true`; otherwise, the request fails.
27472771
#
2748-
# The method is asynchronous. It can take several seconds to complete depending on
2749-
# the duration of the audio and, in the case of an archive file, the total number of
2750-
# audio files being processed. The service returns a 201 response code if the audio
2751-
# is valid. It then asynchronously analyzes the contents of the audio file or files
2752-
# and automatically extracts information about the audio such as its length,
2753-
# sampling rate, and encoding. You cannot submit requests to train or upgrade the
2754-
# model until the service's analysis of all audio resources for current requests
2755-
# completes.
2772+
# The method is asynchronous. It can take several seconds or minutes to complete
2773+
# depending on the duration of the audio and, in the case of an archive file, the
2774+
# total number of audio files being processed. The service returns a 201 response
2775+
# code if the audio is valid. It then asynchronously analyzes the contents of the
2776+
# audio file or files and automatically extracts information about the audio such as
2777+
# its length, sampling rate, and encoding. You cannot submit requests to train or
2778+
# upgrade the model until the service's analysis of all audio resources for current
2779+
# requests completes.
27562780
#
27572781
# To determine the status of the service's analysis of the audio, use the **Get an
27582782
# audio resource** method to poll the status of the audio. The method accepts the

test/integration/test_speech_to_text_v1.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ def test_recognize_websocket_as_chunks
     end
     thr = Thread.new { speech.start }
     thr.join
+    assert(atomic_boolean.false?)
   end

   def test_recognize_websocket
@@ -206,6 +207,7 @@ def test_recognize_websocket
     )
     thr = Thread.new { speech.start }
     thr.join
+    assert(atomic_boolean.false?)
  end

  def test_inactivity_timeout_using_websocket

0 commit comments

Comments (0)