@@ -50,6 +50,8 @@ module IBMWatson
5050 # The Speech to Text V1 service.
5151 class SpeechToTextV1 < IBMCloudSdkCore ::BaseService
5252 include Concurrent ::Async
53+ DEFAULT_SERVICE_NAME = "speech_to_text"
54+ DEFAULT_SERVICE_URL = "https://stream.watsonplatform.net/speech-to-text/api"
5355 ##
5456 # @!method initialize(args)
5557 # Construct a new client for the Speech to Text service.
@@ -58,15 +60,19 @@ class SpeechToTextV1 < IBMCloudSdkCore::BaseService
5860 # @option args service_url [String] The base service URL to use when contacting the service.
5961 # The base service_url may differ between IBM Cloud regions.
6062 # @option args authenticator [Object] The Authenticator instance to be configured for this service.
63+ # @option args service_name [String] The name of the service to configure. It is used as the key to load
64+ # any external configuration, if applicable.
6165 def initialize ( args = { } )
6266 @__async_initialized__ = false
6367 defaults = { }
64- defaults [ :service_url ] = "https://stream.watsonplatform.net/speech-to-text/api"
68+ defaults [ :service_url ] = DEFAULT_SERVICE_URL
69+ defaults [ :service_name ] = DEFAULT_SERVICE_NAME
6570 defaults [ :authenticator ] = nil
71+ user_service_url = args [ :service_url ] unless args [ :service_url ] . nil?
6672 args = defaults . merge ( args )
67- args [ :service_name ] = "speech_to_text"
6873 args [ :authenticator ] = IBMCloudSdkCore ::ConfigBasedAuthenticatorFactory . new . get_authenticator ( service_name : args [ :service_name ] ) if args [ :authenticator ] . nil?
6974 super
75+ @service_url = user_service_url unless user_service_url . nil?
7076 end
7177
7278 #########################
@@ -277,8 +283,14 @@ def get_model(model_id:)
277283 # @param keywords [Array[String]] An array of keyword strings to spot in the audio. Each keyword string can include
278284 # one or more string tokens. Keywords are spotted only in the final results, not in
279285 # interim hypotheses. If you specify any keywords, you must also specify a keywords
280- # threshold. You can spot a maximum of 1000 keywords. Omit the parameter or specify
281- # an empty array if you do not need to spot keywords. See [Keyword
286+ # threshold. Omit the parameter or specify an empty array if you do not need to spot
287+ # keywords.
288+ #
289+ # You can spot a maximum of 1000 keywords with a single request. A single keyword
290+ # can have a maximum length of 1024 characters, though the maximum effective length
291+ # for double-byte languages might be shorter. Keywords are case-insensitive.
292+ #
293+ # See [Keyword
282294 # spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
283295 # @param keywords_threshold [Float] A confidence value that is the lower bound for spotting a keyword. A word is
284296 # considered to match a keyword if its confidence is greater than or equal to the
@@ -568,6 +580,7 @@ def recognize_using_websocket(
568580 require_relative ( "./websocket/speech_to_text_websocket_listener.rb" )
569581 headers = { }
570582 headers = conn . default_options . headers . to_hash unless conn . default_options . headers . to_hash . empty?
583+ @authenticator . authenticate ( headers )
571584 service_url = @service_url . gsub ( "https:" , "wss:" )
572585 params = {
573586 "model" => model ,
@@ -902,8 +915,14 @@ def unregister_callback(callback_url:)
902915 # @param keywords [Array[String]] An array of keyword strings to spot in the audio. Each keyword string can include
903916 # one or more string tokens. Keywords are spotted only in the final results, not in
904917 # interim hypotheses. If you specify any keywords, you must also specify a keywords
905- # threshold. You can spot a maximum of 1000 keywords. Omit the parameter or specify
906- # an empty array if you do not need to spot keywords. See [Keyword
918+ # threshold. Omit the parameter or specify an empty array if you do not need to spot
919+ # keywords.
920+ #
921+ # You can spot a maximum of 1000 keywords with a single request. A single keyword
922+ # can have a maximum length of 1024 characters, though the maximum effective length
923+ # for double-byte languages might be shorter. Keywords are case-insensitive.
924+ #
925+ # See [Keyword
907926 # spotting](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-output#keyword_spotting).
908927 # @param keywords_threshold [Float] A confidence value that is the lower bound for spotting a keyword. A word is
909928 # considered to match a keyword if its confidence is greater than or equal to the
@@ -1599,10 +1618,10 @@ def list_corpora(customization_id:)
15991618 #
16001619 # The call returns an HTTP 201 response code if the corpus is valid. The service
16011620 # then asynchronously processes the contents of the corpus and automatically
1602- # extracts new words that it finds. This can take on the order of a minute or two to
1603- # complete depending on the total number of words and the number of new words in the
1604- # corpus, as well as the current load on the service. You cannot submit requests to
1605- # add additional resources to the custom model or to train the model until the
1621+ # extracts new words that it finds. This operation can take on the order of minutes
1622+ # to complete depending on the total number of words and the number of new words in
1623+ # the corpus, as well as the current load on the service. You cannot submit requests
1624+ # to add additional resources to the custom model or to train the model until the
16061625 # service's analysis of the corpus for the current request completes. Use the **List
16071626 # a corpus** method to check the status of the analysis.
16081627 #
@@ -2147,12 +2166,12 @@ def list_grammars(customization_id:)
21472166 #
21482167 # The call returns an HTTP 201 response code if the grammar is valid. The service
21492168 # then asynchronously processes the contents of the grammar and automatically
2150- # extracts new words that it finds. This can take a few seconds to complete
2151- # depending on the size and complexity of the grammar, as well as the current load
2152- # on the service. You cannot submit requests to add additional resources to the
2153- # custom model or to train the model until the service's analysis of the grammar for
2154- # the current request completes. Use the **Get a grammar** method to check the
2155- # status of the analysis.
2169+ # extracts new words that it finds. This operation can take a few seconds or minutes
2170+ # to complete depending on the size and complexity of the grammar, as well as the
2171+ # current load on the service. You cannot submit requests to add additional
2172+ # resources to the custom model or to train the model until the service's analysis
2173+ # of the grammar for the current request completes. Use the **Get a grammar** method
2174+ # to check the status of the analysis.
21562175 #
21572176 # The service populates the model's words resource with any word that is recognized
21582177 # by the grammar that is not found in the model's base vocabulary. These are
@@ -2499,7 +2518,7 @@ def delete_acoustic_model(customization_id:)
24992518 # to complete depending on the total amount of audio data on which the custom
25002519 # acoustic model is being trained and the current load on the service. Typically,
25012520 # training a custom acoustic model takes approximately two to four times the length
2502- # of its audio data. The range of time depends on the model being trained and the
2521+ # of its audio data. The actual time depends on the model being trained and the
25032522 # nature of the audio, such as whether the audio is clean or noisy. The method
25042523 # returns an HTTP 200 response code to indicate that the training process has begun.
25052524 #
@@ -2518,8 +2537,9 @@ def delete_acoustic_model(customization_id:)
25182537 # Train with a custom language model if you have verbatim transcriptions of the
25192538 # audio files that you have added to the custom model or you have either corpora
25202539 # (text files) or a list of words that are relevant to the contents of the audio
2521- # files. Both of the custom models must be based on the same version of the same
2522- # base model for training to succeed.
2540+ # files. For training to succeed, both of the custom models must be based on the
2541+ # same version of the same base model, and the custom language model must be fully
2542+ # trained and available.
25232543 #
25242544 # **See also:**
25252545 # * [Train the custom acoustic
@@ -2535,6 +2555,9 @@ def delete_acoustic_model(customization_id:)
25352555 # another training request or a request to add audio resources to the model.
25362556 # * The custom model contains less than 10 minutes or more than 200 hours of audio
25372557 # data.
2558+ # * You passed a custom language model with the `custom_language_model_id` query
2559+ # parameter that is not in the available state. A custom language model must be
2560+ # fully trained and available to be used to train a custom acoustic model.
25382561 # * You passed an incompatible custom language model with the
25392562 # `custom_language_model_id` query parameter. Both custom models must be based on
25402563 # the same version of the same base model.
@@ -2550,8 +2573,8 @@ def delete_acoustic_model(customization_id:)
25502573 # been trained with verbatim transcriptions of the audio resources or that contains
25512574 # words that are relevant to the contents of the audio resources. The custom
25522575 # language model must be based on the same version of the same base model as the
2553- # custom acoustic model. The credentials specified with the request must own both
2554- # custom models.
2576+ # custom acoustic model, and the custom language model must be fully trained and
2577+ # available. The credentials specified with the request must own both custom models.
25552578 # @return [IBMCloudSdkCore::DetailedResponse] An `IBMCloudSdkCore::DetailedResponse` object representing the response.
25562579 def train_acoustic_model ( customization_id :, custom_language_model_id : nil )
25572580 raise ArgumentError . new ( "customization_id must be provided" ) if customization_id . nil?
@@ -2649,8 +2672,9 @@ def reset_acoustic_model(customization_id:)
26492672 # service that owns the custom model.
26502673 # @param custom_language_model_id [String] If the custom acoustic model was trained with a custom language model, the
26512674 # customization ID (GUID) of that custom language model. The custom language model
2652- # must be upgraded before the custom acoustic model can be upgraded. The credentials
2653- # specified with the request must own both custom models.
2675+ # must be upgraded before the custom acoustic model can be upgraded. The custom
2676+ # language model must be fully trained and available. The credentials specified with
2677+ # the request must own both custom models.
26542678 # @param force [Boolean] If `true`, forces the upgrade of a custom acoustic model for which no input data
26552679 # has been modified since it was last trained. Use this parameter only to force the
26562680 # upgrade of a custom acoustic model that is trained with a custom language model,
@@ -2745,14 +2769,14 @@ def list_audio(customization_id:)
27452769 # same name as an existing audio resource, set the `allow_overwrite` parameter to
27462770 # `true`; otherwise, the request fails.
27472771 #
2748- # The method is asynchronous. It can take several seconds to complete depending on
2749- # the duration of the audio and, in the case of an archive file, the total number of
2750- # audio files being processed. The service returns a 201 response code if the audio
2751- # is valid. It then asynchronously analyzes the contents of the audio file or files
2752- # and automatically extracts information about the audio such as its length,
2753- # sampling rate, and encoding. You cannot submit requests to train or upgrade the
2754- # model until the service's analysis of all audio resources for current requests
2755- # completes.
2772+ # The method is asynchronous. It can take several seconds or minutes to complete
2773+ # depending on the duration of the audio and, in the case of an archive file, the
2774+ # total number of audio files being processed. The service returns a 201 response
2775+ # code if the audio is valid. It then asynchronously analyzes the contents of the
2776+ # audio file or files and automatically extracts information about the audio such as
2777+ # its length, sampling rate, and encoding. You cannot submit requests to train or
2778+ # upgrade the model until the service's analysis of all audio resources for current
2779+ # requests completes.
27562780 #
27572781 # To determine the status of the service's analysis of the audio, use the **Get an
27582782 # audio resource** method to poll the status of the audio. The method accepts the
0 commit comments