Skip to content

Commit d6dd976

Browse files
committed
lighter default options and example of output
1 parent d2a2183 commit d6dd976

File tree

1 file changed

+79
-5
lines changed

1 file changed

+79
-5
lines changed

speech-to-text/recognize-stream.js

Lines changed: 79 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,86 @@ var QUERY_PARAMS_ALLOWED = ['model', 'watson-token']; //, 'X-Watson-Learning-Opt
3232

3333

3434
/**
35-
* pipe()-able Node.js Readable/Writeable stream - accepts binary audio and emits text in it's `data` events.
36-
* Also emits `results` events with interim results and other data.
35+
* pipe()-able Node.js Readable/Writable stream - accepts binary audio and emits text/objects in its `data` events.
3736
*
3837
* Cannot be instantiated directly; instead, it is created by calling #createRecognizeStream()
3938
*
4039
* Uses WebSockets under the hood. For audio with no recognizable speech, no `data` events are emitted.
40+
*
41+
* By default, only finalized text is emitted in the `data` events; however, in `readableObjectMode` (usually just `objectMode` when using a helper method), result objects are emitted instead.
42+
*
43+
* An interim result looks like this (assuming all features are enabled):
44+
```js
45+
{ results:
46+
[ { alternatives:
47+
[ { timestamps:
48+
[ [ 'it', 20.9, 21.04 ],
49+
[ 'is', 21.04, 21.17 ],
50+
[ 'a', 21.17, 21.25 ],
51+
[ 'site', 21.25, 21.56 ],
52+
[ 'that', 21.56, 21.7 ],
53+
[ 'hardly', 21.7, 22.06 ],
54+
[ 'anyone', 22.06, 22.49 ],
55+
[ 'can', 22.49, 22.67 ],
56+
[ 'behold', 22.67, 23.13 ],
57+
[ 'without', 23.13, 23.46 ],
58+
[ 'some', 23.46, 23.67 ],
59+
[ 'sort', 23.67, 23.91 ],
60+
[ 'of', 23.91, 24 ],
61+
[ 'unwanted', 24, 24.58 ],
62+
[ 'emotion', 24.58, 25.1 ] ],
63+
transcript: 'it is a site that hardly anyone can behold without some sort of unwanted emotion ' } ],
64+
final: false } ],
65+
result_index: 3 }
66+
```
67+
68+
While a final result looks like this (again, assuming all features are enabled):
69+
```js
70+
{ results:
71+
[ { alternatives:
72+
[ { word_confidence:
73+
[ [ 'it', 1 ],
74+
[ 'is', 0.956286624429304 ],
75+
[ 'a', 0.8105753725270362 ],
76+
[ 'site', 1 ],
77+
[ 'that', 1 ],
78+
[ 'hardly', 1 ],
79+
[ 'anyone', 1 ],
80+
[ 'can', 1 ],
81+
[ 'behold', 0.5273598005406737 ],
82+
[ 'without', 1 ],
83+
[ 'some', 1 ],
84+
[ 'sort', 1 ],
85+
[ 'of', 1 ],
86+
[ 'unwanted', 1 ],
87+
[ 'emotion', 0.49401837076320887 ] ],
88+
confidence: 0.881,
89+
transcript: 'it is a site that hardly anyone can behold without some sort of unwanted emotion ',
90+
timestamps:
91+
[ [ 'it', 20.9, 21.04 ],
92+
[ 'is', 21.04, 21.17 ],
93+
[ 'a', 21.17, 21.25 ],
94+
[ 'site', 21.25, 21.56 ],
95+
[ 'that', 21.56, 21.7 ],
96+
[ 'hardly', 21.7, 22.06 ],
97+
[ 'anyone', 22.06, 22.49 ],
98+
[ 'can', 22.49, 22.67 ],
99+
[ 'behold', 22.67, 23.13 ],
100+
[ 'without', 23.13, 23.46 ],
101+
[ 'some', 23.46, 23.67 ],
102+
[ 'sort', 23.67, 23.91 ],
103+
[ 'of', 23.91, 24 ],
104+
[ 'unwanted', 24, 24.58 ],
105+
[ 'emotion', 24.58, 25.1 ] ] },
106+
{ transcript: 'it is a sight that hardly anyone can behold without some sort of unwanted emotion ' },
107+
{ transcript: 'it is a site that hardly anyone can behold without some sort of unwanted emotions ' } ],
108+
final: true } ],
109+
result_index: 3 }
110+
```
111+
112+
113+
114+
*
41115
* @param options
42116
* @param {String} [options.model='en-US_BroadbandModel'] - voice model to use. Microphone streaming only supports broadband models.
43117
* @param {String} [options.url='wss://stream.watsonplatform.net/speech-to-text/api'] base URL for service
@@ -120,9 +194,9 @@ RecognizeStream.prototype.initialize = function () {
120194
continuous: true,
121195
inactivity_timeout: 30,
122196
interim_results: true,
123-
word_confidence: true,
124-
timestamps: true,
125-
max_alternatives: 3
197+
word_confidence: false,
198+
timestamps: false,
199+
max_alternatives: 1
126200
};
127201

128202
var openingMessage = defaults(

0 commit comments

Comments
 (0)