-
Notifications
You must be signed in to change notification settings - Fork 39
Expand file tree
/
Copy pathdecoder.rb
More file actions
216 lines (183 loc) · 6.94 KB
/
decoder.rb
File metadata and controls
216 lines (183 loc) · 6.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
module Pocketsphinx
class Decoder
require 'delegate'
include API::CallHelpers
# A decoding result that delegates to the hypothesis string it wraps while also
# carrying the scores it was constructed with.
class Hypothesis < SimpleDelegator
  attr_accessor :path_score, :posterior_prob

  # @param string The object to delegate to (the hypothesis text)
  # @param path_score The value exposed through #path_score
  # @param posterior_prob The value exposed through #posterior_prob (nil when omitted)
  def initialize(string, path_score, posterior_prob = nil)
    super(string)
    @path_score = path_score
    @posterior_prob = posterior_prob
  end
end
# A single recognized word together with the start and end frame values reported for it (see #words).
Word = Struct.new(:word, :start_frame, :end_frame)
# Lets the API module used for Pocketsphinx calls be replaced; #ps_api falls back to
# API::Pocketsphinx when nothing has been assigned.
attr_writer :ps_api
# The Configuration given to #initialize, or most recently given to #reconfigure.
attr_accessor :configuration
# Initialize a Decoder
#
# Note that this initialization process actually updates the Configuration based on settings
# which are found in feat.params along with the acoustic model.
#
# @param [Configuration] configuration
# @param [FFI::Pointer] ps_decoder An optional Pocketsphinx decoder. One is initialized if not provided.
def initialize(configuration, ps_decoder = nil)
  @configuration = configuration
  # Keep hold of a caller-supplied decoder. Previously the argument was dropped, so the
  # lazy #ps_decoder accessor went on to initialize a brand new decoder regardless.
  @ps_decoder = ps_decoder
  init_decoder if @ps_decoder.nil?
end
# Reinitialize the decoder with updated configuration.
#
# This function allows you to switch the acoustic model, dictionary, or other configuration
# without creating an entirely new decoding object.
#
# @param [Configuration] configuration An optional new configuration to use. If this is
# nil, the previous configuration will be reloaded, with any changes applied.
def reconfigure(configuration = nil)
  # Swap in the new configuration only when one was given; otherwise the current
  # configuration is what gets reloaded.
  if configuration
    self.configuration = configuration
  end
  reinit_decoder
end
# Decode a raw audio stream as a single utterance, opening a file if path given
#
# See #decode_raw
#
# @param [IO, String] audio_path_or_file The raw audio stream, or the path of a file, to decode as a single utterance
# @param [Fixnum] max_samples The maximum samples to process from the stream on each iteration
def decode(audio_path_or_file, max_samples = 2048)
  # Anything that is not a String is treated as an already-open stream.
  return decode_raw(audio_path_or_file, max_samples) unless String === audio_path_or_file

  # A String is a path: open it in binary mode; the block form closes the file afterwards.
  File.open(audio_path_or_file, 'rb') do |stream|
    decode_raw(stream, max_samples)
  end
end
# Decode a raw audio stream as a single utterance.
#
# No headers are recognized in this stream. The configuration parameters samprate
# and input_endian are used to determine the sampling rate and endianness of the stream,
# respectively. Audio is always assumed to be 16-bit signed PCM.
#
# @param [IO] audio_file The raw audio stream to decode as a single utterance
# @param [Fixnum] max_samples The maximum samples to process from the stream on each iteration
def decode_raw(audio_file, max_samples = 2048)
  bytes_per_sample = 2 # samples are 16-bit, matching the :int16 buffer below

  start_utterance
  # The block form hands the buffer to the block for the duration of the read loop.
  FFI::MemoryPointer.new(:int16, max_samples) do |buffer|
    # IO#read returns nil at end of stream, which ends the loop.
    while (chunk = audio_file.read(max_samples * bytes_per_sample))
      buffer.write_string(chunk)
      # Count bytes rather than characters: String#length under-reports the amount of
      # data whenever the stream returns a string in a multi-byte encoding.
      process_raw(buffer, chunk.bytesize / bytes_per_sample)
    end
  end
  end_utterance
end
# Decode raw audio data.
#
# @param buffer The buffer holding the audio data (#decode_raw passes an FFI::MemoryPointer)
# @param size The number of samples in the buffer
# @param [Boolean] no_search If true, perform feature extraction but don't do any
# recognition yet. This may be necessary if your processor has trouble doing recognition in
# real-time.
# @param [Boolean] full_utt If true, this block of data is a full utterance
# worth of data. This may allow the recognizer to produce more accurate results.
# @return Number of frames of data searched
def process_raw(buffer, size, no_search = false, full_utt = false)
  # ps_process_raw is handed 1/0 rather than Ruby booleans.
  no_search_flag = no_search ? 1 : 0
  full_utt_flag = full_utt ? 1 : 0
  api_call(:ps_process_raw, ps_decoder, buffer, size, no_search_flag, full_utt_flag)
end
# Start utterance processing.
#
# This function should be called before any utterance data is passed
# to the decoder. It marks the start of a new utterance and
# reinitializes internal data structures.
def start_utterance
  # Forwards to ps_start_utt for the underlying decoder through api_call.
  api_call(:ps_start_utt, ps_decoder)
end
# End utterance processing
def end_utterance
  # Forwards to ps_end_utt for the underlying decoder through api_call.
  api_call(:ps_end_utt, ps_decoder)
end
# Checks if the last fed audio buffer contained speech
def in_speech?
  speech_flag = ps_api.ps_get_in_speech(ps_decoder)
  # Any value other than 0 is reported as speech.
  speech_flag != 0
end
# Get hypothesis string (with #path_score and #posterior_prob).
#
# @return [Hypothesis] Hypothesis (behaves like a string)
def hypothesis
  # ps_get_hyp is handed this pointer to receive the path score.
  score_ptr = FFI::MemoryPointer.new(:int32, 1)
  text = ps_api.ps_get_hyp(ps_decoder, score_ptr)
  # No hypothesis: return before making any further API calls, whose results would
  # only have been discarded.
  return nil if text.nil?

  path_score = score_ptr.get_int32(0)
  logmath = ps_api.ps_get_logmath(ps_decoder)
  # The posterior probability is the path score passed through logmath_exp.
  Hypothesis.new(text, path_score, ps_api.logmath_exp(logmath, path_score))
end
# Get an array of words with start/end frame values (10msec/frame) for current hypothesis
#
# @return [Array] Array of words with start/end frame values (10msec/frame)
def words
  score_ptr = FFI::MemoryPointer.new(:int32, 1)
  first_frame_ptr = FFI::MemoryPointer.new(:int32, 1)
  last_frame_ptr = FFI::MemoryPointer.new(:int32, 1)

  segment = ps_api.ps_seg_iter(ps_decoder, score_ptr)
  segments = []
  # Walk the segment iterator until the API hands back a null pointer.
  loop do
    break if segment.null?

    # ps_seg_frames fills both frame pointers for the current segment.
    ps_api.ps_seg_frames(segment, first_frame_ptr, last_frame_ptr)
    label = ps_api.ps_seg_word(segment)
    segments.push(
      Pocketsphinx::Decoder::Word.new(label, first_frame_ptr.get_int32(0), last_frame_ptr.get_int32(0))
    )
    segment = ps_api.ps_seg_next(segment)
  end
  segments
end
# Adds new search using JSGF model.
#
# Convenience method to parse JSGF model from string and create a search.
#
# @param [String] jsgf_string The JSGF grammar
# @param [String] name The search name
def set_jsgf_string(jsgf_string, name = 'default')
  # Note the argument order: ps_set_jsgf_string receives the search name before the grammar.
  api_call(:ps_set_jsgf_string, ps_decoder, name, jsgf_string)
end
# Returns the name of the current search in the decoder
def get_search
  api = ps_api
  # Returned unchanged from ps_get_search.
  api.ps_get_search(ps_decoder)
end
# Activates the search with the provided name.
#
# Activates search with the provided name. The search must be added before
# using either ps_set_fsg(), ps_set_lm() or ps_set_kws().
def set_search(name = 'default')
  # Forwards the search name to ps_set_search through api_call.
  api_call(:ps_set_search, ps_decoder, name)
end
# Unsets the search and releases related resources.
#
# Unsets the search previously added using either
# ps_set_fsg(), ps_set_lm() or ps_set_kws().
def unset_search(name = 'default')
  # Forwards the search name to ps_unset_search through api_call.
  api_call(:ps_unset_search, ps_decoder, name)
end
# The API module used for Pocketsphinx calls: whatever was assigned through the
# ps_api writer, otherwise API::Pocketsphinx.
def ps_api
  return @ps_api if @ps_api

  API::Pocketsphinx
end
# The underlying Pocketsphinx decoder; init_decoder is run first when none is held yet.
def ps_decoder
  return @ps_decoder unless @ps_decoder.nil?

  init_decoder
  @ps_decoder
end
private
# Creates the underlying decoder from the current configuration via ps_init, then
# runs post_init_decoder.
def init_decoder
  api = ps_api
  @ps_decoder = api.ps_init(configuration.ps_config)
  post_init_decoder
end
# Reinitializes the existing decoder with the current configuration via ps_reinit.
#
# Raises API::Error when ps_reinit reports a negative result; otherwise runs
# post_init_decoder and returns the result code.
def reinit_decoder
  result = ps_api.ps_reinit(ps_decoder, configuration.ps_config)
  raise API::Error, "Decoder#reconfigure failed with error code #{result}" if result < 0

  post_init_decoder
  result
end
# Hands this decoder to the configuration's post_init_decoder hook, when the
# configuration defines one; otherwise does nothing.
def post_init_decoder
  return unless configuration.respond_to?(:post_init_decoder)

  configuration.post_init_decoder(self)
end
end
end