Skip to content

Commit 9960924

Browse files
committed
More advanced g.711 mu-law conversion for maximum quality
1 parent f440284 commit 9960924

2 files changed

Lines changed: 590 additions & 67 deletions

File tree

internal/audio/convert.go

Lines changed: 88 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,56 @@
11
package audio
22

3-
import "math"
3+
import (
4+
"math"
5+
"math/rand/v2"
6+
)
47

58
// Linear16ToMulaw converts 16-bit PCM samples to 8-bit mu-law.
9+
// Applies TPDF dithering to decorrelate quantization error.
610
func Linear16ToMulaw(pcmData []byte) []byte {
711
numSamples := len(pcmData) / 2
812
mulaw := make([]byte, numSamples)
13+
rng := rand.New(rand.NewPCG(0, 42))
914
for i := 0; i < numSamples; i++ {
1015
sample := int16(pcmData[i*2]) | int16(pcmData[i*2+1])<<8
11-
mulaw[i] = linearToMulaw(sample)
16+
// TPDF dithering: triangular noise scaled to mu-law's minimum step size (2 in 16-bit).
17+
// Sum of two uniform[-1,1] values gives triangular[-2,2] distribution.
18+
dither := (rng.Float64()*2 - 1) + (rng.Float64()*2 - 1)
19+
dithered := float64(sample) + dither
20+
if dithered > 32767 {
21+
dithered = 32767
22+
} else if dithered < -32768 {
23+
dithered = -32768
24+
}
25+
mulaw[i] = linearToMulaw(int16(math.Round(dithered)))
1226
}
1327
return mulaw
1428
}
1529

1630
// Linear16ToAlaw converts 16-bit PCM samples to 8-bit A-law.
31+
// Applies TPDF dithering to decorrelate quantization error.
1732
func Linear16ToAlaw(pcmData []byte) []byte {
1833
numSamples := len(pcmData) / 2
1934
alaw := make([]byte, numSamples)
35+
rng := rand.New(rand.NewPCG(0, 42))
2036
for i := 0; i < numSamples; i++ {
2137
sample := int16(pcmData[i*2]) | int16(pcmData[i*2+1])<<8
22-
alaw[i] = linearToAlaw(sample)
38+
dither := (rng.Float64()*2 - 1) + (rng.Float64()*2 - 1)
39+
dithered := float64(sample) + dither
40+
if dithered > 32767 {
41+
dithered = 32767
42+
} else if dithered < -32768 {
43+
dithered = -32768
44+
}
45+
alaw[i] = linearToAlaw(int16(math.Round(dithered)))
2346
}
2447
return alaw
2548
}
2649

2750
// linearToMulaw converts a 16-bit linear sample to 8-bit mu-law.
2851
func linearToMulaw(sample int16) byte {
2952
const (
30-
mulawMax = 0x1FFF
31-
mulawBias = 33
53+
mulawBias = 0x84 // 132, per ITU G.711
3254
mulawClip = 32635
3355
)
3456

@@ -86,8 +108,9 @@ func linearToAlaw(sample int16) byte {
86108
return byte((exponent << 4) | mantissa) ^ byte(sign)
87109
}
88110

89-
// ResampleLinear16 resamples 16-bit PCM from srcRate to dstRate.
90-
// When downsampling, applies a windowed-sinc low-pass filter to prevent aliasing.
111+
// ResampleLinear16 resamples 16-bit PCM from srcRate to dstRate using
112+
// polyphase windowed-sinc interpolation. This combines anti-alias filtering
113+
// and interpolation into a single step for maximum quality.
91114
func ResampleLinear16(pcmData []byte, srcRate, dstRate int) []byte {
92115
if srcRate == dstRate {
93116
return pcmData
@@ -99,29 +122,75 @@ func ResampleLinear16(pcmData []byte, srcRate, dstRate int) []byte {
99122
src[i] = float64(int16(pcmData[i*2]) | int16(pcmData[i*2+1])<<8)
100123
}
101124

102-
// Apply low-pass filter before downsampling to prevent aliasing.
103-
// Cutoff at Nyquist of the target rate.
125+
ratio := float64(srcRate) / float64(dstRate)
126+
numDstSamples := int(float64(numSrcSamples) / ratio)
127+
128+
// Normalized cutoff frequency (relative to source sample rate).
129+
// Use 0.95 * Nyquist to give the transition band room to attenuate
130+
// before Nyquist, preventing aliasing energy from leaking through.
131+
var fc float64
104132
if dstRate < srcRate {
105-
src = lowPass(src, srcRate, dstRate/2)
133+
// Downsampling: cut at 0.95 * target Nyquist
134+
fc = 0.95 * float64(dstRate) / (2.0 * float64(srcRate))
135+
} else {
136+
// Upsampling: cut at 0.95 * source Nyquist (anti-imaging)
137+
fc = 0.95 * 0.5
106138
}
107139

108-
ratio := float64(srcRate) / float64(dstRate)
109-
numDstSamples := int(float64(numSrcSamples) / ratio)
140+
// Kernel half-length sized by zero-crossings of the sinc function.
141+
// One zero-crossing occurs every 1/(2*fc) input samples.
142+
// 32 zero-crossings per side gives excellent stopband attenuation.
143+
const numZeroCrossings = 32
144+
halfLen := int(math.Ceil(float64(numZeroCrossings) / (2.0 * fc)))
145+
if halfLen < 32 {
146+
halfLen = 32
147+
}
148+
if halfLen > 512 {
149+
halfLen = 512
150+
}
110151

111152
result := make([]byte, numDstSamples*2)
112153
for i := 0; i < numDstSamples; i++ {
113154
srcPos := float64(i) * ratio
114-
srcIdx := int(srcPos)
115-
frac := srcPos - float64(srcIdx)
155+
srcIdx := int(math.Floor(srcPos))
156+
157+
jMin := srcIdx - halfLen + 1
158+
if jMin < 0 {
159+
jMin = 0
160+
}
161+
jMax := srcIdx + halfLen
162+
if jMax >= numSrcSamples {
163+
jMax = numSrcSamples - 1
164+
}
116165

117166
var sample float64
118-
if srcIdx+1 < numSrcSamples {
119-
sample = src[srcIdx]*(1-frac) + src[srcIdx+1]*frac
120-
} else {
121-
sample = src[srcIdx]
167+
var weightSum float64
168+
for j := jMin; j <= jMax; j++ {
169+
d := float64(j) - srcPos
170+
171+
// Windowed sinc: sinc(2*fc*d) * blackman(d)
172+
var sincVal float64
173+
arg := 2.0 * fc * d
174+
if math.Abs(arg) < 1e-9 {
175+
sincVal = 1.0
176+
} else {
177+
sincVal = math.Sin(math.Pi*arg) / (math.Pi * arg)
178+
}
179+
180+
// Blackman window centered at srcPos, spanning ±halfLen
181+
wPos := (d + float64(halfLen)) / float64(2*halfLen)
182+
w := 0.42 - 0.5*math.Cos(2*math.Pi*wPos) + 0.08*math.Cos(4*math.Pi*wPos)
183+
184+
weight := sincVal * w
185+
sample += src[j] * weight
186+
weightSum += weight
187+
}
188+
189+
// Normalize to preserve amplitude
190+
if weightSum != 0 {
191+
sample /= weightSum
122192
}
123193

124-
// Clamp to int16 range
125194
if sample > 32767 {
126195
sample = 32767
127196
} else if sample < -32768 {
@@ -134,51 +203,3 @@ func ResampleLinear16(pcmData []byte, srcRate, dstRate int) []byte {
134203
}
135204
return result
136205
}
137-
138-
// lowPass applies a windowed-sinc FIR low-pass filter.
139-
func lowPass(samples []float64, sampleRate, cutoffHz int) []float64 {
140-
// Filter order scales with the ratio for good stopband attenuation
141-
halfLen := sampleRate / cutoffHz * 8
142-
if halfLen < 32 {
143-
halfLen = 32
144-
}
145-
if halfLen > 256 {
146-
halfLen = 256
147-
}
148-
149-
fc := float64(cutoffHz) / float64(sampleRate)
150-
kernel := make([]float64, 2*halfLen+1)
151-
var sum float64
152-
for i := -halfLen; i <= halfLen; i++ {
153-
if i == 0 {
154-
kernel[i+halfLen] = 2 * math.Pi * fc
155-
} else {
156-
x := float64(i)
157-
// Sinc
158-
sinc := math.Sin(2*math.Pi*fc*x) / x
159-
// Blackman window
160-
w := 0.42 - 0.5*math.Cos(2*math.Pi*float64(i+halfLen)/float64(2*halfLen)) + 0.08*math.Cos(4*math.Pi*float64(i+halfLen)/float64(2*halfLen))
161-
kernel[i+halfLen] = sinc * w
162-
}
163-
sum += kernel[i+halfLen]
164-
}
165-
// Normalize
166-
for i := range kernel {
167-
kernel[i] /= sum
168-
}
169-
170-
// Convolve
171-
n := len(samples)
172-
out := make([]float64, n)
173-
for i := 0; i < n; i++ {
174-
var v float64
175-
for j := -halfLen; j <= halfLen; j++ {
176-
idx := i + j
177-
if idx >= 0 && idx < n {
178-
v += samples[idx] * kernel[j+halfLen]
179-
}
180-
}
181-
out[i] = v
182-
}
183-
return out
184-
}

0 commit comments

Comments
 (0)