11package audio
22
3- import "math"
3+ import (
4+ "math"
5+ "math/rand/v2"
6+ )
47
58// Linear16ToMulaw converts 16-bit PCM samples to 8-bit mu-law.
9+ // Applies TPDF dithering to decorrelate quantization error.
610func Linear16ToMulaw (pcmData []byte ) []byte {
711 numSamples := len (pcmData ) / 2
812 mulaw := make ([]byte , numSamples )
13+ rng := rand .New (rand .NewPCG (0 , 42 ))
914 for i := 0 ; i < numSamples ; i ++ {
1015 sample := int16 (pcmData [i * 2 ]) | int16 (pcmData [i * 2 + 1 ])<< 8
11- mulaw [i ] = linearToMulaw (sample )
16+ // TPDF dithering: triangular noise scaled to mu-law's minimum step size (2 in 16-bit).
17+ // Sum of two uniform[-1,1] values gives triangular[-2,2] distribution.
18+ dither := (rng .Float64 ()* 2 - 1 ) + (rng .Float64 ()* 2 - 1 )
19+ dithered := float64 (sample ) + dither
20+ if dithered > 32767 {
21+ dithered = 32767
22+ } else if dithered < - 32768 {
23+ dithered = - 32768
24+ }
25+ mulaw [i ] = linearToMulaw (int16 (math .Round (dithered )))
1226 }
1327 return mulaw
1428}
1529
1630// Linear16ToAlaw converts 16-bit PCM samples to 8-bit A-law.
31+ // Applies TPDF dithering to decorrelate quantization error.
1732func Linear16ToAlaw (pcmData []byte ) []byte {
1833 numSamples := len (pcmData ) / 2
1934 alaw := make ([]byte , numSamples )
35+ rng := rand .New (rand .NewPCG (0 , 42 ))
2036 for i := 0 ; i < numSamples ; i ++ {
2137 sample := int16 (pcmData [i * 2 ]) | int16 (pcmData [i * 2 + 1 ])<< 8
22- alaw [i ] = linearToAlaw (sample )
38+ dither := (rng .Float64 ()* 2 - 1 ) + (rng .Float64 ()* 2 - 1 )
39+ dithered := float64 (sample ) + dither
40+ if dithered > 32767 {
41+ dithered = 32767
42+ } else if dithered < - 32768 {
43+ dithered = - 32768
44+ }
45+ alaw [i ] = linearToAlaw (int16 (math .Round (dithered )))
2346 }
2447 return alaw
2548}
2649
2750// linearToMulaw converts a 16-bit linear sample to 8-bit mu-law.
2851func linearToMulaw (sample int16 ) byte {
2952 const (
30- mulawMax = 0x1FFF
31- mulawBias = 33
53+ mulawBias = 0x84 // 132, per ITU G.711
3254 mulawClip = 32635
3355 )
3456
@@ -86,8 +108,9 @@ func linearToAlaw(sample int16) byte {
86108 return byte ((exponent << 4 ) | mantissa ) ^ byte (sign )
87109}
88110
89- // ResampleLinear16 resamples 16-bit PCM from srcRate to dstRate.
90- // When downsampling, applies a windowed-sinc low-pass filter to prevent aliasing.
111+ // ResampleLinear16 resamples 16-bit PCM from srcRate to dstRate using
112+ // polyphase windowed-sinc interpolation. This combines anti-alias filtering
113+ // and interpolation into a single step for maximum quality.
91114func ResampleLinear16 (pcmData []byte , srcRate , dstRate int ) []byte {
92115 if srcRate == dstRate {
93116 return pcmData
@@ -99,29 +122,75 @@ func ResampleLinear16(pcmData []byte, srcRate, dstRate int) []byte {
99122 src [i ] = float64 (int16 (pcmData [i * 2 ]) | int16 (pcmData [i * 2 + 1 ])<< 8 )
100123 }
101124
102- // Apply low-pass filter before downsampling to prevent aliasing.
103- // Cutoff at Nyquist of the target rate.
125+ ratio := float64 (srcRate ) / float64 (dstRate )
126+ numDstSamples := int (float64 (numSrcSamples ) / ratio )
127+
128+ // Normalized cutoff frequency (relative to source sample rate).
129+ // Use 0.95 * Nyquist to give the transition band room to attenuate
130+ // before Nyquist, preventing aliasing energy from leaking through.
131+ var fc float64
104132 if dstRate < srcRate {
105- src = lowPass (src , srcRate , dstRate / 2 )
133+ // Downsampling: cut at 0.95 * target Nyquist
134+ fc = 0.95 * float64 (dstRate ) / (2.0 * float64 (srcRate ))
135+ } else {
136+ // Upsampling: cut at 0.95 * source Nyquist (anti-imaging)
137+ fc = 0.95 * 0.5
106138 }
107139
108- ratio := float64 (srcRate ) / float64 (dstRate )
109- numDstSamples := int (float64 (numSrcSamples ) / ratio )
140+ // Kernel half-length sized by zero-crossings of the sinc function.
141+ // One zero-crossing occurs every 1/(2*fc) input samples.
142+ // 32 zero-crossings per side gives excellent stopband attenuation.
143+ const numZeroCrossings = 32
144+ halfLen := int (math .Ceil (float64 (numZeroCrossings ) / (2.0 * fc )))
145+ if halfLen < 32 {
146+ halfLen = 32
147+ }
148+ if halfLen > 512 {
149+ halfLen = 512
150+ }
110151
111152 result := make ([]byte , numDstSamples * 2 )
112153 for i := 0 ; i < numDstSamples ; i ++ {
113154 srcPos := float64 (i ) * ratio
114- srcIdx := int (srcPos )
115- frac := srcPos - float64 (srcIdx )
155+ srcIdx := int (math .Floor (srcPos ))
156+
157+ jMin := srcIdx - halfLen + 1
158+ if jMin < 0 {
159+ jMin = 0
160+ }
161+ jMax := srcIdx + halfLen
162+ if jMax >= numSrcSamples {
163+ jMax = numSrcSamples - 1
164+ }
116165
117166 var sample float64
118- if srcIdx + 1 < numSrcSamples {
119- sample = src [srcIdx ]* (1 - frac ) + src [srcIdx + 1 ]* frac
120- } else {
121- sample = src [srcIdx ]
167+ var weightSum float64
168+ for j := jMin ; j <= jMax ; j ++ {
169+ d := float64 (j ) - srcPos
170+
171+ // Windowed sinc: sinc(2*fc*d) * blackman(d)
172+ var sincVal float64
173+ arg := 2.0 * fc * d
174+ if math .Abs (arg ) < 1e-9 {
175+ sincVal = 1.0
176+ } else {
177+ sincVal = math .Sin (math .Pi * arg ) / (math .Pi * arg )
178+ }
179+
180+ // Blackman window centered at srcPos, spanning ±halfLen
181+ wPos := (d + float64 (halfLen )) / float64 (2 * halfLen )
182+ w := 0.42 - 0.5 * math .Cos (2 * math .Pi * wPos ) + 0.08 * math .Cos (4 * math .Pi * wPos )
183+
184+ weight := sincVal * w
185+ sample += src [j ] * weight
186+ weightSum += weight
187+ }
188+
189+ // Normalize to preserve amplitude
190+ if weightSum != 0 {
191+ sample /= weightSum
122192 }
123193
124- // Clamp to int16 range
125194 if sample > 32767 {
126195 sample = 32767
127196 } else if sample < - 32768 {
@@ -134,51 +203,3 @@ func ResampleLinear16(pcmData []byte, srcRate, dstRate int) []byte {
134203 }
135204 return result
136205}
137-
138- // lowPass applies a windowed-sinc FIR low-pass filter.
139- func lowPass (samples []float64 , sampleRate , cutoffHz int ) []float64 {
140- // Filter order scales with the ratio for good stopband attenuation
141- halfLen := sampleRate / cutoffHz * 8
142- if halfLen < 32 {
143- halfLen = 32
144- }
145- if halfLen > 256 {
146- halfLen = 256
147- }
148-
149- fc := float64 (cutoffHz ) / float64 (sampleRate )
150- kernel := make ([]float64 , 2 * halfLen + 1 )
151- var sum float64
152- for i := - halfLen ; i <= halfLen ; i ++ {
153- if i == 0 {
154- kernel [i + halfLen ] = 2 * math .Pi * fc
155- } else {
156- x := float64 (i )
157- // Sinc
158- sinc := math .Sin (2 * math .Pi * fc * x ) / x
159- // Blackman window
160- w := 0.42 - 0.5 * math .Cos (2 * math .Pi * float64 (i + halfLen )/ float64 (2 * halfLen )) + 0.08 * math .Cos (4 * math .Pi * float64 (i + halfLen )/ float64 (2 * halfLen ))
161- kernel [i + halfLen ] = sinc * w
162- }
163- sum += kernel [i + halfLen ]
164- }
165- // Normalize
166- for i := range kernel {
167- kernel [i ] /= sum
168- }
169-
170- // Convolve
171- n := len (samples )
172- out := make ([]float64 , n )
173- for i := 0 ; i < n ; i ++ {
174- var v float64
175- for j := - halfLen ; j <= halfLen ; j ++ {
176- idx := i + j
177- if idx >= 0 && idx < n {
178- v += samples [idx ] * kernel [j + halfLen ]
179- }
180- }
181- out [i ] = v
182- }
183- return out
184- }
0 commit comments