-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpsc_test.py
More file actions
92 lines (71 loc) · 2.75 KB
/
psc_test.py
File metadata and controls
92 lines (71 loc) · 2.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import numpy as np
from scipy.io import wavfile
import sys
from parametric_coding import PSEncoder, PSDecoder
# Paths of the WAV file to process and of the reconstructed result.
input_filename = 'sample.wav'
output_filename = 'output.psc.wav'

# Load the input: `sr` is the sample rate and `audio_data` the sample array
# returned by scipy.io.wavfile.read.
try:
    sr, audio_data = wavfile.read(input_filename)
except FileNotFoundError:
    print(f"Error: Input file not found at {input_filename}")
    # sys.exit(1) rather than the site-provided exit() helper: the helper is
    # meant for interactive use and, called without arguments, reports
    # success (status 0) even though the run failed.
    sys.exit(1)
except Exception as e:
    # Any other read failure (e.g. a malformed file) is reported and is fatal.
    print(f"Error reading WAV file: {e}")
    sys.exit(1)
# Normalize audio data to float32. Integer PCM is scaled into [-1, 1) using
# the full-scale value of its own dtype (not only int16); floating-point
# input is only cast, not rescaled.
if np.issubdtype(audio_data.dtype, np.unsignedinteger):
    # Unsigned PCM (8-bit WAV) is offset-binary: the middle of the range is
    # silence, so re-centre before scaling.
    midpoint = (np.iinfo(audio_data.dtype).max + 1) / 2.0
    audio_float = (
        (audio_data.astype(np.float64) - midpoint) / midpoint
    ).astype(np.float32)
elif np.issubdtype(audio_data.dtype, np.integer):
    # Signed PCM: divide by the magnitude of the most negative value
    # (32768 for int16, 2**31 for int32).
    full_scale = -float(np.iinfo(audio_data.dtype).min)
    audio_float = (audio_data.astype(np.float64) / full_scale).astype(np.float32)
else:
    audio_float = audio_data.astype(np.float32)

# Ensure stereo data: a 2-D array with exactly two channel columns. Checking
# only the number of dimensions would let 1-channel or multi-channel 2-D
# input through to code that assumes two columns.
if audio_float.ndim != 2 or audio_float.shape[1] != 2:
    print("Error: Input WAV file must be stereo (2 channels).")
    # Non-zero status so callers can detect the failure.
    sys.exit(1)
# Number of sample frames (rows) in the input.
N = len(audio_float)

# PHSC Configuration
W_SIZE = 4096            # Window size / Chunk size
HOP_SIZE = W_SIZE // 2   # 50% overlap

# NOTE(review): the encoder is created with use_grouping=False while the
# decoder is created with use_grouping=True — confirm this mismatch is
# intentional. The positional arguments are passed through unchanged; their
# meaning is defined by parametric_coding and is not visible here.
PSenc = PSEncoder(sr, 20, 15000, 32, -50, use_grouping=False)
PSdec = PSDecoder(sr, 20, 15000, 32, use_grouping=True)

# Initialize Overlap-Add (OLA) buffer: one row per input frame plus one full
# window of tail room for the zero-padded last chunk, two columns (stereo).
# The 2-D shape is allocated directly; building a flat array of N + W_SIZE
# elements and reshaping it to (-1, 2) would give only half as many rows
# (and fail outright when N + W_SIZE is odd), truncating the output.
output = np.zeros((N + W_SIZE, 2), dtype=np.float32)

print(f"\nStarting chunk processing (SR: {sr}, Chunk Size: {W_SIZE}, Hop Size: {HOP_SIZE})...")
# Processing loop: take one W_SIZE-frame window every HOP_SIZE frames, run it
# through the encoder/decoder pair and overlap-add the result into `output`.
# NOTE(review): consecutive windows overlap by 50% and the decoder output is
# summed as-is — confirm PSdec.apply applies a synthesis window, otherwise the
# overlapping regions are added at full level.
for chunk_index, start in enumerate(range(0, N, HOP_SIZE)):
    stop = start + W_SIZE
    chunk = audio_float[start:stop]

    # Zero-pad last chunk so every window handed to the codec is W_SIZE long.
    missing = W_SIZE - chunk.shape[0]
    if missing > 0:
        chunk = np.pad(chunk, ((0, missing), (0, 0)), mode='constant')

    # Mono downmix: per-frame average across the channel axis.
    mono_audio = chunk.mean(axis=1)

    # Each profile entry unpacks to (freq, pan, ipd, ic); the frequency is
    # not forwarded, and ic is reduced to a boolean flag (ic >= 1).
    stereo_profile = PSenc.analyze(chunk, True)
    pan_values = [pan for _freq, pan, _ipd, _ic in stereo_profile]
    ipd_values = [ipd for _freq, _pan, ipd, _ic in stereo_profile]
    ic_values = [ic >= 1 for _freq, _pan, _ipd, ic in stereo_profile]

    decoded = PSdec.apply(
        mono_audio=mono_audio,
        pan_values=pan_values,
        ipd_values=ipd_values,
        ic_values=ic_values,
    )

    # OLA with bounds checking: never write past the end of the buffer, and
    # stop entirely once the window starts at or beyond it.
    write_stop = min(stop, len(output))
    n_frames = write_stop - start
    if n_frames <= 0:
        break
    output[start:write_stop] += decoded[:n_frames]

    # Optional progress log, refreshed in place every 10th chunk.
    if chunk_index % 10 == 0:
        sys.stdout.write(f"\rProcessed {chunk_index} chunks... ({(start / N * 100):.1f}%)")
        sys.stdout.flush()
# Drop the overlap-add tail: keep at most the first N frames of the buffer.
final_stereo_output = output[:N]

# Limit every sample to [-1, 1] so the scaled values stay inside the int16
# range, then scale and convert (astype truncates toward zero).
final_stereo_output = final_stereo_output.clip(-1.0, 1.0)
int16_output = (32767 * final_stereo_output).astype(np.int16)

# Write the result at the input's sample rate.
wavfile.write(output_filename, sr, int16_output)
print(f"\rProcessing complete. Output written to: {output_filename}")