-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvidwav.py
More file actions
95 lines (70 loc) · 2.69 KB
/
vidwav.py
File metadata and controls
95 lines (70 loc) · 2.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import numpy as np
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
# Uncomment if you want to use TeX
# from matplotlib import rc
# rc('text', usetex=True)
# rc('font', **{'family':'serif', 'serif':['Computer Modern Roman'],
# 'monospace': ['Computer Modern Typewriter'], 'size':14})
import matplotlib.animation as manimation
import wave
from matplotlib.ticker import FuncFormatter
def kilo(x, pos):
    """Format a tick value as whole thousands followed by ``k``.

    The ``(value, position)`` signature is the one ``FuncFormatter``
    expects; ``pos`` is accepted for that reason and otherwise ignored.
    """
    in_thousands = x * 1e-3
    return '%1.fk' % in_thousands
def _decode_pcm(raw, sample_width, channels):
    """Decode interleaved little-endian PCM bytes to a (frames, channels) array.

    Parameters
    ----------
    raw : bytes
        Frame data as returned by ``Wave_read.readframes``.
    sample_width : int
        Bytes per sample (1, 2, 3 or 4).
    channels : int
        Number of interleaved channels.

    Raises
    ------
    ValueError
        If ``sample_width`` is not one of the supported widths.
    """
    if sample_width == 1:
        # 8-bit WAV samples are unsigned with the zero level at 128.
        samples = np.frombuffer(raw, dtype=np.uint8).astype(np.int16) - 128
    elif sample_width in (2, 4):
        samples = np.frombuffer(raw, dtype='<i{0}'.format(sample_width))
    elif sample_width == 3:
        # NumPy has no 24-bit integer type: assemble the three little-endian
        # bytes of each sample into an int32 and sign-extend by hand.
        triplets = np.frombuffer(raw, dtype=np.uint8).reshape(-1, 3).astype(np.int32)
        samples = triplets[:, 0] | (triplets[:, 1] << 8) | (triplets[:, 2] << 16)
        samples = np.where(samples >= (1 << 23), samples - (1 << 24), samples)
    else:
        raise ValueError(
            'unsupported sample width: {0} bytes'.format(sample_width))
    return samples.reshape(-1, channels)


def vidwav(wavfile, fps=25):
    """Render a WAV file as an MP4 showing its waveform and spectrogram.

    The top panel shows the peak-normalised waveform and the bottom panel the
    spectrogram, both of the first channel only. A vertical cursor sweeps
    across both panels in time with the audio. The silent animation is written
    to a temporary file and then muxed with the original audio by ``ffmpeg``.

    Parameters
    ----------
    wavfile : str
        Path to the input WAV file.
    fps : int
        Frame rate of the generated video.

    Returns
    -------
    str
        Path of the generated video: ``wavfile`` with its extension replaced
        by ``.mp4``.

    Raises
    ------
    ValueError
        If the file contains no audio frames or uses an unsupported sample
        width.
    subprocess.CalledProcessError
        If the final ``ffmpeg`` muxing step fails.
    """
    import os
    import subprocess
    import tempfile

    with wave.open(wavfile, 'rb') as wf:
        fs = wf.getframerate()
        channels = wf.getnchannels()
        sample_width = wf.getsampwidth()
        # readframes() takes a frame count, not a byte count.
        raw = wf.readframes(wf.getnframes())

    audio = _decode_pcm(raw, sample_width, channels)
    n_frames = audio.shape[0]
    if n_frames == 0:
        raise ValueError('{0} contains no audio frames'.format(wavfile))

    duration = n_frames / float(fs)
    nyquist_hz = fs / 2.0
    times = np.arange(n_frames) / float(fs)

    # Work in float so abs() cannot overflow on the most negative integer
    # sample, and leave an all-zero (silent) signal as is instead of dividing
    # by zero.
    mono = audio[:, 0].astype(float)
    peak = np.max(np.abs(mono))
    waveform = mono / peak if peak > 0 else mono

    FFMpegWriter = manimation.writers['ffmpeg']
    metadata = dict(title='Wav Spectrogram', artist='Matplotlib',
                    comment='')
    writer = FFMpegWriter(fps=fps, metadata=metadata, bitrate=3500)

    # splitext only strips the final extension, so dots elsewhere in the path
    # do not truncate the output name.
    output_path = os.path.splitext(wavfile)[0] + ".mp4"

    fig = plt.figure(figsize=(6.4, 4.8))
    try:
        # Top panel: normalised waveform plus its cursor line.
        plt.subplot(211)
        plt.plot(times, waveform, c='k', lw=.3)
        plt.xlim(0, duration)
        plt.ylim(-1, 1)
        l1, = plt.plot([], [], '#333333', lw=2)

        # Bottom panel: spectrogram plus its cursor line.
        plt.subplot(212)
        plt.specgram(mono, Fs=fs, cmap=plt.get_cmap('jet'))
        plt.xlim(0, duration)
        plt.ylim(0, nyquist_hz)
        plt.xlabel('Time (s)')
        plt.ylabel('Frequency (Hz)')
        plt.gca().yaxis.set_major_formatter(FuncFormatter(kilo))
        l2, = plt.plot([], [], '#333333', lw=2)

        plt.subplots_adjust(bottom=0.09,
                            right=0.98,
                            top=0.98,
                            left=0.08,
                            hspace=0.14)

        y0 = np.array([-1, 1])
        y1 = np.array([0, nyquist_hz])

        fd, temp_path = tempfile.mkstemp(suffix=".mp4")
        os.close(fd)
        try:
            with writer.saving(fig, temp_path, 100):
                # The clip length is rounded up to a whole number of seconds.
                for i in range((int(duration) + 1) * fps):
                    # Frame i is shown at time i / fps, so the first frame
                    # has the cursor at t = 0.
                    t = i / float(fps)
                    l1.set_data([t, t], y0)
                    l2.set_data([t, t], y1)
                    writer.grab_frame()

            # Argument list instead of a shell string: paths with spaces or
            # shell metacharacters are passed through verbatim. "-strict -2"
            # lets ffmpeg use experimental encoders for the audio stream.
            subprocess.check_call([
                "ffmpeg", "-y",
                "-i", wavfile,
                "-i", temp_path,
                "-c:v", "copy",
                "-strict", "-2",
                output_path,
            ])
        finally:
            if os.path.exists(temp_path):
                os.remove(temp_path)
    finally:
        plt.close(fig)

    return output_path
def main():
    """Render the video for the WAV file named on the command line.

    The first command-line argument, when given, is used as the input path.
    With no argument the original hard-coded sample path is used.
    """
    import sys

    default_wav = r"C:\Users\ramra\PycharmProjects\OneShotOneShot\one_shot_percussive_sounds\1\471.wav"
    wavfile = sys.argv[1] if len(sys.argv) > 1 else default_wav
    vidwav(wavfile)


if __name__ == "__main__":
    main()