Skip to content

Commit 9680bad

Browse files
author
The TensorFlow Datasets Authors
committed
Fix pickling error in nsynth_dataset_builder_test
PiperOrigin-RevId: 911117164
1 parent 0d7739e commit 9680bad

1 file changed

Lines changed: 130 additions & 124 deletions

File tree

tensorflow_datasets/datasets/nsynth/nsynth_dataset_builder.py

Lines changed: 130 additions & 124 deletions
Original file line number | Diff line number | Diff line change
@@ -136,6 +136,132 @@ def __init__(
136136
self.estimate_f0_and_loudness = estimate_f0_and_loudness
137137

138138

139+
def _emit_base_example(ex, split):
  """Converts a source `tf.train.Example` into a keyed TFDS example.

  Defined at module level (not as a closure) so Beam can pickle it.

  Args:
    ex: Parsed `tf.train.Example` proto from the NSynth TFRecords.
    split: Split name; used only to label the Beam counter metric.

  Returns:
    A `(note_id, example)` tuple, where `note_id` is the raw `note_str`
    bytes and `example` is a dict matching the dataset's feature spec.
  """
  tfds.core.lazy_imports.apache_beam.metrics.Metrics.counter(
      split, "base-examples"
  ).inc()
  feats = ex.features.feature
  note_id = feats["note_str"].bytes_list.value[0]
  instrument = {
      "label": tf.compat.as_text(feats["instrument_str"].bytes_list.value[0]),
      "family": tf.compat.as_text(
          feats["instrument_family_str"].bytes_list.value[0]
      ),
      "source": tf.compat.as_text(
          feats["instrument_source_str"].bytes_list.value[0]
      ),
  }
  qualities = {
      name: feats["qualities"].int64_list.value[idx]
      for idx, name in enumerate(_QUALITIES)
  }
  example = {
      "id": note_id,
      "audio": np.array(feats["audio"].float_list.value, dtype=np.float32),
      "pitch": feats["pitch"].int64_list.value[0],
      "velocity": feats["velocity"].int64_list.value[0],
      "instrument": instrument,
      "qualities": qualities,
  }
  return note_id, example
166+
167+
168+
def _in_split(id_ex, split_ids, split):
  """Returns True iff the example belongs to the given split.

  An empty/falsy `split_ids` collection means "accept every example".

  Args:
    id_ex: `(id, example)` tuple produced by `_emit_base_example`.
    split_ids: Collection of text ids belonging to the split, or falsy.
    split: Split name; used only to label the Beam counter metric.
  """
  _, example = id_ex
  # Guard-clause form of: accept when no id filter, or id is listed.
  if split_ids and tf.compat.as_text(example["id"]) not in split_ids:
    return False
  beam = tfds.core.lazy_imports.apache_beam
  beam.metrics.Metrics.counter(split, "in-split").inc()
  return True
175+
176+
177+
def _estimate_f0(id_ex, split):
  """Estimates the fundamental frequency using CREPE and adds it to the example.

  Args:
    id_ex: `(id, example)` tuple; `example["audio"]` is a 1-D float array
      (assumed sampled at `_AUDIO_RATE` — consistent with `_calc_loudness`).
    split: Split name; used only to label the Beam counter metric.

  Returns:
    The `(id, example)` tuple with an added `"f0"` dict containing
    `"hz"`, `"midi"`, and `"confidence"` float32 arrays.

  Raises:
    ValueError: If the required end-padding is not a whole number of
      samples, i.e. the audio length is inconsistent with the rates.
  """
  id_, ex = id_ex
  beam = tfds.core.lazy_imports.apache_beam
  beam.metrics.Metrics.counter(split, "estimate-f0").inc()

  audio = ex["audio"]

  # Copied from magenta/ddsp/spectral_ops.py
  # Pad end so that `num_frames = _NUM_SECS * _F0_AND_LOUDNESS_RATE`.
  hop_size = _AUDIO_RATE / _F0_AND_LOUDNESS_RATE
  n_samples = len(audio)
  n_frames = _NUM_SECS * _F0_AND_LOUDNESS_RATE
  n_samples_padded = (n_frames - 1) * hop_size + _CREPE_FRAME_SIZE
  n_padding = n_samples_padded - n_samples
  # An `assert` here would be stripped under `python -O`, letting a
  # fractional padding silently truncate via `int()` on a Beam worker;
  # raise explicitly instead.
  if n_padding % 1 != 0:
    raise ValueError(
        "Padding must be a whole number of samples, got %s." % n_padding
    )
  audio = np.pad(audio, (0, int(n_padding)), mode="constant")
  crepe_step_size = 1000 / _F0_AND_LOUDNESS_RATE  # milliseconds

  _, f0_hz, f0_confidence, _ = tfds.core.lazy_imports.crepe.predict(
      audio,
      sr=_AUDIO_RATE,
      viterbi=True,
      step_size=crepe_step_size,
      center=False,
      verbose=0,
  )
  f0_midi = tfds.core.lazy_imports.librosa.core.hz_to_midi(f0_hz)
  # Set -infs introduced by hz_to_midi (for f0 == 0 Hz) to 0.
  f0_midi[f0_midi == -np.inf] = 0
  # Set nans to 0 in confidence.
  f0_confidence = np.nan_to_num(f0_confidence)
  # Copy before mutating so upstream PCollection elements stay untouched.
  ex = dict(ex)
  ex["f0"] = {
      "hz": f0_hz.astype(np.float32),
      "midi": f0_midi.astype(np.float32),
      "confidence": f0_confidence.astype(np.float32),
  }
  return id_, ex
216+
217+
218+
def _calc_loudness(id_ex, split):
  """Computes A-weighted loudness (dB) and attaches it to the example.

  The reference level corresponds to white noise with amplitude 1.

  Args:
    id_ex: `(id, example)` tuple; `example["audio"]` is a 1-D float array.
    split: Split name; used only to label the Beam counter metric.

  Returns:
    The `(id, example)` tuple with an added `"loudness"` dict holding a
    per-frame `"db"` float32 array.
  """
  key, example = id_ex
  counters = tfds.core.lazy_imports.apache_beam.metrics.Metrics
  counters.counter(split, "compute-loudness").inc()

  samples = example["audio"]

  # Copied from magenta/ddsp/spectral_ops.py
  # Get magnitudes.
  frame_hop = int(_AUDIO_RATE // _F0_AND_LOUDNESS_RATE)

  # Pad the tail so the final STFT frame is complete.
  initial_len = int(samples.shape[-1])
  total_frames = int(np.ceil(initial_len / frame_hop))
  padded_len = (total_frames - 1) * frame_hop + _LD_N_FFT
  samples = np.pad(samples, ((0, padded_len - initial_len),), "constant")

  librosa = tfds.core.lazy_imports.librosa
  stft = librosa.stft(
      samples, n_fft=_LD_N_FFT, hop_length=frame_hop, center=False
  ).T

  # Magnitude -> power in dB, clamped away from log(0).
  magnitude = np.abs(stft)
  floor = 1e-20  # Avoid log(0) instabilities.
  power_db = np.log10(np.maximum(floor, magnitude))
  power_db *= 20.0

  # Perceptual weighting.
  freqs = librosa.fft_frequencies(sr=_AUDIO_RATE, n_fft=_LD_N_FFT)
  weighting = librosa.A_weighting(freqs)[np.newaxis, :]
  weighted_db = power_db + weighting

  # Set dynamic range relative to the white-noise reference.
  weighted_db -= _REF_DB
  weighted_db = np.maximum(weighted_db, -_LD_RANGE)

  # Average over frequency bins.
  mean_db = np.mean(weighted_db, axis=-1)

  # Copy before mutating so upstream PCollection elements stay untouched.
  example = dict(example)
  example["loudness"] = {"db": mean_db.astype(np.float32)}
  return key, example
263+
264+
139265
class Builder(tfds.core.BeamBasedBuilder):
140266
"""A large-scale and high-quality dataset of annotated musical notes."""
141267

@@ -230,141 +356,21 @@ def _build_pcollection(self, pipeline, tfrecord_dirs, ids, split):
230356
"""Build PCollection of examples for split."""
231357
beam = tfds.core.lazy_imports.apache_beam
232358

233-
def _emit_base_example(ex):
234-
"""Maps an input example to a TFDS example."""
235-
beam.metrics.Metrics.counter(split, "base-examples").inc()
236-
features = ex.features.feature
237-
id_ = features["note_str"].bytes_list.value[0]
238-
return id_, {
239-
"id": id_,
240-
"audio": np.array(
241-
features["audio"].float_list.value, dtype=np.float32
242-
),
243-
"pitch": features["pitch"].int64_list.value[0],
244-
"velocity": features["velocity"].int64_list.value[0],
245-
"instrument": {
246-
"label": tf.compat.as_text(
247-
features["instrument_str"].bytes_list.value[0]
248-
),
249-
"family": tf.compat.as_text(
250-
features["instrument_family_str"].bytes_list.value[0]
251-
),
252-
"source": tf.compat.as_text(
253-
features["instrument_source_str"].bytes_list.value[0]
254-
),
255-
},
256-
"qualities": {
257-
q: features["qualities"].int64_list.value[i]
258-
for (i, q) in enumerate(_QUALITIES)
259-
},
260-
}
261-
262-
def _in_split(id_ex, split_ids):
263-
unused_id, ex = id_ex
264-
if not split_ids or tf.compat.as_text(ex["id"]) in split_ids:
265-
beam.metrics.Metrics.counter(split, "in-split").inc()
266-
return True
267-
return False
268-
269-
def _estimate_f0(id_ex):
270-
"""Estimate the fundamental frequency using CREPE and add to example."""
271-
id_, ex = id_ex
272-
beam.metrics.Metrics.counter(split, "estimate-f0").inc()
273-
274-
audio = ex["audio"]
275-
276-
# Copied from magenta/ddsp/spectral_ops.py
277-
# Pad end so that `num_frames = _NUM_SECS * _F0_AND_LOUDNESS_RATE`.
278-
hop_size = _AUDIO_RATE / _F0_AND_LOUDNESS_RATE
279-
n_samples = len(audio)
280-
n_frames = _NUM_SECS * _F0_AND_LOUDNESS_RATE
281-
n_samples_padded = (n_frames - 1) * hop_size + _CREPE_FRAME_SIZE
282-
n_padding = n_samples_padded - n_samples
283-
assert n_padding % 1 == 0
284-
audio = np.pad(audio, (0, int(n_padding)), mode="constant")
285-
crepe_step_size = 1000 / _F0_AND_LOUDNESS_RATE # milliseconds
286-
287-
_, f0_hz, f0_confidence, _ = tfds.core.lazy_imports.crepe.predict(
288-
audio,
289-
sr=_AUDIO_RATE,
290-
viterbi=True,
291-
step_size=crepe_step_size,
292-
center=False,
293-
verbose=0,
294-
)
295-
f0_midi = tfds.core.lazy_imports.librosa.core.hz_to_midi(f0_hz)
296-
# Set -infs introduced by hz_to_midi to 0.
297-
f0_midi[f0_midi == -np.inf] = 0
298-
# Set nans to 0 in confidence.
299-
f0_confidence = np.nan_to_num(f0_confidence)
300-
ex = dict(ex)
301-
ex["f0"] = {
302-
"hz": f0_hz.astype(np.float32),
303-
"midi": f0_midi.astype(np.float32),
304-
"confidence": f0_confidence.astype(np.float32),
305-
}
306-
return id_, ex
307-
308-
def _calc_loudness(id_ex):
309-
"""Compute loudness, add to example (ref is white noise, amplitude=1)."""
310-
id_, ex = id_ex
311-
beam.metrics.Metrics.counter(split, "compute-loudness").inc()
312-
313-
audio = ex["audio"]
314-
315-
# Copied from magenta/ddsp/spectral_ops.py
316-
# Get magnitudes.
317-
hop_size = int(_AUDIO_RATE // _F0_AND_LOUDNESS_RATE)
318-
319-
# Add padding to the end
320-
n_samples_initial = int(audio.shape[-1])
321-
n_frames = int(np.ceil(n_samples_initial / hop_size))
322-
n_samples_final = (n_frames - 1) * hop_size + _LD_N_FFT
323-
pad = n_samples_final - n_samples_initial
324-
audio = np.pad(audio, ((0, pad),), "constant")
325-
326-
librosa = tfds.core.lazy_imports.librosa
327-
spectra = librosa.stft(
328-
audio, n_fft=_LD_N_FFT, hop_length=hop_size, center=False
329-
).T
330-
331-
# Compute power
332-
amplitude = np.abs(spectra)
333-
amin = 1e-20 # Avoid log(0) instabilities.
334-
power_db = np.log10(np.maximum(amin, amplitude))
335-
power_db *= 20.0
336-
337-
# Perceptual weighting.
338-
frequencies = librosa.fft_frequencies(sr=_AUDIO_RATE, n_fft=_LD_N_FFT)
339-
a_weighting = librosa.A_weighting(frequencies)[np.newaxis, :]
340-
loudness = power_db + a_weighting
341-
342-
# Set dynamic range.
343-
loudness -= _REF_DB
344-
loudness = np.maximum(loudness, -_LD_RANGE)
345-
346-
# Average over frequency bins.
347-
mean_loudness_db = np.mean(loudness, axis=-1)
348-
349-
ex = dict(ex)
350-
ex["loudness"] = {"db": mean_loudness_db.astype(np.float32)}
351-
return id_, ex
352-
353359
examples = (
354360
pipeline
355361
| beam.Create([os.path.join(dir_, "*") for dir_ in tfrecord_dirs])
356362
| beam.io.tfrecordio.ReadAllFromTFRecord(
357363
coder=beam.coders.ProtoCoder(tf.train.Example)
358364
)
359-
| beam.Map(_emit_base_example)
360-
| beam.Filter(_in_split, split_ids=ids)
365+
| beam.Map(_emit_base_example, split=split)
366+
| beam.Filter(_in_split, split_ids=ids, split=split)
361367
)
362368
if self.builder_config.estimate_f0_and_loudness:
363369
examples = (
364370
examples
365371
| beam.Reshuffle()
366-
| beam.Map(_estimate_f0)
367-
| beam.Map(_calc_loudness)
372+
| beam.Map(_estimate_f0, split=split)
373+
| beam.Map(_calc_loudness, split=split)
368374
)
369375

370376
return examples

0 commit comments

Comments
 (0)