Skip to content

Commit 9e6a60a

Browse files
committed
Fix non-MB sources writing IDs to MusicBrainz tags
Spotify and Deezer IDs were being written into MusicBrainz tag fields (mb_albumid, mb_trackid, etc.) because the generic album_id/track_id fields are mapped to those tags in MEDIA_FIELD_MAP. This caused third-party players to reject files with errors like 'Invalid MusicBrainz identifier: 158760742'. Fix by validating that values destined for mb_* fields match the standard MusicBrainz UUID format before writing them. Fixes #6519
1 parent bacebe2 commit 9e6a60a

3 files changed

Lines changed: 190 additions & 0 deletions

File tree

beets/autotag/hooks.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
from __future__ import annotations
1818

19+
import re
1920
from copy import deepcopy
2021
from dataclasses import dataclass, field
2122
from functools import cached_property
@@ -44,6 +45,38 @@
4445

4546
log = logging.getLogger("beets")
4647

48+
# MusicBrainz UUIDs standard 8-4-4-4-12 hex format.
49+
_MB_UUID_RE = re.compile(
50+
r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$",
51+
re.IGNORECASE,
52+
)
53+
54+
# Fields in item_data that must contain valid MusicBrainz UUIDs.
55+
# Values that don't match are dropped
56+
# cannot accidentally populate MusicBrainz tags with their own numeric/opaque IDs.
57+
_MB_ID_FIELDS = frozenset(
58+
{
59+
"mb_albumid",
60+
"mb_albumartistid",
61+
"mb_albumartistids",
62+
"mb_artistid",
63+
"mb_artistids",
64+
"mb_releasegroupid",
65+
"mb_releasetrackid",
66+
"mb_trackid",
67+
}
68+
)
69+
70+
71+
def _is_valid_mb_id(value: Any) -> bool:
72+
"""Return True if *value* is a valid MusicBrainz UUID (or a list of them)."""
73+
if isinstance(value, list):
74+
return bool(value) and all(
75+
_MB_UUID_RE.match(str(v)) for v in value if v is not None
76+
)
77+
return bool(value and _MB_UUID_RE.match(str(value)))
78+
79+
4780
SYNCHRONISED_LIST_FIELDS = {
4881
("albumtype", "albumtypes"),
4982
("artist", "artists"),
@@ -201,6 +234,11 @@ def item_data(self) -> JSONDict:
201234
):
202235
data[media_field] = data.pop(info_field)
203236

237+
# Drop any MusicBrainz ID field whose value is not a valid MB UUID.
238+
for mb_field in _MB_ID_FIELDS:
239+
if mb_field in data and not _is_valid_mb_id(data[mb_field]):
240+
del data[mb_field]
241+
204242
return data
205243

206244
def __init__(

docs/changelog.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ New features
3838
Bug fixes
3939
~~~~~~~~~
4040

41+
<<<<<<< HEAD
4142
- :ref:`import-cmd` Automatically remux WAV files containing MP3 streams
4243
(``WAVE_FORMAT_MPEGLAYER3``) to proper MP3 files during import, instead of
4344
silently importing them with incorrect metadata. :bug:`6455`
@@ -49,6 +50,13 @@ Bug fixes
4950
registry instead of unconditionally instantiating its own private instance,
5051
which also restores compatibility with :doc:`plugins/mbpseudo` for
5152
chroma-triggered lookups. :bug:`6212` :bug:`6441`
53+
=======
54+
- Correctly handle semicolon-delimited genre values from externally-tagged
55+
files. :bug:`6450`
56+
- Fix non-MusicBrainz sources (e.g. Spotify, Deezer) incorrectly writing
57+
their numeric or opaque IDs into MusicBrainz tag fields, which caused
58+
third-party players to reject the files. :bug:`6519`
59+
>>>>>>> e157fd245 (Fix non-MB sources writing IDs to MusicBrainz tags)
5260

5361
For plugin developers
5462
~~~~~~~~~~~~~~~~~~~~~

test/autotag/test_hooks.py

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
AlbumMatch,
2626
TrackInfo,
2727
TrackMatch,
28+
_is_valid_mb_id,
2829
correct_list_fields,
2930
)
3031
from beets.library import Item
@@ -426,3 +427,146 @@ def test_correct_list_fields(
426427
data = correct_list_fields(input_data)
427428

428429
assert (data[single_field], data[list_field]) == expected_values
430+
431+
432+
# Tests for issue #6519: non-MB sources must not pollute MusicBrainz ID tags
433+
# Real IDs from the issue report (Deezer numeric) and a typical Spotify ID.
434+
_SPOTIFY_ALBUM_ID = "4aawyAB9vmqN3uQ7FjRGTy"
435+
_SPOTIFY_TRACK_ID = "6rqhFgbbKwnb9MLmUQDhG6"
436+
_SPOTIFY_ARTIST_ID = "4Z8W4fKeB5YxbusRsdQVPb"
437+
_DEEZER_ALBUM_ID = "158760742"
438+
_DEEZER_TRACK_ID = "930705291"
439+
_DEEZER_ARTIST_ID = "3876259001"
440+
441+
_VALID_MB_ALBUM_UUID = "7edb51cb-77d6-4416-a23c-3a8c2994a2c7"
442+
_VALID_MB_TRACK_UUID = "dfa939ec-118c-4d0f-84a0-60f3d1e6522c"
443+
_VALID_MB_ARTIST_UUID = "a6623d39-2d8e-4f70-8242-0a9553b91e50"
444+
445+
446+
@pytest.mark.parametrize(
447+
"value, expected",
448+
[
449+
# Valid UUIDs
450+
(_VALID_MB_ALBUM_UUID, True),
451+
(_VALID_MB_TRACK_UUID, True),
452+
# Valid UUID list
453+
([_VALID_MB_ALBUM_UUID, _VALID_MB_ARTIST_UUID], True),
454+
# Spotify base-62 IDs
455+
(_SPOTIFY_ALBUM_ID, False),
456+
(_SPOTIFY_TRACK_ID, False),
457+
# Deezer numeric IDs
458+
(_DEEZER_ALBUM_ID, False),
459+
(_DEEZER_TRACK_ID, False),
460+
# Edge cases
461+
(None, False),
462+
("", False),
463+
([], False),
464+
],
465+
)
466+
def test_is_valid_mb_id(value, expected):
467+
assert _is_valid_mb_id(value) == expected
468+
469+
470+
# Regression tests for https://github.com/beetbox/beets/issues/6519:
471+
# non-MB sources must not write their IDs into MusicBrainz tags.
472+
473+
474+
@pytest.mark.parametrize(
475+
"album_id, artist_id, track_id",
476+
[
477+
(_SPOTIFY_ALBUM_ID, _SPOTIFY_ARTIST_ID, _SPOTIFY_TRACK_ID),
478+
(_DEEZER_ALBUM_ID, _DEEZER_ARTIST_ID, _DEEZER_TRACK_ID),
479+
],
480+
ids=["spotify", "deezer"],
481+
)
482+
def test_non_mb_album_ids_not_in_mb_fields(
483+
config, album_id, artist_id, track_id
484+
):
485+
"""Non-MB album/artist IDs must not appear in MusicBrainz tag fields."""
486+
info = AlbumInfo(
487+
tracks=[
488+
TrackInfo(
489+
title="Track",
490+
track_id=track_id,
491+
artist_id=artist_id,
492+
medium=1,
493+
medium_index=1,
494+
medium_total=1,
495+
index=1,
496+
)
497+
],
498+
album="Album",
499+
album_id=album_id,
500+
artist="Artist",
501+
artist_id=artist_id,
502+
year=2024,
503+
)
504+
data = info.item_data
505+
assert "mb_albumid" not in data
506+
assert "mb_albumartistid" not in data
507+
assert "mb_albumartistids" not in data
508+
509+
510+
@pytest.mark.parametrize(
511+
"track_id, artist_id",
512+
[
513+
(_SPOTIFY_TRACK_ID, _SPOTIFY_ARTIST_ID),
514+
(_DEEZER_TRACK_ID, _DEEZER_ARTIST_ID),
515+
],
516+
ids=["spotify", "deezer"],
517+
)
518+
def test_non_mb_track_ids_not_in_mb_fields(config, track_id, artist_id):
519+
"""Non-MB track/artist IDs must not appear in MusicBrainz tag fields."""
520+
track = TrackInfo(
521+
title="Track",
522+
track_id=track_id,
523+
artist_id=artist_id,
524+
medium=1,
525+
medium_index=1,
526+
index=1,
527+
)
528+
data = track.item_data
529+
assert "mb_trackid" not in data
530+
assert "mb_artistid" not in data
531+
assert "mb_artistids" not in data
532+
533+
534+
def test_valid_mb_album_uuids_are_written(config):
535+
"""Regression: valid MusicBrainz UUIDs must still be written to MB tags."""
536+
info = AlbumInfo(
537+
tracks=[
538+
TrackInfo(
539+
title="Track",
540+
track_id=_VALID_MB_TRACK_UUID,
541+
artist_id=_VALID_MB_ARTIST_UUID,
542+
medium=1,
543+
medium_index=1,
544+
medium_total=1,
545+
index=1,
546+
)
547+
],
548+
album="Album",
549+
album_id=_VALID_MB_ALBUM_UUID,
550+
artist="Artist",
551+
artist_id=_VALID_MB_ARTIST_UUID,
552+
data_source="MusicBrainz",
553+
year=2024,
554+
)
555+
data = info.item_data
556+
assert data["mb_albumid"] == _VALID_MB_ALBUM_UUID
557+
assert data["mb_albumartistid"] == _VALID_MB_ARTIST_UUID
558+
559+
560+
def test_valid_mb_track_uuid_is_written(config):
561+
"""Regression: valid MusicBrainz UUID in track_id must still be written."""
562+
track = TrackInfo(
563+
title="Track",
564+
track_id=_VALID_MB_TRACK_UUID,
565+
artist_id=_VALID_MB_ARTIST_UUID,
566+
medium=1,
567+
medium_index=1,
568+
index=1,
569+
)
570+
data = track.item_data
571+
assert data["mb_trackid"] == _VALID_MB_TRACK_UUID
572+
assert data["mb_artistid"] == _VALID_MB_ARTIST_UUID

0 commit comments

Comments
 (0)