Skip to content

Commit 3b89d72

Browse files
authored
Fix mb search term formatting (#6354)
Fixes #6347. This change: (1) fixes MusicBrainz Lucene query formatting in `MusicBrainzAPI.format_search_term()`, which now lowercases, trims, and escapes Lucene special characters in each search term; (2) fixes `plugins.musicbrainz:extra_tags` support by mapping `alias` and `tracks` to MusicBrainz search fields; (3) adjusts logging so that MusicBrainz API logs are visible under the shared `beets` logger, and removes an unused per-module logger from `beetsplug.bpd`.
2 parents bf6c0a1 + 8f81e1d commit 3b89d72

8 files changed

Lines changed: 64 additions & 15 deletions

File tree

.github/CODEOWNERS

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,11 @@
33

44
# Specific ownerships:
55
/beets/metadata_plugins.py @semohr
6+
67
/beetsplug/titlecase.py @henry-oberholtzer
8+
79
/beetsplug/mbpseudo.py @asardaes
10+
11+
/beetsplug/_utils/requests.py @snejus
12+
/beetsplug/_utils/musicbrainz.py @snejus
13+
/beetsplug/musicbrainz.py @snejus

beetsplug/_utils/musicbrainz.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@
1111
from __future__ import annotations
1212

1313
import operator
14+
import re
1415
from dataclasses import dataclass, field
1516
from functools import cached_property, singledispatchmethod, wraps
16-
from itertools import groupby
17+
from itertools import groupby, starmap
1718
from typing import TYPE_CHECKING, Any, Literal, ParamSpec, TypedDict, TypeVar
1819

1920
from requests_ratelimiter import LimiterMixin
@@ -30,7 +31,10 @@
3031

3132
from .._typing import JSONDict
3233

33-
log = logging.getLogger(__name__)
34+
log = logging.getLogger("beets")
35+
36+
37+
LUCENE_SPECIAL_CHAR_PAT = re.compile(r'([-+&|!(){}[\]^"~*?:\\/])')
3438

3539

3640
class LimiterTimeoutSession(LimiterMixin, TimeoutAndRetrySession):
@@ -181,6 +185,21 @@ def _lookup(
181185
def _browse(self, entity: Entity, **kwargs) -> list[JSONDict]:
182186
return self._get_resource(entity, **kwargs).get(f"{entity}s", [])
183187

188+
@staticmethod
def format_search_term(field: str, term: str) -> str:
    """Build a single Lucene clause for a MusicBrainz search query.

    The term is lowercased and stripped of surrounding whitespace, then
    every character matched by ``LUCENE_SPECIAL_CHAR_PAT`` is prefixed with
    a backslash. When ``field`` is non-empty the escaped term is wrapped as
    ``field:(term)``; otherwise the bare escaped term is returned. A term
    that is empty after stripping yields an empty string.

    See https://lucene.apache.org/core/4_3_0/queryparser/org/apache/lucene/queryparser/classic/package-summary.html
    """
    normalized = term.lower().strip()
    if not normalized:
        return ""

    # Backslash-escape each special character so it is matched literally.
    escaped = LUCENE_SPECIAL_CHAR_PAT.sub(r"\\\1", normalized)
    return f"{field}:({escaped})" if field else escaped
202+
184203
def search(
185204
self,
186205
entity: Entity,
@@ -195,10 +214,8 @@ def search(
195214
- 'value' is empty, in which case the filter is ignored
196215
* Values are lowercased and stripped of whitespace.
197216
"""
198-
query = " AND ".join(
199-
":".join(filter(None, (k, f'"{_v}"')))
200-
for k, v in filters.items()
201-
if (_v := v.lower().strip())
217+
query = " ".join(
218+
filter(None, starmap(self.format_search_term, filters.items()))
202219
)
203220
log.debug("Searching for MusicBrainz {}s with: {!r}", entity, query)
204221
kwargs["query"] = query

beetsplug/bpd/__init__.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030

3131
import beets
3232
import beets.ui
33-
from beets import dbcore, logging
33+
from beets import dbcore
3434
from beets.library import Item
3535
from beets.plugins import BeetsPlugin
3636
from beets.util import as_string, bluelet
@@ -39,8 +39,6 @@
3939
if TYPE_CHECKING:
4040
from beets.dbcore.query import Query
4141

42-
log = logging.getLogger(__name__)
43-
4442

4543
try:
4644
from . import gstplayer

beetsplug/musicbrainz.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@
5656
"label": "label",
5757
"media": "format",
5858
"year": "date",
59+
"tracks": "tracks",
60+
"alias": "alias",
5961
}
6062

6163

docs/changelog.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@ New features:
1111

1212
Bug fixes:
1313

14+
- :doc:`plugins/musicbrainz`: Fix escaping of search terms. :bug:`6347`
15+
- :doc:`plugins/musicbrainz`: Fix support for ``alias`` and ``tracks``
16+
in :conf:`plugins.musicbrainz:extra_tags`.
17+
1418
For packagers:
1519

1620
Other changes:

docs/plugins/index.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,15 +100,15 @@ databases. They share the following configuration options:
100100
listenbrainz
101101
loadext
102102
lyrics
103-
mbcollection
104-
mbpseudo
105-
mbsubmit
106103
mbsync
107104
metasync
108105
missing
109106
mpdstats
110107
mpdupdate
111108
musicbrainz
109+
mbcollection
110+
mbpseudo
111+
mbsubmit
112112
parentwork
113113
permissions
114114
play

docs/plugins/musicbrainz.rst

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,15 +93,23 @@ Default
9393
This setting should improve the autotagger results if the metadata with the
9494
given tags match the metadata returned by MusicBrainz.
9595

96-
Note that the only tags supported by this setting are: ``barcode``,
97-
``catalognum``, ``country``, ``label``, ``media``, and ``year``.
96+
Tags supported by this setting:
97+
98+
* ``alias`` (also search for release aliases matching the query)
99+
* ``barcode``
100+
* ``catalognum``
101+
* ``country``
102+
* ``label``
103+
* ``media``
104+
* ``tracks`` (number of tracks on the release)
105+
* ``year``
98106

99107
Example:
100108

101109
.. code-block:: yaml
102110
103111
musicbrainz:
104-
extra_tags: [barcode, catalognum, country, label, media, year]
112+
extra_tags: [alias, barcode, catalognum, country, label, media, tracks, year]
105113
106114
.. conf:: genres
107115
:default: no

test/plugins/utils/test_musicbrainz.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import pytest
2+
13
from beetsplug._utils.musicbrainz import MusicBrainzAPI
24

35

@@ -80,3 +82,15 @@ def test_group_relations():
8082
},
8183
],
8284
}
85+
86+
87+
@pytest.mark.parametrize(
    "field, term, expected",
    [
        # Field given: term is lowercased, trimmed, escaped and wrapped.
        ("artist", ' AC/DC + "[Live]" ', r"artist:(ac\/dc \+ \"\[live\]\")"),
        # No field: only the escaped, lowercased term is returned.
        ("", "Foo:Bar", r"foo\:bar"),
        # Whitespace-only term: nothing to search for.
        ("artist", " ", ""),
    ],
)
def test_format_search_term(field, term, expected):
    """Check that search terms are normalized and Lucene-escaped."""
    formatted = MusicBrainzAPI.format_search_term(field, term)
    assert formatted == expected

0 commit comments

Comments
 (0)