Skip to content

Commit 9cd4947

Browse files
committed
lastgenre: Load configured blacklist file
Uses a custom text file format, since YAML, INI, TOML, etc. all have flaws when it comes to storing regex patterns.
1 parent a3aa5ac commit 9cd4947

1 file changed

Lines changed: 58 additions & 0 deletions

File tree

beetsplug/lastgenre/__init__.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,15 @@
2525
import codecs
2626
import os
2727
import traceback
28+
from collections import defaultdict
2829
from typing import Union
2930

3031
import pylast
3132
import yaml
3233

3334
from beets import config, library, plugins, ui
3435
from beets.library import Album, Item
36+
from beets.ui import UserError
3537
from beets.util import normpath, plurality, unique_list
3638

3739
LASTFM = pylast.LastFMNetwork(api_key=plugins.LASTFM_KEY)
@@ -105,6 +107,7 @@ def __init__(self):
105107
"prefer_specific": False,
106108
"title_case": True,
107109
"extended_debug": False,
110+
"blacklist": False,
108111
}
109112
)
110113
self.setup()
@@ -117,6 +120,7 @@ def setup(self):
117120
self._genre_cache = {}
118121
self.whitelist = self._load_whitelist()
119122
self.c14n_branches, self.canonicalize = self._load_c14n_tree()
123+
self.blacklist = self._load_blacklist()
120124

121125
def _load_whitelist(self):
122126
whitelist = set()
@@ -151,6 +155,60 @@ def _load_c14n_tree(self):
151155
flatten_tree(genres_tree, [], c14n_branches)
152156
return c14n_branches, canonicalize
153157

158+
def _load_blacklist(self) -> defaultdict[str, list[str]]:
    """Load the blacklist from a configured file path.

    For maximum compatibility with regex patterns, a custom format is used:
    - Each section starts with an artist name, followed by a colon.
    - Subsequent lines are indented (at least one space, typically 4
      spaces) and contain a regex pattern to match a genre.
    - Blank lines and lines whose first non-blank character is ``#`` are
      skipped.

    Eg.:
        artist name 1:
            genre pattern 1
            genre pattern 2
        artist name 2:
            genre pattern 3

    Returns:
        A ``defaultdict(list)`` mapping each section (artist) name to the
        list of pattern strings found beneath it. It is empty when the
        ``blacklist`` option is falsy or the configured path is not an
        existing file (the latter is logged as an error).

    Raises:
        UserError: if the file format is invalid.
    """
    blacklist = defaultdict(list)
    if not (bl_filename := self.config["blacklist"].get()):
        return blacklist
    if not os.path.isfile(bl_filename := normpath(bl_filename)):
        self._log.error("Blacklist file not found: {} .", bl_filename)
        return blacklist

    self._log.debug("Loading blacklist file {0}", bl_filename)
    section = None
    # The file is read as bytes; every kept value is decoded as UTF-8 with
    # invalid sequences replaced instead of raising.
    with open(bl_filename, "rb") as f:
        for lineno, line in enumerate(f, 1):
            stripped = line.strip()
            if not stripped or stripped.startswith(b"#"):
                continue
            # Indentation is checked on the raw line: a leading space is
            # what distinguishes a pattern line from a section header.
            if not line.startswith(b" "):
                # Section header
                if not stripped.endswith(b":"):
                    raise UserError(
                        f"Malformed blacklist section header "
                        f"at line {lineno}: {line.decode('utf-8', 'replace')}"
                    )
                # Drop exactly one trailing colon. bytes.rstrip(b":\r\n")
                # treats its argument as a set of characters, so it would
                # keep the colon whenever trailing whitespace follows it
                # and would eat every colon of a name ending in several.
                section = stripped[:-1].rstrip().decode("utf-8", "replace")
            else:
                # Pattern line: must be indented (at least one space)
                if section is None:
                    raise UserError(
                        f"Blacklist regex pattern line before any section header "
                        f"at line {lineno}: {line.decode('utf-8', 'replace')}"
                    )
                blacklist[section].append(stripped.decode("utf-8", "replace"))
    if self.config["extended_debug"]:
        self._log.debug("Blacklist: {}", blacklist)
    return blacklist
211+
154212
@property
155213
def sources(self) -> tuple[str, ...]:
156214
"""A tuple of allowed genre sources. May contain 'track',

0 commit comments

Comments
 (0)