Commit 60ea438

Merge pull request #572 from danieldotnl/feature/554-scraper-registry
Replace index-based discovery with ScraperRegistry (#554)
2 parents 37e9d4b + 4b8f3d0 commit 60ea438

5 files changed

Lines changed: 319 additions & 55 deletions
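At its core, the change swaps the positional payload handed to discovery.async_load_platform() for stable string keys resolved by the new ScraperRegistry, which the new module describes as type-safe and reload-safe. A before/after sketch of that payload; the identifier values are illustrative, only the key names come from const.py below:

old_discovery_info = {"scraper_idx": 0, "platform_idx": 3}
new_discovery_info = {"scraper_id": "scraper_example_site", "entity_key": "outside_temperature"}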

custom_components/multiscrape/__init__.py

Lines changed: 57 additions & 16 deletions
@@ -14,13 +14,14 @@
 from homeassistant.helpers import discovery
 from homeassistant.helpers.reload import (async_integration_yaml_config,
                                           async_reload_integration_platforms)
+from homeassistant.util import slugify
 
-from .const import (CONF_LOG_RESPONSE, COORDINATOR, DOMAIN, PLATFORM_IDX,
-                    SCRAPER, SCRAPER_DATA, SCRAPER_IDX)
+from .const import CONF_LOG_RESPONSE, DOMAIN, ENTITY_KEY, SCRAPER_ID
 from .coordinator import (create_content_request_manager,
                           create_multiscrape_coordinator)
 from .file import create_file_manager
 from .http_session import create_http_session
+from .registry import ScraperInstance, ScraperRegistry
 from .schema import COMBINED_SCHEMA, CONFIG_SCHEMA  # noqa: F401
 from .scraper import create_scraper
 from .service import setup_config_services, setup_integration_services
@@ -65,8 +66,8 @@ async def reload_service_handler(service):
 
 
 def _async_setup_shared_data(hass: HomeAssistant):
-    """Create shared data for platform config and scraper coordinators."""
-    hass.data[DOMAIN] = {key: [] for key in [SCRAPER_DATA, *PLATFORMS]}
+    """Create a fresh ScraperRegistry for platform config and scraper coordinators."""
+    hass.data[DOMAIN] = ScraperRegistry()
 
 
 async def _async_process_config(hass: HomeAssistant, config) -> bool:
@@ -76,19 +77,25 @@ async def _async_process_config(hass: HomeAssistant, config) -> bool:
 
     refresh_tasks = []
     load_tasks = []
+    registry: ScraperRegistry = hass.data[DOMAIN]
 
     for scraper_idx, conf in enumerate(config[DOMAIN]):
         config_name = conf.get(CONF_NAME)
         if config_name is None:
-            config_name = f"Scraper_noname_{scraper_idx}"
+            resource = conf.get(CONF_RESOURCE) or ""
+            config_name = (
+                f"scraper_{slugify(resource)}" if resource else f"scraper_unnamed_{scraper_idx}"
+            )
             _LOGGER.debug(
-                "# Found no name for scraper, generated a unique name: %s", config_name
+                "# Found no name for scraper, generated name: %s", config_name
             )
 
         _LOGGER.debug(
             "%s # Setting up multiscrape with config:\n %s", config_name, conf
         )
 
+        scraper_id = _deduplicate_id(registry, config_name)
+
         file_manager = await create_file_manager(hass, config_name, conf.get(CONF_LOG_RESPONSE))
         session = create_http_session(config_name, conf, hass, file_manager)
         scraper = create_scraper(config_name, conf, hass, file_manager)
@@ -108,9 +115,12 @@ async def _shutdown_session(_event, _session=session):
 
         hass.bus.async_listen_once(EVENT_HOMEASSISTANT_STOP, _shutdown_session)
 
-        hass.data[DOMAIN][SCRAPER_DATA].append(
-            {SCRAPER: scraper, COORDINATOR: coordinator}
+        instance = ScraperInstance(
+            scraper_id=scraper_id,
+            scraper=scraper,
+            coordinator=coordinator,
         )
+        registry.register(instance)
 
         await setup_config_services(hass, coordinator, config_name)
 
@@ -119,14 +129,18 @@ async def _shutdown_session(_event, _session=session):
                 continue
 
             for platform_conf in conf[platform_domain]:
-                hass.data[DOMAIN][platform_domain].append(platform_conf)
-                platform_idx = len(hass.data[DOMAIN][platform_domain]) - 1
+                entity_name = platform_conf.get(CONF_NAME, "")
+                entity_key = slugify(entity_name) if entity_name else f"entity_{id(platform_conf)}"
+
+                platform_dict = instance.platform_configs.setdefault(platform_domain, {})
+                entity_key = _deduplicate_entity_key(platform_dict, entity_key)
+                platform_dict[entity_key] = platform_conf
 
                 load = discovery.async_load_platform(
                     hass,
                     platform_domain,
                     DOMAIN,
-                    {SCRAPER_IDX: scraper_idx, PLATFORM_IDX: platform_idx},
+                    {SCRAPER_ID: scraper_id, ENTITY_KEY: entity_key},
                     config,
                 )
                 load_tasks.append(load)
@@ -141,10 +155,37 @@ async def _shutdown_session(_event, _session=session):
     return True
 
 
+def _deduplicate_id(registry: ScraperRegistry, base_id: str) -> str:
+    """Return a unique scraper ID, appending a suffix if needed."""
+    if not registry.contains(base_id):
+        return base_id
+    suffix = 2
+    while registry.contains(f"{base_id}_{suffix}"):
+        suffix += 1
+    deduped = f"{base_id}_{suffix}"
+    _LOGGER.warning(
+        "Duplicate scraper name '%s', using '%s' instead", base_id, deduped
+    )
+    return deduped
+
+
+def _deduplicate_entity_key(platform_dict: dict, base_key: str) -> str:
+    """Return a unique entity key within a platform, appending a suffix if needed."""
+    if base_key not in platform_dict:
+        return base_key
+    suffix = 2
+    while f"{base_key}_{suffix}" in platform_dict:
+        suffix += 1
+    deduped = f"{base_key}_{suffix}"
+    _LOGGER.warning(
+        "Duplicate entity name '%s', using '%s' instead", base_key, deduped
+    )
+    return deduped
+
+
 async def async_get_config_and_coordinator(hass, platform_domain, discovery_info):
     """Get the config and coordinator for the platform from discovery."""
-    shared_data = hass.data[DOMAIN][SCRAPER_DATA][discovery_info[SCRAPER_IDX]]
-    conf = hass.data[DOMAIN][platform_domain][discovery_info[PLATFORM_IDX]]
-    coordinator = shared_data[COORDINATOR]
-    scraper = shared_data[SCRAPER]
-    return conf, coordinator, scraper
+    registry: ScraperRegistry = hass.data[DOMAIN]
+    instance = registry.get(discovery_info[SCRAPER_ID])
+    conf = instance.platform_configs[platform_domain][discovery_info[ENTITY_KEY]]
+    return conf, instance.coordinator, instance.scraper
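The consuming platforms (among the five changed files but not shown in this excerpt) resolve their configuration through async_get_config_and_coordinator() using the two new discovery keys. A minimal, hypothetical sketch of that consumer side follows; the setup signature and entity construction are assumptions, only the lookup call is taken from the diff above:

# Hypothetical platform module (e.g. sensor.py); not part of this excerpt.
from . import async_get_config_and_coordinator


async def async_setup_platform(hass, config, async_add_entities, discovery_info=None):
    """Set up entities for one scraper from the registry-based discovery payload."""
    # discovery_info now carries stable keys, e.g.
    # {"scraper_id": "scraper_example_site", "entity_key": "outside_temperature"}
    conf, coordinator, scraper = await async_get_config_and_coordinator(
        hass, "sensor", discovery_info
    )
    # ...build the entity from conf, bind it to coordinator/scraper, then:
    # async_add_entities([entity])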

custom_components/multiscrape/const.py

Lines changed: 2 additions & 7 deletions
@@ -40,13 +40,8 @@
 
 CONF_FIELDS = "fields"
 
-SCRAPER_IDX = "scraper_idx"
-PLATFORM_IDX = "platform_idx"
-
-COORDINATOR = "coordinator"
-SCRAPER = "scraper"
-
-SCRAPER_DATA = "scraper"
+SCRAPER_ID = "scraper_id"
+ENTITY_KEY = "entity_key"
 
 METHODS = ["POST", "GET", "PUT"]
 DEFAULT_SEPARATOR = ","
custom_components/multiscrape/registry.py

Lines changed: 57 additions & 0 deletions
@@ -0,0 +1,57 @@
+"""ScraperRegistry for type-safe, reload-safe scraper lookup."""
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING
+
+from homeassistant.const import Platform
+
+if TYPE_CHECKING:
+    from .coordinator import MultiscrapeDataUpdateCoordinator
+    from .scraper import Scraper
+
+_LOGGER = logging.getLogger(__name__)
+
+
+@dataclass
+class ScraperInstance:
+    """Hold all data for a single scraper configuration."""
+
+    scraper_id: str
+    scraper: Scraper
+    coordinator: MultiscrapeDataUpdateCoordinator
+    platform_configs: dict[Platform, dict[str, dict]] = field(default_factory=dict)
+
+
+class ScraperRegistry:
+    """Registry for scraper instances, replacing index-based lookups."""
+
+    def __init__(self):
+        """Initialize an empty registry."""
+        self._scrapers: dict[str, ScraperInstance] = {}
+
+    def contains(self, scraper_id: str) -> bool:
+        """Check if a scraper ID is already registered."""
+        return scraper_id in self._scrapers
+
+    def register(self, instance: ScraperInstance) -> None:
+        """Register a scraper instance by its unique ID."""
+        if instance.scraper_id in self._scrapers:
+            raise ValueError(
+                f"Scraper '{instance.scraper_id}' is already registered"
+            )
+        self._scrapers[instance.scraper_id] = instance
+        _LOGGER.debug("Registered scraper: %s", instance.scraper_id)
+
+    def get(self, scraper_id: str) -> ScraperInstance:
+        """Get a scraper instance by its unique ID."""
+        return self._scrapers[scraper_id]
+
+    def get_all(self) -> list[ScraperInstance]:
+        """Get all registered scraper instances."""
+        return list(self._scrapers.values())
+
+    def clear(self) -> None:
+        """Remove all registered scrapers."""
+        self._scrapers.clear()
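For reference, a standalone sketch of the registry lifecycle as wired up in __init__.py above; the scraper and coordinator objects are placeholders for what create_scraper() and create_multiscrape_coordinator() return (dataclass type hints are not enforced at runtime):

from custom_components.multiscrape.registry import ScraperInstance, ScraperRegistry

# Placeholders standing in for the real Scraper and coordinator objects.
fake_scraper = object()
fake_coordinator = object()

registry = ScraperRegistry()
instance = ScraperInstance(
    scraper_id="scraper_example_site",   # already deduplicated by _deduplicate_id()
    scraper=fake_scraper,
    coordinator=fake_coordinator,
)
registry.register(instance)              # raises ValueError for a duplicate ID

assert registry.contains("scraper_example_site")
assert registry.get("scraper_example_site") is instance
assert registry.get_all() == [instance]

registry.clear()                         # explicit wipe; setup simply creates a fresh registry
assert not registry.contains("scraper_example_site")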
