Skip to content

Commit 5662668

Browse files
authored
Merge pull request #563 from danieldotnl/feature/551-scrape-context
Simplify variable system with typed ScrapeContext (#551)
2 parents df35bec + 2712337 commit 5662668

11 files changed

Lines changed: 287 additions & 16 deletions

File tree

custom_components/multiscrape/binary_sensor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ def _update_sensor(self):
129129
"Skipped scraping because data couldn't be updated")
130130

131131
value = self.scraper.scrape(
132-
self._sensor_selector, self._name, variables=self.coordinator.form_variables)
132+
self._sensor_selector, self._name, context=self.coordinator.scrape_context)
133133
try:
134134
self._attr_is_on = bool(int(value))
135135
except (ValueError, TypeError):

custom_components/multiscrape/coordinator.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from .const import DOMAIN, MAX_RETRIES, RETRY_DELAY_SECONDS
1717
from .file import LoggingFileManager
1818
from .http_session import HttpSession
19+
from .scrape_context import ScrapeContext
1920
from .scraper import Scraper
2021
from .util import create_renderer
2122

@@ -76,8 +77,9 @@ async def get_content(self) -> str:
7677
ex,
7778
)
7879

80+
scrape_ctx = ScrapeContext(form_variables=self._session.form_variables)
7981
response = await self._session.async_request(
80-
"page", resource, variables=self._session.form_variables
82+
"page", resource, scrape_context=scrape_ctx
8183
)
8284
return response.text
8385

@@ -222,6 +224,6 @@ async def _prepare_new_run(self) -> None:
222224
self._scraper.reset()
223225

224226
@property
225-
def form_variables(self) -> dict:
226-
"""Return the form variables."""
227-
return self._request_manager.form_variables
227+
def scrape_context(self) -> ScrapeContext:
228+
"""Return the current scrape context with form variables."""
229+
return ScrapeContext(form_variables=self._request_manager.form_variables)

custom_components/multiscrape/entity.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ def _update_attributes(self):
143143
for name, attr_selector in self._attribute_selectors.items():
144144
try:
145145
attr_value = self.scraper.scrape(
146-
attr_selector, self._name, name, variables=self.coordinator.form_variables)
146+
attr_selector, self._name, name, context=self.coordinator.scrape_context)
147147
self._attr_extra_state_attributes[name] = attr_value
148148
except Exception as exception:
149149
_LOGGER.debug(

custom_components/multiscrape/http_session.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
CONF_FORM_VARIABLES, CONF_PARSER)
2222
from .file import LoggingFileManager
2323
from .http import merge_url_with_params
24+
from .scrape_context import ScrapeContext
2425
from .scraper import create_scraper
2526
from .selector import Selector
2627
from .util import create_dict_renderer, create_renderer
@@ -114,12 +115,13 @@ async def async_request(
114115
resource: str,
115116
method: str | None = None,
116117
request_data: Any = None,
117-
variables: dict = {},
118+
scrape_context: ScrapeContext | None = None,
118119
) -> httpx.Response:
119120
"""Execute an HTTP request.
120121
121122
Cookies are managed automatically by the dedicated httpx client.
122123
"""
124+
variables = scrape_context.to_template_variables() if scrape_context else {}
123125
data = request_data or self._http_config.data_renderer(variables)
124126
method = method or self._http_config.method or "GET"
125127
headers = self._http_config.headers_renderer(variables)
custom_components/multiscrape/scrape_context.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
"""Typed context for variable passing during scrape operations."""
2+
from __future__ import annotations
3+
4+
from dataclasses import dataclass, field
5+
from typing import Any
6+
7+
8+
@dataclass(frozen=True)
class ScrapeContext:
    """Immutable context carrying variables through the scrape pipeline.

    Replaces the untyped dict[str, Any] that was previously passed through
    the entire call chain. Being frozen prevents the mutation bug where
    variables["value"] was modified in-place.

    The form variables are snapshotted (shallow copy) at construction time,
    so later changes to the dict handed in by the caller do not leak into
    this context.
    """

    # Variables exposed to templates under their own keys.
    form_variables: dict[str, Any] = field(default_factory=dict)
    # Exposed to templates under the "value" key when it is not None.
    current_value: Any = None

    def __post_init__(self) -> None:
        """Replace ``form_variables`` with a private shallow copy."""
        # frozen=True blocks regular attribute assignment, so the copy has to
        # be installed through object.__setattr__.
        # ``or {}`` tolerates a caller handing in None instead of a dict.
        object.__setattr__(
            self, "form_variables", dict(self.form_variables or {})
        )

    def with_current_value(self, value: Any) -> "ScrapeContext":
        """Return a new context with current_value set.

        The receiver is left untouched; the new context carries the same
        form variables.
        """
        return ScrapeContext(
            form_variables=self.form_variables,
            current_value=value,
        )

    def to_template_variables(self) -> dict[str, Any]:
        """Convert to the flat dict that HA templates expect.

        Form variables are included first, then 'value' is added on top
        if current_value is set. This preserves backward compatibility
        with existing user templates.

        A fresh dict is returned on every call, so callers may mutate the
        result without affecting this context.
        """
        result = dict(self.form_variables)
        # Only None means "no current value"; falsy values such as 0 or ""
        # are still exposed as "value".
        if self.current_value is not None:
            result["value"] = self.current_value
        return result

    @staticmethod
    def empty() -> "ScrapeContext":
        """Create an empty context."""
        return ScrapeContext()

custom_components/multiscrape/scraper.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from bs4 import BeautifulSoup
55

66
from .const import CONF_PARSER, CONF_SEPARATOR
7+
from .scrape_context import ScrapeContext
78

89
DEFAULT_TIMEOUT = 10
910
_LOGGER = logging.getLogger(__name__)
@@ -97,8 +98,11 @@ async def set_content(self, content):
9798
)
9899
raise
99100

100-
def scrape(self, selector, sensor, attribute=None, variables: dict = {}):
101+
def scrape(self, selector, sensor, attribute=None, context: ScrapeContext | None = None):
101102
"""Scrape based on given selector the data."""
103+
if context is None:
104+
context = ScrapeContext.empty()
105+
102106
# This is required as this function is called separately for sensors and attributes
103107
log_prefix = f"{self._config_name} # {sensor}"
104108
if attribute:
@@ -107,7 +111,7 @@ def scrape(self, selector, sensor, attribute=None, variables: dict = {}):
107111
if selector.just_value:
108112
_LOGGER.debug("%s # Applying value_template only.", log_prefix)
109113
result = selector.value_template.async_render_with_possible_json_value(
110-
self._data, None, variables=variables
114+
self._data, None, variables=context.to_template_variables()
111115
)
112116
return selector.value_template._parse_result(result)
113117

@@ -152,8 +156,9 @@ def scrape(self, selector, sensor, attribute=None, variables: dict = {}):
152156
if value is not None and selector.value_template is not None:
153157
_LOGGER.debug(
154158
"%s # Applying value_template on selector result", log_prefix)
155-
variables["value"] = value
156-
value = selector.value_template.async_render(variables=variables, parse_result=True
159+
render_ctx = context.with_current_value(value)
160+
value = selector.value_template.async_render(
161+
variables=render_ctx.to_template_variables(), parse_result=True
157162
)
158163

159164
_LOGGER.debug(

custom_components/multiscrape/sensor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ def _update_sensor(self):
137137
"Skipped scraping because data couldn't be updated")
138138

139139
value = self.scraper.scrape(
140-
self._sensor_selector, self._name, variables=self.coordinator.form_variables)
140+
self._sensor_selector, self._name, context=self.coordinator.scrape_context)
141141
_LOGGER.debug(
142142
"%s # %s # Selected: %s", self.scraper.name, self._name, value
143143
)

tests/test_coordinator.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from custom_components.multiscrape.const import MAX_RETRIES
99
from custom_components.multiscrape.coordinator import (
1010
ContentRequestManager, MultiscrapeDataUpdateCoordinator)
11+
from custom_components.multiscrape.scrape_context import ScrapeContext
1112

1213

1314
@pytest.mark.unit
@@ -343,9 +344,12 @@ async def test_coordinator_prepare_new_run_clears_state(
343344
@pytest.mark.unit
344345
@pytest.mark.async_test
345346
@pytest.mark.timeout(5)
346-
async def test_coordinator_form_variables_delegates_to_session(
347+
async def test_coordinator_scrape_context_wraps_form_variables(
347348
coordinator, mock_http_session
348349
):
349-
"""Test that coordinator.form_variables returns session.form_variables."""
350+
"""Test that coordinator.scrape_context wraps session.form_variables in a ScrapeContext."""
350351
mock_http_session.form_variables = {"x-token": "abc123"}
351-
assert coordinator.form_variables == {"x-token": "abc123"}
352+
ctx = coordinator.scrape_context
353+
assert isinstance(ctx, ScrapeContext)
354+
assert ctx.form_variables == {"x-token": "abc123"}
355+
assert ctx.current_value is None

tests/test_http_session.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
HttpConfig,
1313
HttpSession,
1414
create_http_session)
15+
from custom_components.multiscrape.scrape_context import ScrapeContext
1516

1617
# ============================================================================
1718
# Fixtures
@@ -342,7 +343,7 @@ def capture_headers(variables={}, parse_result=None):
342343
url = "https://example.com/api"
343344
respx.get(url).mock(return_value=respx.MockResponse(200, text="OK"))
344345

345-
await sess.async_request("test", url, variables={"token": "xyz"})
346+
await sess.async_request("test", url, scrape_context=ScrapeContext(form_variables={"token": "xyz"}))
346347

347348
assert received_vars == {"token": "xyz"}
348349

tests/test_scrape_context.py

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
"""Tests for ScrapeContext dataclass."""
2+
import dataclasses
3+
4+
import pytest
5+
6+
from custom_components.multiscrape.scrape_context import ScrapeContext
7+
8+
# ============================================================================
9+
# Construction Tests
10+
# ============================================================================
11+
12+
13+
@pytest.mark.unit
def test_empty_context():
    """ScrapeContext.empty() yields no form variables and no current value."""
    empty_ctx = ScrapeContext.empty()
    assert empty_ctx.form_variables == {}
    assert empty_ctx.current_value is None
19+
20+
21+
@pytest.mark.unit
def test_context_with_form_variables():
    """Form variables passed to the constructor are kept; current_value stays None."""
    supplied = {"token": "abc123", "session": "xyz"}
    context = ScrapeContext(form_variables=supplied)
    assert context.form_variables == {"token": "abc123", "session": "xyz"}
    assert context.current_value is None
27+
28+
29+
@pytest.mark.unit
def test_context_with_current_value():
    """A context built with only current_value has empty form variables."""
    context = ScrapeContext(current_value="42")
    assert context.form_variables == {}
    assert context.current_value == "42"
35+
36+
37+
# ============================================================================
38+
# Immutability Tests
39+
# ============================================================================
40+
41+
42+
@pytest.mark.unit
def test_frozen_cannot_set_form_variables():
    """Assigning to form_variables on a frozen context raises FrozenInstanceError."""
    frozen_ctx = ScrapeContext(form_variables={"key": "value"})
    replacement = {"other": "thing"}
    with pytest.raises(dataclasses.FrozenInstanceError):
        frozen_ctx.form_variables = replacement
48+
49+
50+
@pytest.mark.unit
def test_frozen_cannot_set_current_value():
    """Assigning to current_value on a frozen context raises FrozenInstanceError."""
    frozen_ctx = ScrapeContext()
    replacement = "new_value"
    with pytest.raises(dataclasses.FrozenInstanceError):
        frozen_ctx.current_value = replacement
56+
57+
58+
# ============================================================================
59+
# with_current_value Tests
60+
# ============================================================================
61+
62+
63+
@pytest.mark.unit
def test_with_current_value_returns_new_instance():
    """with_current_value builds a separate context and leaves the receiver as it was."""
    base = ScrapeContext(form_variables={"token": "abc"})
    derived = base.with_current_value("scraped_data")

    # The derived context must be a distinct object ...
    assert derived is not base
    # ... the receiver keeps its unset current_value ...
    assert base.current_value is None
    # ... and the derived one carries the new value plus the same form variables.
    assert derived.current_value == "scraped_data"
    assert derived.form_variables == {"token": "abc"}
73+
74+
75+
@pytest.mark.unit
def test_with_current_value_preserves_form_variables():
    """The context returned by with_current_value has equal form variables."""
    base = ScrapeContext(form_variables={"a": "1", "b": "2"})
    derived = base.with_current_value("val")
    assert derived.form_variables == {"a": "1", "b": "2"}
81+
82+
83+
# ============================================================================
84+
# to_template_variables Tests
85+
# ============================================================================
86+
87+
88+
@pytest.mark.unit
def test_to_template_variables_empty():
    """An empty context converts to an empty template-variable dict."""
    template_vars = ScrapeContext.empty().to_template_variables()
    assert template_vars == {}
92+
93+
94+
@pytest.mark.unit
def test_to_template_variables_form_only():
    """With no current value, the template dict equals the form variables."""
    template_vars = ScrapeContext(
        form_variables={"token": "abc", "user": "admin"}
    ).to_template_variables()
    assert template_vars == {"token": "abc", "user": "admin"}
100+
101+
102+
@pytest.mark.unit
def test_to_template_variables_with_current_value():
    """A set current_value shows up under the 'value' key next to the form variables."""
    context = ScrapeContext(form_variables={"token": "abc"}, current_value="42")
    template_vars = context.to_template_variables()
    assert template_vars == {"token": "abc", "value": "42"}
111+
112+
113+
@pytest.mark.unit
def test_to_template_variables_current_value_overrides_form_variable():
    """current_value takes precedence over a form variable that is also named 'value'."""
    context = ScrapeContext(
        form_variables={"value": "from_form"}, current_value="from_scrape"
    )
    template_vars = context.to_template_variables()
    assert template_vars == {"value": "from_scrape"}
122+
123+
124+
@pytest.mark.unit
def test_to_template_variables_no_value_key_when_none():
    """No 'value' key is emitted while current_value is None."""
    template_vars = ScrapeContext(
        form_variables={"token": "abc"}
    ).to_template_variables()
    assert "value" not in template_vars
130+
131+
132+
@pytest.mark.unit
def test_to_template_variables_returns_copy():
    """Mutating the dict from to_template_variables leaves the context's form variables alone."""
    context = ScrapeContext(form_variables={"token": "abc"})
    template_vars = context.to_template_variables()
    # Write into the returned dict; the context must not see the new key.
    template_vars["injected"] = "hack"
    assert "injected" not in context.form_variables

0 commit comments

Comments
 (0)