Skip to content

Commit ac65364

Browse files
authored
[Fixes #14248] Fix proxy registry initialization (#14249)
1 parent e9553e8 commit ac65364

2 files changed

Lines changed: 99 additions & 20 deletions

File tree

geonode/proxy/tests.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import json
2727
import io
2828
import zipfile
29+
from threading import Event, Thread
2930

3031
from urllib.parse import urljoin
3132

@@ -35,6 +36,7 @@
3536
from geonode.proxy.templatetags.proxy_lib_tags import original_link_available
3637
from django.test.client import RequestFactory
3738
from django.core.files.uploadedfile import SimpleUploadedFile
39+
from django.utils.timezone import now
3840
from unittest.mock import patch
3941

4042
try:
@@ -70,6 +72,55 @@ def setUp(self):
7072
super().setUp()
7173
self.admin = get_user_model().objects.get(username="admin")
7274

75+
def test_register_host_lazy_initializes_registry(self):
76+
registry = ProxyUrlsRegistry()
77+
78+
def initialize_registry():
79+
registry.proxy_allowed_hosts = ["existing.test"]
80+
registry._last_registry_load = now()
81+
82+
registry._initialize = MagicMock(side_effect=initialize_registry)
83+
84+
registry.register_host("remote.test")
85+
86+
registry._initialize.assert_called_once()
87+
self.assertEqual({"existing.test", "remote.test"}, registry.proxy_allowed_hosts)
88+
89+
def test_concurrent_lazy_initialization_runs_once(self):
90+
registry = ProxyUrlsRegistry()
91+
initialization_started = Event()
92+
release_initialization = Event()
93+
errors = []
94+
95+
def initialize_registry():
96+
initialization_started.set()
97+
release_initialization.wait(5)
98+
registry.proxy_allowed_hosts = ["existing.test"]
99+
registry._last_registry_load = now()
100+
101+
registry._initialize = MagicMock(side_effect=initialize_registry)
102+
103+
def read_registry():
104+
try:
105+
registry.proxy_allowed_hosts
106+
except Exception as exc:
107+
errors.append(exc)
108+
109+
first_thread = Thread(target=read_registry)
110+
second_thread = Thread(target=read_registry)
111+
112+
first_thread.start()
113+
initialization_started.wait(5)
114+
second_thread.start()
115+
release_initialization.set()
116+
first_thread.join(5)
117+
second_thread.join(5)
118+
119+
self.assertFalse(first_thread.is_alive())
120+
self.assertFalse(second_thread.is_alive())
121+
self.assertEqual([], errors)
122+
registry._initialize.assert_called_once()
123+
73124
@patch("geonode.proxy.views.proxy_urls_registry", ProxyUrlsRegistry().set([TEST_DOMAIN]))
74125
def test_proxy_allowed_host(self):
75126
"""If PROXY_ALLOWED_HOSTS is not empty and DEBUG is False requests should return no error."""

geonode/proxy/utils.py

Lines changed: 48 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from threading import RLock
12
from urllib.parse import urlsplit
23

34
from django.conf import settings
@@ -9,50 +10,77 @@
910

1011

1112
class ProxyUrlsRegistry:
12-
_last_registry_load = None
1313
_registry_reload_threshold = getattr(settings, "PROXY_RELOAD_REGISTRY_THRESHOLD_DAYS", 1)
1414

15-
def initialize(self):
15+
def __init__(self):
16+
self._lock = RLock()
17+
self._last_registry_load = None
18+
self._proxy_allowed_hosts = None
19+
20+
def _needs_initialization(self):
21+
return (
22+
self._last_registry_load is None
23+
or (now() - self._last_registry_load).days >= self._registry_reload_threshold
24+
)
25+
26+
@property
27+
def proxy_allowed_hosts(self):
28+
# We register remote hosts from remote URLs at creation time, inside ImporterViewSet.create().
29+
# If for some reason the creation fails we end up having stale or wrong URLs inside the registry.
30+
# We check the last time the registry was updated, and after a certain delta we reinitialize the registry
31+
# which removes any URLs that are not connected to remote datasets.
32+
if self._needs_initialization():
33+
with self._lock:
34+
if self._needs_initialization():
35+
self._initialize()
36+
return self._proxy_allowed_hosts
37+
38+
@proxy_allowed_hosts.setter
39+
def proxy_allowed_hosts(self, hosts):
40+
self._proxy_allowed_hosts = set(hosts)
41+
42+
def _initialize(self):
1643
from geonode.base.models import Link
1744
from geonode.geoserver.helpers import ogc_server_settings
1845

19-
self.proxy_allowed_hosts = set([site_url.hostname] + list(getattr(settings, "PROXY_ALLOWED_HOSTS", ())))
46+
proxy_allowed_hosts = set([site_url.hostname] + list(getattr(settings, "PROXY_ALLOWED_HOSTS", ())))
2047

2148
if ogc_server_settings:
22-
self.register_host(ogc_server_settings.hostname)
49+
proxy_allowed_hosts.add(ogc_server_settings.hostname)
2350

2451
for link in Link.objects.filter(resource__sourcetype="REMOTE", link_type__in=PROXIED_LINK_TYPES):
2552
remote_host = urlsplit(link.url).hostname
26-
self.register_host(remote_host)
53+
proxy_allowed_hosts.add(remote_host)
2754

55+
self.proxy_allowed_hosts = proxy_allowed_hosts
2856
self._last_registry_load = now()
2957

58+
def initialize(self):
59+
"""Rebuild the registry safely for direct callers."""
60+
with self._lock:
61+
self._initialize()
62+
3063
def set(self, hosts):
31-
self.proxy_allowed_hosts = set(hosts)
32-
self._last_registry_load = now()
64+
with self._lock:
65+
self.proxy_allowed_hosts = set(hosts)
66+
self._last_registry_load = now()
3367
return self
3468

3569
def clear(self):
36-
self.proxy_allowed_hosts = set()
37-
self._last_registry_load = now()
70+
with self._lock:
71+
self.proxy_allowed_hosts = set()
72+
self._last_registry_load = now()
3873
return self
3974

4075
def register_host(self, host):
41-
self.proxy_allowed_hosts.add(host)
76+
with self._lock:
77+
self.proxy_allowed_hosts.add(host)
4278

4379
def unregister_host(self, host):
44-
self.proxy_allowed_hosts.remove(host)
80+
with self._lock:
81+
self.proxy_allowed_hosts.remove(host)
4582

4683
def get_proxy_allowed_hosts(self):
47-
# We register remote hosts from remote URLs at creation time, inside ImporterViewSet.create().
48-
# If for some reason the creation fails we end up having stale or wrong URLs inside the registry.
49-
# We check the last time the registry was updated, and after a certain delta we reinitialize the registry
50-
# which removes any URLs that are not connected to remote datasets
51-
if (
52-
self._last_registry_load is None
53-
or (now() - self._last_registry_load).days >= self._registry_reload_threshold
54-
):
55-
self.initialize()
5684
return self.proxy_allowed_hosts
5785

5886

0 commit comments

Comments
 (0)