Skip to content

Commit fc8ac3c

Browse files
committed
feat: Add PreferredURLPatterns for URL sorting
1 parent 83762a6 commit fc8ac3c

4 files changed

Lines changed: 132 additions & 11 deletions

File tree

docs/source/AdministratorGuide/Configuration/ConfReference/Systems/index.rst

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,44 @@ In each system, per setup, you normally find the following sections:
3434
* FailoverURLs: Like URLs, but they are only tried if no server in URLs was successfully contacted.
3535

3636

37+
Preferred URLs
38+
--------------
39+
40+
For most services, the standard ``URLs`` and ``FailoverURLs`` mechanism provides a way to specify primary and backup service endpoints.
41+
42+
However, this approach has limitations in certain scenarios:
43+
44+
- Some services (like the Configuration service) have replicas that automatically register themselves in the Configuration System
45+
- External servers ("voboxes") running at sites may not be accessible from all clients
46+
- Connection attempts to inaccessible servers cause errors that, while harmless due to fallback mechanisms, slow down DIRAC and generate misleading error messages
47+
48+
To address these issues, you can define the ``PreferredURLPatterns`` option in the ``DIRAC`` section to identify the subset of URLs that should be tried first:
49+
50+
.. code-block:: guess
51+
52+
System
53+
{
54+
URLs
55+
{
56+
Service = dips://host1.main.invalid:1234/System/Service,dips://host2.main.invalid:1234/System/Service,dips://external.invalid:1234/System/Service
57+
}
58+
}
59+
DIRAC
60+
{
61+
PreferredURLPatterns = .*\.main\.invalid:.*
62+
}
63+
64+
In this example:
65+
66+
1. The ``PreferredURLPatterns`` specifies a regular expression that matches servers in the ``main.invalid`` domain
67+
2. When connecting to the service, DIRAC will first try URLs matching this pattern (``host1.main.invalid`` and ``host2.main.invalid``)
68+
3. Only if these preferred servers fail will DIRAC attempt to connect to other servers (``external.invalid``)
69+
70+
This approach reduces connection errors and improves performance by prioritizing servers that are more likely to be accessible from the client.
71+
72+
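.. note:: ``PreferredURLPatterns`` is a comma-separated list of regular expressions, not a single regular expression, so you can specify several patterns to match different subsets of servers. The patterns are ranked in the order they are listed, and each one is matched from the beginning of the complete URL (protocol, host, port and path), so a pattern must account for the port that follows the host name.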
73+
74+
3775
Main Servers
3876
------------
3977

src/DIRAC/ConfigurationSystem/Client/PathFinder.py

Lines changed: 47 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
""" Collection of utilities for finding paths in the CS
22
"""
3+
import re
4+
from copy import deepcopy
5+
from collections.abc import Iterable
36
from urllib import parse
47

8+
from cachetools import cached, TTLCache
9+
510
from DIRAC.Core.Utilities import List
611
from DIRAC.ConfigurationSystem.Client.ConfigurationData import gConfigurationData
712
from DIRAC.ConfigurationSystem.Client.Helpers import Path
@@ -207,6 +212,43 @@ def getSystemURLs(system, setup=False, failover=False):
207212
return urlDict
208213

209214

215+
def groupURLsByPriority(urls: Iterable[str]) -> list[set[str]]:
    """Group URLs by priority.

    The ranking is not passed in by the caller: it is derived from the
    ``/DIRAC/PreferredURLPatterns`` option by the cached helper
    ``_groupURLsByPriority``.

    :param Iterable[str] urls: URLs to group. They are converted to a ``frozenset``
        so they can be used as the cache key, which also collapses duplicates.

    :return: list[set[str]] -- list of URL groups, ordered by priority (most preferred first)
    """
    # The helper's return value is stored in its cache and therefore shared between
    # callers: hand out fresh sets so that a caller mutating the result cannot
    # corrupt the cached entry. The members are immutable strings, so copying each
    # set is sufficient and cheaper than a recursive deep copy.
    return [set(urlGroup) for urlGroup in _groupURLsByPriority(frozenset(urls))]
224+
225+
226+
@cached(cache=TTLCache(maxsize=1024, ttl=300))
def _groupURLsByPriority(urls: frozenset[str]) -> list[set[str]]:
    """Sort ``urls`` into one set per preferred pattern, plus a final catch-all set.

    The patterns are read from the ``/DIRAC/PreferredURLPatterns`` option and compiled
    on each (uncached) call. The result is cached per distinct ``urls`` value, so a
    change of the option is only seen for a given set of URLs once its cache entry
    has expired.

    NOTE(review): no ``lock`` is passed to ``cached``; confirm whether this function
    can be called concurrently from several threads.

    :param frozenset[str] urls: URLs to group (hashable so it can serve as cache key)

    :return: list[set[str]] -- one set per pattern, in pattern order, followed by a
        last set holding the URLs that matched no pattern
    """
    rawPatterns = gConfigurationData.extractOptionFromCFG("/DIRAC/PreferredURLPatterns")
    compiledPatterns = [re.compile(expr) for expr in List.fromChar(rawPatterns)] if rawPatterns else []

    # One bucket per pattern and an extra trailing one for "no pattern matched"
    buckets = [set() for _ in range(len(compiledPatterns) + 1)]
    for candidate in urls:
        rank = findURLPriority(compiledPatterns, candidate)
        buckets[rank].add(candidate)
    return buckets
236+
237+
238+
def findURLPriority(preferredURLPatterns: list[re.Pattern[str]], url: str) -> int:
    """Return the rank of the first preferred pattern that the URL matches.

    Patterns are tried in the order given, using ``re.match``, i.e. each pattern is
    anchored at the beginning of the URL.

    :param list preferredURLPatterns: patterns to check in ranked order
    :param str url: URL to check

    :return: int -- index of the first pattern that matched (smallest is the most
        preferred), or ``len(preferredURLPatterns)`` if none of them matched
    """
    matchingRanks = (rank for rank, candidate in enumerate(preferredURLPatterns) if re.match(candidate, url))
    # Fall back to one past the last pattern index when nothing matches
    return next(matchingRanks, len(preferredURLPatterns))
250+
251+
210252
def getServiceURLs(system, service=None, setup=False, failover=False):
211253
"""Generate url.
212254
@@ -225,8 +267,8 @@ def getServiceURLs(system, service=None, setup=False, failover=False):
225267
# Add failover URLs at the end of the list
226268
failover = "Failover" if failover else ""
227269
for fURLs in ["", "Failover"] if failover else [""]:
228-
urlList = []
229270
urls = List.fromChar(gConfigurationData.extractOptionFromCFG(f"{systemSection}/{fURLs}URLs/{service}"))
271+
urlList = set()
230272

231273
# Be sure that urls not None
232274
for url in urls or []:
@@ -243,16 +285,13 @@ def getServiceURLs(system, service=None, setup=False, failover=False):
243285

244286
for srv in mainServers:
245287
_url = checkComponentURL(url.replace("$MAINSERVERS$", srv), system, service, pathMandatory=True)
246-
if _url not in urlList:
247-
urlList.append(_url)
288+
urlList.add(_url)
248289
continue
249290

250-
_url = checkComponentURL(url, system, service, pathMandatory=True)
251-
if _url not in urlList:
252-
urlList.append(_url)
291+
urlList.add(checkComponentURL(url, system, service, pathMandatory=True))
253292

254-
# Randomize list if needed
255-
resList.extend(List.randomize(urlList))
293+
for urlGroup in groupURLsByPriority(urlList):
294+
resList.extend(List.randomize(urlGroup))
256295

257296
return resList
258297

src/DIRAC/ConfigurationSystem/Client/test/Test_PathFinder.py

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,20 +11,38 @@
1111
localCFGData = ConfigurationData(False)
1212
mergedCFG = CFG()
1313
mergedCFG.loadFromBuffer(
14-
"""
14+
r"""
1515
DIRAC
1616
{
1717
Setup=TestSetup
1818
Setups
1919
{
2020
TestSetup
2121
{
22+
Configuration=MyCS
2223
WorkloadManagement=MyWM
2324
}
2425
}
26+
PreferredURLPatterns = dips://.*\.site:.*
27+
PreferredURLPatterns += dips://.*\.other:.*
2528
}
2629
Systems
2730
{
31+
Configuration
32+
{
33+
MyCS
34+
{
35+
URLs
36+
{
37+
Server = dips://server1.site:1234/Configuration/Server
38+
Server += dips://server2.site:1234/Configuration/Server
39+
Server += dips://server3.site:1234/Configuration/Server
40+
Server += dips://server4.site:1234/Configuration/Server
41+
Server += dips://server.other:1234/Configuration/Server
42+
Server += dips://server.external:1234/Configuration/Server
43+
}
44+
}
45+
}
2846
WorkloadManagement
2947
{
3048
MyWM
@@ -204,6 +222,29 @@ def test_getServiceURLs(pathFinder, serviceName, service, failover, result):
204222
assert set(pathFinder.getServiceURLs(serviceName, service=service, failover=failover)) == result
205223

206224

225+
def test_getServiceURLsOrdering(pathFinder):
    """Ensure the PreferredURLPatterns option is respected"""
    otherURL = "dips://server.other:1234/Configuration/Server"
    externalURL = "dips://server.external:1234/Configuration/Server"
    expectedURLs = {
        "dips://server1.site:1234/Configuration/Server",
        "dips://server2.site:1234/Configuration/Server",
        "dips://server3.site:1234/Configuration/Server",
        "dips://server4.site:1234/Configuration/Server",
        otherURL,
        externalURL,
    }

    observedOrderings = set()
    for _ in range(10_000):
        urls = pathFinder.getServiceURLs("Configuration", service="Server")
        # Every configured URL must be returned, whatever the order
        assert set(urls) == expectedURLs
        # "other" matches the second preferred pattern, so it always comes second to last
        assert urls[-2] == otherURL
        # "external" matches no preferred pattern, so it always comes last
        assert urls[-1] == externalURL
        observedOrderings.add(tuple(urls))

    # The four ".site" URLs share the top priority and are shuffled among themselves:
    # there are 4! = 24 possible orderings, and all of them should have shown up at
    # least once in 10_000 iterations
    assert len(observedOrderings) >= 24
246+
247+
207248
@pytest.mark.parametrize(
208249
"system, setup, failover, result",
209250
[

src/DIRAC/Core/Utilities/List.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
"""
44
import random
55
import sys
6-
from typing import Any
6+
from typing import Any, TypeVar
7+
from collections.abc import Iterable
8+
9+
T = TypeVar("T")
710

811

912
def uniqueElements(aList: list) -> list:
@@ -37,7 +40,7 @@ def fromChar(inputString: str, sepChar: str = ","):
3740
return [fieldString.strip() for fieldString in inputString.split(sepChar) if len(fieldString.strip()) > 0]
3841

3942

40-
def randomize(aList: list) -> list:
43+
def randomize(aList: Iterable[T]) -> list[T]:
4144
"""Return a randomly sorted list.
4245
4346
:param aList: list to permute

0 commit comments

Comments
 (0)