Skip to content

Commit cc5996b

Browse files
committed
feat: Add PreferredURLPattern for URL sorting
1 parent b1a91d5 commit cc5996b

4 files changed

Lines changed: 104 additions & 9 deletions

File tree

docs/source/AdministratorGuide/Configuration/ConfReference/Systems/index.rst

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,39 @@ In each system, per setup, you normally find the following sections:
3434
* FailoverURLs: Like URLs, but they are only tried if no server in URLs was successfully contacted.
3535

3636

37+
Preferred URLs
38+
--------------
39+
40+
For most services, the standard ``URLs`` and ``FailoverURLs`` mechanism provides a way to specify primary and backup service endpoints.
41+
42+
However, this approach has limitations in certain scenarios:
43+
44+
- Some services (like the Configuration service) have replicas that automatically register themselves in the Configuration System
45+
- External servers ("voboxes") running at sites may not be accessible from all clients
46+
- Connection attempts to inaccessible servers cause errors that, while harmless due to fallback mechanisms, slow down DIRAC and generate misleading error messages
47+
48+
To address these issues, you can define a ``PreferredURLPattern`` that identifies a subset of URLs to try first:
49+
50+
.. code-block:: guess
51+
52+
System
53+
{
54+
PreferredURLPattern = .*\.main\.invalid:.*
55+
URLs
56+
{
57+
Service = dips://host1.main.invalid:1234/System/Service,dips://host2.main.invalid:1234/System/Service,dips://external.invalid:1234/System/Service
58+
}
59+
}
60+
61+
In this example:
62+
63+
1. The ``PreferredURLPattern`` specifies a regular expression, matched from the beginning of each URL, that selects servers in the ``main.invalid`` domain
64+
2. When connecting to the service, DIRAC will first try URLs matching this pattern (``host1.main.invalid`` and ``host2.main.invalid``)
65+
3. Only if these preferred servers fail will DIRAC attempt to connect to other servers (``external.invalid``)
66+
67+
This approach reduces connection errors and improves performance by prioritizing servers that are more likely to be accessible from the client.
68+
69+
3770
Main Servers
3871
------------
3972

src/DIRAC/ConfigurationSystem/Client/PathFinder.py

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
""" Collection of utilities for finding paths in the CS
22
"""
3+
import re
4+
from functools import lru_cache
35
from urllib import parse
46

57
from DIRAC.Core.Utilities import List
@@ -207,6 +209,22 @@ def getSystemURLs(system, setup=False, failover=False):
207209
return urlDict
208210

209211

212+
@lru_cache(maxsize=1024)
def checkPreferredURL(url, preferredURLPattern):
    """Check if the URL matches the preferred URL pattern.

    The pattern is applied with ``re.match``, i.e. it is anchored at the
    beginning of the URL but not at its end.

    Default is True if no preferred URL pattern is set.

    :param str url: URL to check
    :param str preferredURLPattern: preferred URL pattern (regular expression);
        any falsy value (empty string, None) means that no pattern is configured

    :return: bool -- True if the URL matches the preferred URL pattern
    """
    if not preferredURLPattern:
        return True
    # Convert to a real bool: this honours the documented return type and keeps
    # re.Match objects out of the lru_cache.
    return re.match(preferredURLPattern, url) is not None
226+
227+
210228
def getServiceURLs(system, service=None, setup=False, failover=False):
211229
"""Generate url.
212230
@@ -225,8 +243,10 @@ def getServiceURLs(system, service=None, setup=False, failover=False):
225243
# Add failover URLs at the end of the list
226244
failover = "Failover" if failover else ""
227245
for fURLs in ["", "Failover"] if failover else [""]:
228-
urlList = []
246+
preferredURLs = set()
247+
otherURLs = set()
229248
urls = List.fromChar(gConfigurationData.extractOptionFromCFG(f"{systemSection}/{fURLs}URLs/{service}"))
249+
preferredURLPattern = gConfigurationData.extractOptionFromCFG(f"{systemSection}/PreferredURLPattern")
230250

231251
# Be sure that urls not None
232252
for url in urls or []:
@@ -243,16 +263,19 @@ def getServiceURLs(system, service=None, setup=False, failover=False):
243263

244264
for srv in mainServers:
245265
_url = checkComponentURL(url.replace("$MAINSERVERS$", srv), system, service, pathMandatory=True)
246-
if _url not in urlList:
247-
urlList.append(_url)
266+
preferredURLs.add(_url)
248267
continue
249268

250269
_url = checkComponentURL(url, system, service, pathMandatory=True)
251-
if _url not in urlList:
252-
urlList.append(_url)
270+
# Don't apply the preferred URL pattern to failover URLs
271+
if failover or checkPreferredURL(_url, preferredURLPattern):
272+
preferredURLs.add(_url)
273+
else:
274+
otherURLs.add(_url)
253275

254276
# Randomize list if needed
255-
resList.extend(List.randomize(urlList))
277+
resList.extend(List.randomize(preferredURLs))
278+
resList.extend(List.randomize(otherURLs))
256279

257280
return resList
258281

src/DIRAC/ConfigurationSystem/Client/test/Test_PathFinder.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,20 +11,36 @@
1111
localCFGData = ConfigurationData(False)
1212
mergedCFG = CFG()
1313
mergedCFG.loadFromBuffer(
14-
"""
14+
r"""
1515
DIRAC
1616
{
1717
Setup=TestSetup
1818
Setups
1919
{
2020
TestSetup
2121
{
22+
Configuration=MyCS
2223
WorkloadManagement=MyWM
2324
}
2425
}
2526
}
2627
Systems
2728
{
29+
Configuration
30+
{
31+
MyCS
32+
{
33+
PreferredURLPattern = dips://.*\.site:.*
34+
URLs
35+
{
36+
Server = dips://server1.site:1234/Configuration/Server
37+
Server += dips://server2.site:1234/Configuration/Server
38+
Server += dips://server3.site:1234/Configuration/Server
39+
Server += dips://server4.site:1234/Configuration/Server
40+
Server += dips://server4.external:1234/Configuration/Server
41+
}
42+
}
43+
}
2844
WorkloadManagement
2945
{
3046
MyWM
@@ -204,6 +220,26 @@ def test_getServiceURLs(pathFinder, serviceName, service, failover, result):
204220
assert set(pathFinder.getServiceURLs(serviceName, service=service, failover=failover)) == result
205221

206222

223+
def test_getServiceURLsOrdering(pathFinder):
    """Check that URLs matching PreferredURLPattern always come before the others"""
    nonPreferredURL = "dips://server4.external:1234/Configuration/Server"
    expectedURLs = {
        "dips://server1.site:1234/Configuration/Server",
        "dips://server2.site:1234/Configuration/Server",
        "dips://server3.site:1234/Configuration/Server",
        "dips://server4.site:1234/Configuration/Server",
        nonPreferredURL,
    }

    seenOrderings = set()
    for _ in range(10_000):
        ordering = tuple(pathFinder.getServiceURLs("Configuration", service="Server"))
        # Every configured URL must be returned, whatever the order
        assert set(ordering) == expectedURLs
        # The only URL not matching the pattern has to be tried last
        assert ordering[-1] == nonPreferredURL
        seenOrderings.add(ordering)

    # The 4 preferred URLs are shuffled, giving 4! = 24 distinct orderings;
    # 10_000 draws should be enough to observe every one of them
    assert len(seenOrderings) >= 24
241+
242+
207243
@pytest.mark.parametrize(
208244
"system, setup, failover, result",
209245
[

src/DIRAC/Core/Utilities/List.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
"""
44
import random
55
import sys
6-
from typing import Any
6+
from typing import Any, TypeVar
7+
from collections.abc import Iterable
8+
9+
T = TypeVar("T")
710

811

912
def uniqueElements(aList: list) -> list:
@@ -37,7 +40,7 @@ def fromChar(inputString: str, sepChar: str = ","):
3740
return [fieldString.strip() for fieldString in inputString.split(sepChar) if len(fieldString.strip()) > 0]
3841

3942

40-
def randomize(aList: list) -> list:
43+
def randomize(aList: Iterable[T]) -> list[T]:
4144
"""Return a randomly sorted list.
4245
4346
:param aList: list to permute

0 commit comments

Comments
 (0)