Skip to content

Commit 123ab0b

Browse files
committed
change: Use PreferredURLPatterns instead of PreferredURLPattern
1 parent cc5996b commit 123ab0b

3 files changed

Lines changed: 56 additions & 30 deletions

File tree

docs/source/AdministratorGuide/Configuration/ConfReference/Systems/index.rst

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,27 +45,32 @@ However, this approach has limitations in certain scenarios:
4545
- External servers ("voboxes") running at sites may not be accessible from all clients
4646
- Connection attempts to inaccessible servers cause errors that, while harmless due to fallback mechanisms, slow down DIRAC and generate misleading error messages
4747

48-
To address these issues, you can define a ``PreferredURLPattern`` that identifies a subset of URLs to try first:
48+
To address these issues, you can define ``PreferredURLPatterns``, a list of regular expressions identifying the subset of URLs to try first:
4949

5050
.. code-block:: guess
5151
5252
System
5353
{
54-
PreferredURLPattern = .*\.main\.invalid/.*
5554
URLs
5655
{
5756
Service = dips://host1.main.invalid:1234/System/Service,dips://host2.main.invalid:1234/System/Service,dips://external.invalid:1234/System/Service
5857
}
5958
}
59+
DIRAC
60+
{
61+
PreferredURLPatterns = .*\.main\.invalid:.*
62+
}
6063
6164
In this example:
6265

63-
1. The ``PreferredURLPattern`` specifies a regular expression that matches servers in the ``main.invalid`` domain
66+
1. The ``PreferredURLPatterns`` option specifies a regular expression that matches servers in the ``main.invalid`` domain
6467
2. When connecting to the service, DIRAC will first try URLs matching this pattern (``host1.main.invalid`` and ``host2.main.invalid``)
6568
3. Only if these preferred servers fail will DIRAC attempt to connect to other servers (``external.invalid``)
6669

6770
This approach reduces connection errors and improves performance by prioritizing servers that are more likely to be accessible from the client.
6871

72+
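.. note:: ``PreferredURLPatterns`` is a list of regular expressions, not a single regular expression. This allows you to specify multiple patterns to match different subsets of servers if desired. The patterns are ranked in the order they are listed: URLs matching the first pattern are tried first, then URLs matching the second pattern, and so on. URLs that match none of the patterns are tried last.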
73+
6974

7075
Main Servers
7176
------------

src/DIRAC/ConfigurationSystem/Client/PathFinder.py

Lines changed: 39 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
""" Collection of utilities for finding paths in the CS
22
"""
33
import re
4-
from functools import lru_cache
4+
from copy import deepcopy
5+
from collections.abc import Iterable
56
from urllib import parse
67

8+
from cachetools import cached, TTLCache
9+
710
from DIRAC.Core.Utilities import List
811
from DIRAC.ConfigurationSystem.Client.ConfigurationData import gConfigurationData
912
from DIRAC.ConfigurationSystem.Client.Helpers import Path
@@ -209,20 +212,41 @@ def getSystemURLs(system, setup=False, failover=False):
209212
return urlDict
210213

211214

212-
@lru_cache(maxsize=1024)
213-
def checkPreferredURL(url, preferredURLPattern):
214-
"""Check if the URL matches the preferred URL pattern.
215+
def groupURLsByPriority(urls: Iterable[str]) -> list[set[str]]:
216+
"""Group URLs by priority.
217+
218+
The patterns are read, in ranked order, from the ``/DIRAC/PreferredURLPatterns`` option; URLs matching no pattern go in the last group.
219+
:param Iterable[str] urls: URLs to group
220+
221+
:return: list[set[str]] -- list of URL groups, ordered by priority
222+
"""
223+
return deepcopy(_groupURLsByPriority(frozenset(urls)))
224+
225+
226+
@cached(cache=TTLCache(maxsize=1024, ttl=300))
227+
def _groupURLsByPriority(urls: frozenset[str]) -> list[set[str]]:
228+
preferredURLPatterns = [
229+
re.compile(pattern)
230+
for pattern in List.fromChar(gConfigurationData.extractOptionFromCFG("/DIRAC/PreferredURLPatterns"))
231+
]
232+
urlGroups = [set() for _ in range(len(preferredURLPatterns) + 1)]
233+
for url in urls:
234+
urlGroups[findURLPriority(preferredURLPatterns, url)].add(url)
235+
return urlGroups
236+
215237

216-
Default is True if no preferred URL pattern is set.
238+
def findURLPriority(preferredURLPatterns: list[re.Pattern[str]], url: str) -> int:
239+
"""Find which preferred URL pattern the URL matches.
217240
241+
:param list[re.Pattern] preferredURLPatterns: compiled patterns to check in ranked order
218242
:param str url: URL to check
219-
:param str preferredURLPattern: preferred URL pattern
220243
221-
:return: bool -- True if the URL matches the preferred URL pattern
244+
:return: int -- index of the first pattern that matched (smallest is the most preferred), or ``len(preferredURLPatterns)`` if no pattern matched
222245
"""
223-
if not preferredURLPattern:
224-
return True
225-
return re.match(preferredURLPattern, url)
246+
for i, pattern in enumerate(preferredURLPatterns):
247+
if re.match(pattern, url):
248+
return i
249+
return len(preferredURLPatterns)
226250

227251

228252
def getServiceURLs(system, service=None, setup=False, failover=False):
@@ -243,10 +267,8 @@ def getServiceURLs(system, service=None, setup=False, failover=False):
243267
# Add failover URLs at the end of the list
244268
failover = "Failover" if failover else ""
245269
for fURLs in ["", "Failover"] if failover else [""]:
246-
preferredURLs = set()
247-
otherURLs = set()
248270
urls = List.fromChar(gConfigurationData.extractOptionFromCFG(f"{systemSection}/{fURLs}URLs/{service}"))
249-
preferredURLPattern = gConfigurationData.extractOptionFromCFG(f"{systemSection}/PreferredURLPattern")
271+
urlList = set()
250272

251273
# Make sure that urls is not None
252274
for url in urls or []:
@@ -263,19 +285,13 @@ def getServiceURLs(system, service=None, setup=False, failover=False):
263285

264286
for srv in mainServers:
265287
_url = checkComponentURL(url.replace("$MAINSERVERS$", srv), system, service, pathMandatory=True)
266-
preferredURLs.add(_url)
288+
urlList.add(_url)
267289
continue
268290

269-
_url = checkComponentURL(url, system, service, pathMandatory=True)
270-
# Don't apply the preferred URL pattern to failover URLs
271-
if failover or checkPreferredURL(_url, preferredURLPattern):
272-
preferredURLs.add(_url)
273-
else:
274-
otherURLs.add(_url)
291+
urlList.add(checkComponentURL(url, system, service, pathMandatory=True))
275292

276-
# Randomize list if needed
277-
resList.extend(List.randomize(preferredURLs))
278-
resList.extend(List.randomize(otherURLs))
293+
for urlGroup in groupURLsByPriority(urlList):
294+
resList.extend(List.randomize(urlGroup))
279295

280296
return resList
281297

src/DIRAC/ConfigurationSystem/Client/test/Test_PathFinder.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,21 +23,23 @@
2323
WorkloadManagement=MyWM
2424
}
2525
}
26+
PreferredURLPatterns = dips://.*\.site:.*
27+
PreferredURLPatterns += dips://.*\.other:.*
2628
}
2729
Systems
2830
{
2931
Configuration
3032
{
3133
MyCS
3234
{
33-
PreferredURLPattern = dips://.*\.site:.*
3435
URLs
3536
{
3637
Server = dips://server1.site:1234/Configuration/Server
3738
Server += dips://server2.site:1234/Configuration/Server
3839
Server += dips://server3.site:1234/Configuration/Server
3940
Server += dips://server4.site:1234/Configuration/Server
40-
Server += dips://server4.external:1234/Configuration/Server
41+
Server += dips://server.other:1234/Configuration/Server
42+
Server += dips://server.external:1234/Configuration/Server
4143
}
4244
}
4345
}
@@ -230,10 +232,13 @@ def test_getServiceURLsOrdering(pathFinder):
230232
"dips://server2.site:1234/Configuration/Server",
231233
"dips://server3.site:1234/Configuration/Server",
232234
"dips://server4.site:1234/Configuration/Server",
233-
"dips://server4.external:1234/Configuration/Server",
235+
"dips://server.other:1234/Configuration/Server",
236+
"dips://server.external:1234/Configuration/Server",
234237
}
238+
# The second to last URL should always be "other"
239+
assert urls[-2] == "dips://server.other:1234/Configuration/Server"
235240
# The last URL should always be the one which matches none of the preferred patterns
236-
assert urls[-1] == "dips://server4.external:1234/Configuration/Server"
241+
assert urls[-1] == "dips://server.external:1234/Configuration/Server"
237242
all_results.add(tuple(urls))
238243
# There are 4! = 24 possible orderings of the preferred URLs, we should have seen all
239244
# of them at least once in 10_000 iterations

0 commit comments

Comments
 (0)