Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,44 @@ In each system you normally find the following sections:
* FailoverURLs: Like URLs, but they are only tried if no server in URLs was successfully contacted.


Preferred URLs
--------------

For most services, the standard ``URLs`` and ``FailoverURLs`` mechanism provides a way to specify primary and backup service endpoints.

However, this approach has limitations in certain scenarios:

- Some services (like the Configuration service) have replicas that automatically register themselves in the Configuration System
- External servers ("voboxes") running at sites may not be accessible from all clients
- Connection attempts to inaccessible servers cause errors that, while harmless due to fallback mechanisms, slow down DIRAC and generate misleading error messages

To address these issues, you can define a ``PreferredURLPatterns`` option in the ``/DIRAC`` section that identifies a subset of URLs to try first:

.. code-block:: guess

System
{
URLs
{
Service = dips://host1.main.invalid:1234/System/Service,dips://host2.main.invalid:1234/System/Service,dips://external.invalid:1234/System/Service
}
}
DIRAC
{
PreferredURLPatterns = .*\.main\.invalid:.*
}

In this example:

1. ``PreferredURLPatterns`` contains a single regular expression that matches servers in the ``main.invalid`` domain
2. When connecting to the service, DIRAC will first try URLs matching this pattern (``host1.main.invalid`` and ``host2.main.invalid``)
3. Only if these preferred servers fail will DIRAC attempt to connect to other servers (``external.invalid``)

This approach reduces connection errors and improves performance by prioritizing servers that are more likely to be accessible from the client.

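.. note:: ``PreferredURLPatterns`` is a comma-separated list of regular expressions, not a single regular expression. This allows you to specify multiple patterns to match different subsets of servers if desired. The patterns are ranked: URLs matching the first pattern are tried first, then URLs matching the second pattern, and so on; URLs matching no pattern are tried last. Each pattern is matched from the beginning of the URL.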


Main Servers
------------

Expand Down
55 changes: 47 additions & 8 deletions src/DIRAC/ConfigurationSystem/Client/PathFinder.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
""" Collection of utilities for finding paths in the CS
"""
import re
from copy import deepcopy
from collections.abc import Iterable
from urllib import parse

from cachetools import cached, TTLCache

from DIRAC.Core.Utilities import List
from DIRAC.ConfigurationSystem.Client.ConfigurationData import gConfigurationData
from DIRAC.ConfigurationSystem.Client.Helpers import Path
Expand Down Expand Up @@ -151,6 +156,43 @@ def getSystemURLs(system, failover=False):
return urlDict


def groupURLsByPriority(urls: Iterable[str]) -> list[set[str]]:
    """Split URLs into groups ordered from the most to the least preferred.

    The grouping is delegated to the cached ``_groupURLsByPriority`` helper. A deep copy
    of its result is returned so that callers can modify the groups without touching the
    object held by the cache.

    :param Iterable[str] urls: URLs to group

    :return: list[set[str]] -- list of URL groups, ordered by priority
    """
    # The cached helper needs a hashable argument, hence the frozenset
    hashableURLs = frozenset(urls)
    cachedGroups = _groupURLsByPriority(hashableURLs)
    return deepcopy(cachedGroups)


@cached(cache=TTLCache(maxsize=1024, ttl=300))
def _groupURLsByPriority(urls: frozenset[str]) -> list[set[str]]:
    """Sort URLs into buckets according to the ``/DIRAC/PreferredURLPatterns`` option.

    The result is cached per set of URLs (``TTLCache`` with ``maxsize=1024, ttl=300``), so the
    returned object is shared between calls and must not be modified by callers.

    :param frozenset[str] urls: URLs to group

    :return: list[set[str]] -- one set per configured pattern, in the configured order,
             followed by a final set holding the URLs that matched no pattern
    """
    rawPatterns = gConfigurationData.extractOptionFromCFG("/DIRAC/PreferredURLPatterns")
    if rawPatterns:
        compiledPatterns = [re.compile(rawPattern) for rawPattern in List.fromChar(rawPatterns)]
    else:
        compiledPatterns = []

    # One bucket per pattern plus a trailing bucket for the URLs matching none of them
    buckets = [set() for _ in range(len(compiledPatterns) + 1)]
    for candidate in urls:
        rank = findURLPriority(compiledPatterns, candidate)
        buckets[rank].add(candidate)
    return buckets


def findURLPriority(preferredURLPatterns: list[re.Pattern[str]], url: str) -> int:
    """Find the rank of the first preferred URL pattern that the URL matches.

    Patterns are tried in the given order with ``match``, i.e. they are anchored at the
    beginning of the URL but do not have to consume it entirely.

    :param list[re.Pattern] preferredURLPatterns: compiled patterns to check, in ranked order
    :param str url: URL to check

    :return: int -- index of the first pattern that matched (smallest is the most preferred),
             or ``len(preferredURLPatterns)`` if no pattern matched
    """
    for rank, pattern in enumerate(preferredURLPatterns):
        # The patterns are already compiled: call their own method instead of re.match()
        if pattern.match(url):
            return rank
    return len(preferredURLPatterns)


def getServiceURLs(system, service=None, failover=False):
"""Generate url.

Expand All @@ -168,8 +210,8 @@ def getServiceURLs(system, service=None, failover=False):
# Add failover URLs at the end of the list
failover = "Failover" if failover else ""
for fURLs in ["", "Failover"] if failover else [""]:
urlList = []
urls = List.fromChar(gConfigurationData.extractOptionFromCFG(f"{systemSection}/{fURLs}URLs/{service}"))
urlList = set()

# Be sure that urls not None
for url in urls or []:
Expand All @@ -186,16 +228,13 @@ def getServiceURLs(system, service=None, failover=False):

for srv in mainServers:
_url = checkComponentURL(url.replace("$MAINSERVERS$", srv), system, service, pathMandatory=True)
if _url not in urlList:
urlList.append(_url)
urlList.add(_url)
continue

_url = checkComponentURL(url, system, service, pathMandatory=True)
if _url not in urlList:
urlList.append(_url)
urlList.add(checkComponentURL(url, system, service, pathMandatory=True))

# Randomize list if needed
resList.extend(List.randomize(urlList))
for urlGroup in groupURLsByPriority(urlList):
resList.extend(List.randomize(urlGroup))

return resList

Expand Down
42 changes: 41 additions & 1 deletion src/DIRAC/ConfigurationSystem/Client/test/Test_PathFinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,26 @@
localCFGData = ConfigurationData(False)
mergedCFG = CFG()
mergedCFG.loadFromBuffer(
"""
r"""
DIRAC
{
PreferredURLPatterns = dips://.*\.site:.*
PreferredURLPatterns += dips://.*\.other:.*
}
Systems
{
Configuration
{
URLs
{
Server = dips://server1.site:1234/Configuration/Server
Server += dips://server2.site:1234/Configuration/Server
Server += dips://server3.site:1234/Configuration/Server
Server += dips://server4.site:1234/Configuration/Server
Server += dips://server.other:1234/Configuration/Server
Server += dips://server.external:1234/Configuration/Server
}
}
WorkloadManagement
{
URLs
Expand Down Expand Up @@ -181,6 +198,29 @@ def test_getServiceURLs(pathFinder, serviceName, service, failover, result):
assert set(pathFinder.getServiceURLs(serviceName, service=service, failover=failover)) == result


def test_getServiceURLsOrdering(pathFinder):
    """Ensure the PreferredURLPatterns option is respected"""
    otherURL = "dips://server.other:1234/Configuration/Server"
    externalURL = "dips://server.external:1234/Configuration/Server"
    expectedURLs = {
        "dips://server1.site:1234/Configuration/Server",
        "dips://server2.site:1234/Configuration/Server",
        "dips://server3.site:1234/Configuration/Server",
        "dips://server4.site:1234/Configuration/Server",
        otherURL,
        externalURL,
    }

    seenOrderings = set()
    for _ in range(10_000):
        urls = pathFinder.getServiceURLs("Configuration", service="Server")
        # Every configured URL must be returned, whatever the order
        assert set(urls) == expectedURLs
        # The second to last URL should always be "other"
        assert urls[-2] == otherURL
        # The last URL should always be the one which isn't preferred
        assert urls[-1] == externalURL
        seenOrderings.add(tuple(urls))

    # There are 4! = 24 possible orderings of the preferred URLs, we should have seen all
    # of them at least once in 10_000 iterations
    assert len(seenOrderings) >= 24


@pytest.mark.parametrize(
"system, failover, result",
[
Expand Down
7 changes: 5 additions & 2 deletions src/DIRAC/Core/Utilities/List.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
"""
import random
import sys
from typing import Any
from typing import Any, TypeVar
from collections.abc import Iterable

T = TypeVar("T")


def uniqueElements(aList: list) -> list:
Expand Down Expand Up @@ -37,7 +40,7 @@ def fromChar(inputString: str, sepChar: str = ","):
return [fieldString.strip() for fieldString in inputString.split(sepChar) if len(fieldString.strip()) > 0]


def randomize(aList: list) -> list:
def randomize(aList: Iterable[T]) -> list[T]:
"""Return a randomly sorted list.

:param aList: list to permute
Expand Down
Loading