Skip to content

Commit b6b0340

Browse files
committed
feat: InfoGetter enforces policyType, supports per-policy arg overrides and generic specificity deduplication
- Configurations.py: POLICIESMETA now holds plain code-level defaults only; Operations() calls removed (CS is read at runtime by InfoGetter instead) - InfoGetter.getPoliciesThatApply: CS entries without policyType are skipped (they are command-args defaults sections, not policy definitions); non-reserved keys in a CS policy entry are collected as arg overrides, merged into the policy args with case-insensitive key normalisation and type casting - InfoGetter.postProcessingPolicyList: replaces the old FreeDiskSpaceMB/GB/TB hack with a generic rule — when multiple policies of the same policyType match, keep the most specific one (name-match presence > number of matchParams keys) - dirac.cfg: add SpecificFreeDiskSpace example (Unit=GB, Banned_threshold=15, Degraded_threshold falls back to default); fix missing closing brace in Policies - docs: note that command-args sections have no policyType and are not policy definitions - test: new Test_InfoGetter.py with 15 unit tests covering all the above
1 parent 2101211 commit b6b0340

5 files changed

Lines changed: 383 additions & 41 deletions

File tree

dirac.cfg

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1173,12 +1173,15 @@ Operations
11731173
# Command arguments for the built-in Downtime policy type.
11741174
# hours = 0 means only ongoing downtimes are considered (default).
11751175
# Set hours > 0 to also catch downtimes starting within that window.
1176+
# Note: this section has no policyType key and is therefore NOT treated
1177+
# as a policy definition — it only sets command argument defaults.
11761178
Downtime
11771179
{
11781180
hours = 0 # look-ahead window in hours (0 = ongoing only, default)
11791181
}
11801182
# Command arguments for the built-in FreeDiskSpace policy type.
11811183
# Unit and thresholds apply to all SEs monitored by this policy.
1184+
# Note: same as above — no policyType key, so not a policy definition.
11821185
FreeDiskSpace
11831186
{
11841187
Unit = TB # Space unit: TB (default), GB or MB
@@ -1214,6 +1217,18 @@ Operations
12141217
statusType = WriteAccess
12151218
}
12161219
}
1220+
# Example: apply FreeDiskSpace to SE1 with specific args (Unit and Banned_threshold);
1221+
# Degraded_threshold falls back to the default defined in the FreeDiskSpace section above.
1222+
SpecificFreeDiskSpace
1223+
{
1224+
policyType = FreeDiskSpace
1225+
Unit = GB
1226+
Banned_threshold = 15
1227+
matchParams
1228+
{
1229+
name = SE1
1230+
}
1231+
}
12171232
}
12181233
PolicyActions
12191234
{

docs/source/AdministratorGuide/Systems/ResourceStatus/advanced_configuration.rst

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,9 @@ The look-ahead window is configurable from the Operations CS:
9191
are considered. Setting a positive value (e.g. ``12``) also catches downtimes scheduled
9292
to start within that window, which is useful for proactive status changes.
9393

94+
This section has no ``policyType`` key and is therefore treated purely as
95+
command-argument defaults, not as a policy definition.
96+
9497
Example: flag elements with downtimes starting within the next 24 hours::
9598

9699
/Operations/Defaults/ResourceStatus/Policies/Downtime
@@ -124,7 +127,8 @@ from the Operations CS and fall back to safe defaults:
124127

125128
These keys live under ``/Operations/Defaults/ResourceStatus/Policies/FreeDiskSpace``,
126129
not under the ``/matchParams`` sub-section. They tune the **command arguments**, not the
127-
element-matching logic.
130+
element-matching logic. This section has no ``policyType`` key and is therefore not treated
131+
as a policy definition by the policy engine.
128132

129133
The default values of ``0.1`` and ``5`` are always used as fallback regardless of unit.
130134
Make sure to set meaningful threshold values explicitly in the CS when changing the unit.

src/DIRAC/ResourceStatusSystem/Policy/Configurations.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,26 +11,31 @@
1111
'args' : { arguments for the command } or None
1212
}
1313
14-
"""
14+
The values in ``args`` are code-level defaults. They can be overridden per-policy
15+
via the CS entry (e.g. ``Unit = GB`` directly under the policy name in
16+
``/Operations/Defaults/ResourceStatus/Policies/<PolicyName>``).
17+
Deployment-wide defaults can also be set in a command-args section named after
18+
the policy type (e.g. ``/Operations/Defaults/ResourceStatus/Policies/FreeDiskSpace``);
19+
these are picked up by InfoGetter before code-level defaults are applied.
1520
16-
from DIRAC.ConfigurationSystem.Client.Helpers.Operations import Operations
21+
"""
1722

1823
POLICIESMETA = { # DownTime POLICIES
1924
"Downtime": {
2025
"description": "Ongoing or scheduled down-times within <hours> from now (0 = ongoing only)",
2126
"module": "DowntimePolicy",
2227
"command": ("DowntimeCommand", "DowntimeCommand"),
23-
"args": {"hours": Operations().getValue("ResourceStatus/Policies/Downtime/hours", 0), "onlyCache": True},
28+
"args": {"hours": 0, "onlyCache": True},
2429
},
2530
# Free Disk Space
2631
"FreeDiskSpace": {
2732
"description": "Free disk space",
2833
"module": "FreeDiskSpacePolicy",
2934
"command": ("FreeDiskSpaceCommand", "FreeDiskSpaceCommand"),
3035
"args": {
31-
"unit": Operations().getValue("ResourceStatus/Policies/FreeDiskSpace/Unit", "TB"),
32-
"Banned_threshold": Operations().getValue("ResourceStatus/Policies/FreeDiskSpace/Banned_threshold", 0.1),
33-
"Degraded_threshold": Operations().getValue("ResourceStatus/Policies/FreeDiskSpace/Degraded_threshold", 5),
36+
"unit": "TB",
37+
"Banned_threshold": 0.1,
38+
"Degraded_threshold": 5,
3439
"onlyCache": True,
3540
},
3641
},

src/DIRAC/ResourceStatusSystem/Utilities/InfoGetter.py

Lines changed: 65 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -45,29 +45,32 @@ def getPoliciesThatApply(decisionParams):
4545

4646
# Get policies that match the given decisionParameters
4747
for policyName, policySetup in policiesConfig.items():
48-
# The parameter policyType replaces policyName, so if it is not present,
49-
# we pick policyName
48+
# The parameter policyType is mandatory. If not present, skip this entry —
49+
# it is a command-args defaults section, not a policy definition.
5050
try:
5151
policyType = policySetup["policyType"][0]
5252
except KeyError:
53-
policyType = policyName
54-
# continue
53+
continue
5554

5655
# The section matchParams is not mandatory, so we set {} as default.
5756
policyMatchParams = policySetup.get("matchParams", {})
5857
gLogger.debug(f"matchParams of {policyName}: {str(policyMatchParams)}")
5958

60-
# FIXME: make sure the values in the policyConfigParams dictionary are typed !!
61-
policyConfigParams = {}
62-
# policyConfigParams = policySetup.get( 'configParams', {} )
59+
# Any key in the CS policy entry that is not a reserved keyword is treated as
60+
# a command-argument override. These override the defaults from POLICIESMETA.
61+
_reservedKeys = {"policyType", "matchParams", "configParams", "doNotCombineResult", "active"}
62+
policyConfigParams = {
63+
k: v[0] if isinstance(v, list) else v for k, v in policySetup.items() if k not in _reservedKeys
64+
}
65+
6366
policyMatch = Utils.configMatch(decisionParams, policyMatchParams)
6467
gLogger.debug(f"PolicyMatch for decisionParams {decisionParams}: {str(policyMatch)}")
6568

6669
# WARNING: we need an additional filtering function when the matching
6770
# is not straightforward (e.g. when the policy specify a 'domain', while
6871
# the decisionParams has only the name of the element)
6972
if policyMatch and _filterPolicies(decisionParams, policyMatchParams):
70-
policiesThatApply.append((policyName, policyType, policyConfigParams))
73+
policiesThatApply.append((policyName, policyType, policyConfigParams, policyMatchParams))
7174

7275
gLogger.debug(f"policies that apply (before post-processing): {str(policiesThatApply)}")
7376
policiesThatApply = postProcessingPolicyList(policiesThatApply)
@@ -76,7 +79,7 @@ def getPoliciesThatApply(decisionParams):
7679
objectLoader = ObjectLoader()
7780
policiesToBeLoaded = []
7881
# Gets policies parameters from code.
79-
for policyName, policyType, _policyConfigParams in policiesThatApply:
82+
for policyName, policyType, _policyConfigParams, _policyMatchParams in policiesThatApply:
8083
try:
8184
result = objectLoader.loadModule("DIRAC.ResourceStatusSystem.Policy.Configurations")
8285
if not result["OK"]:
@@ -92,8 +95,22 @@ def getPoliciesThatApply(decisionParams):
9295
policyDict = {"name": policyName, "type": policyType, "args": {}}
9396

9497
# args is one of the parameters we are going to use on the policies. We copy
95-
# the defaults and then we update if with whatever comes from the CS.
98+
# the defaults from POLICIESMETA and then override with whatever comes from the CS.
9699
policyDict.update(policyMeta)
100+
if _policyConfigParams and policyDict.get("args") is not None:
101+
# Build a case-insensitive lookup of the existing arg keys so that CS keys
102+
# like "Unit" correctly override POLICIESMETA keys like "unit".
103+
argsKeyMap = {k.lower(): k for k in policyDict["args"]}
104+
for csKey, csVal in _policyConfigParams.items():
105+
targetKey = argsKeyMap.get(csKey.lower(), csKey)
106+
# CS values are always strings; cast to the type of the existing default.
107+
existingVal = policyDict["args"].get(targetKey)
108+
if existingVal is not None:
109+
try:
110+
csVal = type(existingVal)(csVal)
111+
except (ValueError, TypeError):
112+
pass
113+
policyDict["args"][targetKey] = csVal
97114

98115
policiesToBeLoaded.append(policyDict)
99116

@@ -262,28 +279,42 @@ def _filterPolicies(decisionParams, policyMatchParams):
262279

263280

264281
def postProcessingPolicyList(policiesThatApply):
265-
"""Put here any "hacky" post-processing"""
266-
267-
# FIXME: the following 2 "if" are a "hack" for dealing with the following case:
268-
# an SE happens to be subject to, e.g., both the 'FreeDiskSpaceMB' and the 'FreeDiskSpaceGB' policies
269-
# (currently, there is no way to avoid that this happens, see e.g. LogSE)
270-
# When this is the case, supposing that an SE has 50 MB free, the policies evaluation will be the following:
271-
# - 'FreeDiskSpaceMB' will evaluate 'Active'
272-
# - 'FreeDiskSpaceGB' will evaluate 'Banned'
273-
# so the SE will end up being banned, but we want only the 'FreeDiskSpaceMB' to be considered.
274-
if ("FreeDiskSpaceMB", "FreeDiskSpaceMB", {}) in policiesThatApply:
275-
try:
276-
policiesThatApply.remove(("FreeDiskSpaceGB", "FreeDiskSpaceGB", {}))
277-
except ValueError:
278-
pass
279-
try:
280-
policiesThatApply.remove(("FreeDiskSpaceTB", "FreeDiskSpaceTB", {}))
281-
except ValueError:
282-
pass
283-
if ("FreeDiskSpaceGB", "FreeDiskSpaceGB", {}) in policiesThatApply:
284-
try:
285-
policiesThatApply.remove(("FreeDiskSpaceTB", "FreeDiskSpaceTB", {}))
286-
except ValueError:
287-
pass
282+
"""Remove lower-priority duplicates when multiple policies of the same type apply.
283+
284+
When two or more policies share the same ``policyType`` and both match the current
285+
element, we keep only the most specific one. Specificity is determined by the number
286+
of ``matchParams`` keys: more keys = more specific. If one of the duplicates matched
287+
by ``name`` it is always considered more specific than one that did not.
288+
289+
This replaces the old per-type hacks (``FreeDiskSpaceMB`` > ``FreeDiskSpaceGB`` >
290+
``FreeDiskSpaceTB``) with a generic rule that works for any policy type.
291+
"""
292+
from collections import defaultdict
293+
294+
# Group policies by policyType
295+
byType = defaultdict(list)
296+
for entry in policiesThatApply:
297+
policyName, policyType, policyConfigParams, policyMatchParams = entry
298+
byType[policyType].append(entry)
299+
300+
result = []
301+
for policyType, entries in byType.items():
302+
if len(entries) == 1:
303+
result.extend(entries)
304+
continue
288305

289-
return policiesThatApply
306+
# Multiple policies of the same type matched — keep only the most specific.
307+
# Specificity = number of matchParams keys; ties broken by name-match presence.
308+
def specificity(entry):
309+
matchParams = entry[3] # policyMatchParams
310+
nameMatch = 1 if "name" in matchParams else 0
311+
return (nameMatch, len(matchParams))
312+
313+
most_specific = max(entries, key=specificity)
314+
result.append(most_specific)
315+
gLogger.debug(
316+
f"postProcessing: multiple {policyType!r} policies matched; "
317+
f"keeping {most_specific[0]!r}, dropping {[e[0] for e in entries if e is not most_specific]}"
318+
)
319+
320+
return result

0 commit comments

Comments
 (0)