Skip to content

Commit 78c70aa

Browse files
authored
Merge pull request #69493 from twangboy/fix/69492/3006.x
Fix lgpo_reg Registry.pol lock races using EnterCriticalPolicySection
2 parents d57c9f0 + a0f8950 commit 78c70aa

5 files changed

Lines changed: 349 additions & 80 deletions

File tree

changelog/69492.fixed.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fixed ``lgpo_reg`` failures on Windows when ``Registry.pol`` is temporarily locked by the Group Policy service or other processes. Salt now uses ``EnterCriticalPolicySection`` / ``LeaveCriticalPolicySection`` from ``userenv.dll`` — the same synchronization primitive used by the GP engine — to serialize read-modify-write access to ``Registry.pol``. A retry loop with configurable attempts and delay is also applied for non-GP lockers such as antivirus scanners or VSS snapshots that do not participate in the GP critical section handshake.

salt/modules/win_lgpo_reg.py

Lines changed: 74 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -523,29 +523,31 @@ def set_value(
523523
msg = f"{v_type} data must be an integer"
524524
raise SaltInvocationError(msg)
525525

526-
pol_data = read_reg_pol(policy_class=policy_class)
527-
528-
found_key, found_name = _find_value(pol_data, key, v_name)
529-
530-
if found_key:
531-
if found_name:
532-
if "**del." in found_name:
533-
log.debug("LGPO_REG Mod: Found disabled name: %s", found_name)
534-
pol_data[found_key][v_name] = pol_data[found_key].pop(found_name)
535-
found_name = v_name
536-
log.debug("LGPO_REG Mod: Updating value: %s", found_name)
537-
pol_data[found_key][found_name] = {"data": v_data, "type": v_type}
526+
machine = policy_class == "Machine"
527+
with salt.utils.win_lgpo_reg._policy_lock(machine=machine):
528+
pol_data = read_reg_pol(policy_class=policy_class)
529+
530+
found_key, found_name = _find_value(pol_data, key, v_name)
531+
532+
if found_key:
533+
if found_name:
534+
if "**del." in found_name:
535+
log.debug("LGPO_REG Mod: Found disabled name: %s", found_name)
536+
pol_data[found_key][v_name] = pol_data[found_key].pop(found_name)
537+
found_name = v_name
538+
log.debug("LGPO_REG Mod: Updating value: %s", found_name)
539+
pol_data[found_key][found_name] = {"data": v_data, "type": v_type}
540+
else:
541+
log.debug("LGPO_REG Mod: Setting new value: %s", found_name)
542+
pol_data[found_key][v_name] = {"data": v_data, "type": v_type}
538543
else:
539-
log.debug("LGPO_REG Mod: Setting new value: %s", found_name)
540-
pol_data[found_key][v_name] = {"data": v_data, "type": v_type}
541-
else:
542-
log.debug("LGPO_REG Mod: Adding new key and value: %s", found_name)
543-
pol_data[key] = {v_name: {"data": v_data, "type": v_type}}
544+
log.debug("LGPO_REG Mod: Adding new key and value: %s", found_name)
545+
pol_data[key] = {v_name: {"data": v_data, "type": v_type}}
544546

545-
success = True
546-
if not write_reg_pol(pol_data, policy_class=policy_class):
547-
log.error("LGPO_REG Mod: Failed to write registry.pol file")
548-
success = False
547+
success = True
548+
if not write_reg_pol(pol_data, policy_class=policy_class):
549+
log.error("LGPO_REG Mod: Failed to write registry.pol file")
550+
success = False
549551

550552
# Resolve auto-detect: skip registry write on Domain Controllers where
551553
# HKLM\SOFTWARE\Policies\ is write-protected by AD security hardening.
@@ -661,40 +663,42 @@ def disable_value(
661663
else:
662664
raise SaltInvocationError("An invalid policy class was specified")
663665

664-
pol_data = read_reg_pol(policy_class=policy_class)
666+
machine = policy_class == "Machine"
667+
with salt.utils.win_lgpo_reg._policy_lock(machine=machine):
668+
pol_data = read_reg_pol(policy_class=policy_class)
665669

666-
found_key, found_name = _find_value(pol_data, key, v_name)
670+
found_key, found_name = _find_value(pol_data, key, v_name)
667671

668-
pol_modified = False
669-
if found_key:
670-
if found_name:
671-
if "**del." in found_name:
672-
log.debug("LGPO_REG Mod: Already disabled: %s", v_name)
672+
pol_modified = False
673+
if found_key:
674+
if found_name:
675+
if "**del." in found_name:
676+
log.debug("LGPO_REG Mod: Already disabled: %s", v_name)
677+
else:
678+
log.debug("LGPO_REG Mod: Disabling value name: %s", v_name)
679+
pol_data[found_key].pop(found_name)
680+
found_name = f"**del.{found_name}"
681+
pol_data[found_key][found_name] = {"data": " ", "type": "REG_SZ"}
682+
pol_modified = True
673683
else:
674-
log.debug("LGPO_REG Mod: Disabling value name: %s", v_name)
675-
pol_data[found_key].pop(found_name)
676-
found_name = f"**del.{found_name}"
677-
pol_data[found_key][found_name] = {"data": " ", "type": "REG_SZ"}
684+
log.debug("LGPO_REG Mod: Setting new disabled value name: %s", v_name)
685+
pol_data[found_key][f"**del.{v_name}"] = {
686+
"data": " ",
687+
"type": "REG_SZ",
688+
}
678689
pol_modified = True
679690
else:
680-
log.debug("LGPO_REG Mod: Setting new disabled value name: %s", v_name)
681-
pol_data[found_key][f"**del.{v_name}"] = {
682-
"data": " ",
683-
"type": "REG_SZ",
684-
}
691+
log.debug(
692+
"LGPO_REG Mod: Adding new key and disabled value name: %s", found_name
693+
)
694+
pol_data[key] = {f"**del.{v_name}": {"data": " ", "type": "REG_SZ"}}
685695
pol_modified = True
686-
else:
687-
log.debug(
688-
"LGPO_REG Mod: Adding new key and disabled value name: %s", found_name
689-
)
690-
pol_data[key] = {f"**del.{v_name}": {"data": " ", "type": "REG_SZ"}}
691-
pol_modified = True
692-
693-
success = True
694-
if pol_modified:
695-
if not write_reg_pol(pol_data, policy_class=policy_class):
696-
log.error("LGPO_REG Mod: Failed to write registry.pol file")
697-
success = False
696+
697+
success = True
698+
if pol_modified:
699+
if not write_reg_pol(pol_data, policy_class=policy_class):
700+
log.error("LGPO_REG Mod: Failed to write registry.pol file")
701+
success = False
698702

699703
# Resolve auto-detect: skip registry delete on Domain Controllers.
700704
if write_registry is None:
@@ -813,29 +817,31 @@ def delete_value(
813817
else:
814818
raise SaltInvocationError("An invalid policy class was specified")
815819

816-
pol_data = read_reg_pol(policy_class=policy_class)
820+
machine = policy_class == "Machine"
821+
with salt.utils.win_lgpo_reg._policy_lock(machine=machine):
822+
pol_data = read_reg_pol(policy_class=policy_class)
817823

818-
found_key, found_name = _find_value(pol_data, key, v_name)
824+
found_key, found_name = _find_value(pol_data, key, v_name)
819825

820-
pol_modified = False
821-
if found_key:
822-
if found_name:
823-
log.debug("LGPO_REG Mod: Removing value name: %s", found_name)
824-
pol_data[found_key].pop(found_name)
825-
pol_modified = True
826-
if len(pol_data[found_key]) == 0:
827-
log.debug("LGPO_REG Mod: Removing empty key: %s", found_key)
828-
pol_data.pop(found_key)
826+
pol_modified = False
827+
if found_key:
828+
if found_name:
829+
log.debug("LGPO_REG Mod: Removing value name: %s", found_name)
830+
pol_data[found_key].pop(found_name)
831+
pol_modified = True
832+
if len(pol_data[found_key]) == 0:
833+
log.debug("LGPO_REG Mod: Removing empty key: %s", found_key)
834+
pol_data.pop(found_key)
835+
else:
836+
log.debug("LGPO_REG Mod: Value name not found: %s", v_name)
829837
else:
830-
log.debug("LGPO_REG Mod: Value name not found: %s", v_name)
831-
else:
832-
log.debug("LGPO_REG Mod: Key not found: %s", key)
838+
log.debug("LGPO_REG Mod: Key not found: %s", key)
833839

834-
success = True
835-
if pol_modified:
836-
if not write_reg_pol(pol_data, policy_class=policy_class):
837-
log.error("LGPO_REG Mod: Failed to write registry.pol file")
838-
success = False
840+
success = True
841+
if pol_modified:
842+
if not write_reg_pol(pol_data, policy_class=policy_class):
843+
log.error("LGPO_REG Mod: Failed to write registry.pol file")
844+
success = False
839845

840846
# Resolve auto-detect: skip registry delete on Domain Controllers.
841847
if write_registry is None:

salt/utils/win_lgpo_reg.py

Lines changed: 94 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,13 @@
44
"""
55

66
import ctypes
7+
import ctypes.wintypes
78
import logging
89
import os
910
import re
1011
import struct
12+
import time
13+
from contextlib import contextmanager
1114

1215
import salt.modules.win_file
1316
import salt.utils.files
@@ -151,12 +154,82 @@ def read_reg_pol_file(reg_pol_path):
151154
return return_data
152155

153156

157+
def _write_with_retry(path, data, mode, retry_count, retry_delay):
158+
"""
159+
Write data to a file, retrying on Windows sharing violations (winerror 32).
160+
Fails immediately on any other error (e.g. winerror 5, true access denied).
161+
"""
162+
for attempt in range(1, retry_count + 1):
163+
try:
164+
with salt.utils.files.fopen(path, mode) as f:
165+
f.write(data)
166+
return
167+
except PermissionError as e:
168+
if e.winerror != 32 or attempt == retry_count:
169+
raise
170+
log.warning(
171+
"LGPO_REG Util: %s is locked (attempt %d/%d). "
172+
"Retrying in %d seconds...",
173+
path,
174+
attempt,
175+
retry_count,
176+
retry_delay,
177+
)
178+
time.sleep(retry_delay)
179+
180+
181+
@contextmanager
182+
def _policy_lock(machine=True):
183+
"""
184+
Context manager that holds the Windows GP critical section for the duration
185+
of a read-modify-write cycle on Registry.pol.
186+
187+
EnterCriticalPolicySection / LeaveCriticalPolicySection are the same
188+
primitives gpsvc uses internally, so holding this lock prevents the GP
189+
service from opening the policy file concurrently.
190+
191+
EnterCriticalPolicySection is a blocking call — it does not return until
192+
the critical section is acquired. If gpsvc currently holds it (e.g. during
193+
a background GP refresh), this call blocks until gpsvc releases it, at which
194+
point gpsvc will also have released any file lock on Registry.pol. No retry
195+
loop is needed here; the blocking behavior is the wait mechanism.
196+
197+
The only residual risk after acquiring the critical section is a non-GP
198+
locker (AV scanner, VSS) that does not participate in this handshake; the
199+
_write_with_retry layer handles those.
200+
201+
If the lock cannot be acquired (returns NULL), a CommandExecutionError is
202+
raised. NULL from this API always indicates a genuine system error (handle
203+
exhaustion, access denied creating the kernel object, etc.) — not a "GP
204+
service is busy" condition. The blocking behavior handles the busy case.
205+
Proceeding silently without the lock would risk an uncoordinated
206+
read-modify-write followed by a confusing downstream write error.
207+
"""
208+
userenv = ctypes.WinDLL("userenv.dll")
209+
userenv.EnterCriticalPolicySection.restype = ctypes.wintypes.HANDLE
210+
userenv.EnterCriticalPolicySection.argtypes = [ctypes.c_bool]
211+
userenv.LeaveCriticalPolicySection.restype = ctypes.c_bool
212+
userenv.LeaveCriticalPolicySection.argtypes = [ctypes.wintypes.HANDLE]
213+
214+
handle = userenv.EnterCriticalPolicySection(machine)
215+
if not handle:
216+
raise CommandExecutionError(
217+
"LGPO_REG Util: Failed to acquire GP critical section"
218+
)
219+
try:
220+
yield
221+
finally:
222+
userenv.LeaveCriticalPolicySection(handle)
223+
224+
154225
def write_reg_pol_data(
155226
data_to_write,
156227
policy_file_path,
157228
gpt_extension,
158229
gpt_extension_guid,
159230
gpt_ini_path=GPT_INI_PATH,
231+
retry_count=10,
232+
retry_delay=5,
160233
):
161234
"""
162235
Helper function to actually write the data to a Registry.pol file
@@ -178,6 +251,17 @@ def write_reg_pol_data(
178251
179252
gpt_ini_path (str): The path to the gpt.ini file
180253
254+
retry_count (int): Number of attempts to make when a write fails due
255+
to a sharing violation (``winerror 32``). Sharing violations occur
256+
when a process such as an antivirus scanner or VSS holds the file
257+
open with an incompatible sharing mode. The GP critical section
258+
(see :func:`_policy_lock`) prevents races with ``gpsvc`` itself,
259+
so retries are primarily a fallback for those other lockers.
260+
Default is ``10``.
261+
262+
retry_delay (int): Seconds to wait between retry attempts when a
263+
sharing violation is encountered. Default is ``5``.
264+
181265
Returns:
182266
bool: True if successful
183267
@@ -191,14 +275,14 @@ def write_reg_pol_data(
191275
if data_to_write is None:
192276
data_to_write = b""
193277
try:
194-
with salt.utils.files.fopen(policy_file_path, "wb") as pol_file:
195-
reg_pol_header = REG_POL_HEADER.encode("utf-16-le")
196-
if not data_to_write.startswith(reg_pol_header):
197-
log.debug("LGPO_REG Util: Writing header to %s", policy_file_path)
198-
pol_file.write(reg_pol_header)
199-
log.debug("LGPO_REG Util: Writing to %s", policy_file_path)
200-
pol_file.write(data_to_write)
201-
# TODO: This needs to be more specific
278+
reg_pol_header = REG_POL_HEADER.encode("utf-16-le")
279+
if not data_to_write.startswith(reg_pol_header):
280+
log.debug("LGPO_REG Util: Writing header to %s", policy_file_path)
281+
data_to_write = reg_pol_header + data_to_write
282+
log.debug("LGPO_REG Util: Writing to %s", policy_file_path)
283+
_write_with_retry(
284+
policy_file_path, data_to_write, "wb", retry_count, retry_delay
285+
)
202286
except Exception as e: # pylint: disable=broad-except
203287
msg = (
204288
"An error occurred attempting to write to registry.pol\n"
@@ -297,12 +381,10 @@ def write_reg_pol_data(
297381
)
298382
if gpt_ini_data:
299383
try:
300-
with salt.utils.files.fopen(gpt_ini_path, "w") as gpt_file:
301-
gpt_file.write(gpt_ini_data)
302-
# TODO: This needs to be more specific
384+
_write_with_retry(gpt_ini_path, gpt_ini_data, "w", retry_count, retry_delay)
303385
except Exception as e: # pylint: disable=broad-except
304386
msg = (
305-
"An error occurred attempting to write the gpg.ini file.\n"
387+
"An error occurred attempting to write the gpt.ini file.\n"
306388
"path: {}\n"
307389
"exception: {}".format(gpt_ini_path, e)
308390
)

0 commit comments

Comments
 (0)