Skip to content

Commit 690a25f

Browse files
authored
[clang] Don't optimize out no-op atomics in kernel mode (llvm#193562)
No-op atomics such as InterlockedAnd(addr, (UINT32)-1) don't modify the underlying value; however, kernel code depends on these accesses to touch the pool page virtual address and intentionally trigger a page fault during page migration. This patch also fixes an LLVM issue where idempotent volatile atomics were incorrectly lowered into memory fences.
1 parent 9cd3c0b commit 690a25f

5 files changed

Lines changed: 39 additions & 0 deletions

File tree

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,9 @@ Value *MakeBinaryAtomicValue(
315315

316316
llvm::Value *Result =
317317
CGF.Builder.CreateAtomicRMW(Kind, DestAddr, Val, Ordering);
318+
// Consider atomics to be volatile in MS kernel mode.
319+
if (CGF.CGM.getLangOpts().Kernel)
320+
cast<llvm::AtomicRMWInst>(Result)->setVolatile(true);
318321
return EmitFromInt(CGF, Result, T, ValueType);
319322
}
320323

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// Check that we don't fold a no-op atomic AND into a memory barrier
2+
// REQUIRES: aarch64-registered-target
3+
// RUN: %clang_cc1 -fms-kernel -fms-extensions -Wno-implicit-function-declaration -triple aarch64-pc-win32 -O2 -S -o - %s | FileCheck %s --check-prefix=ARM64
4+
5+
// ARM64: ldaxr
6+
// ARM64-NEXT: stlxr
7+
// ARM64-NEXT: cbnz
8+
9+
void access_via_interlocked(long volatile* addr) {
10+
_InterlockedAnd(addr, (long)-1);
11+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// Check that we don't fold no-op andl to memory barrier
2+
// REQUIRES: x86-registered-target
3+
// RUN: %clang_cc1 -fms-kernel -fms-extensions -Wno-implicit-function-declaration -triple x86_64-pc-win32 -O2 -S -o - %s | FileCheck %s --check-prefix=X86
4+
5+
// X86: lock andl $-1, (%rcx)
6+
// X86-NEXT: retq
7+
8+
void access_via_interlocked(long volatile* addr) {
9+
_InterlockedAnd(addr, (long)-1);
10+
}

llvm/lib/CodeGen/AtomicExpandPass.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1710,6 +1710,8 @@ bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
17101710
}
17111711

17121712
bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
1713+
if (RMWI->isVolatile())
1714+
return false;
17131715
// TODO: Add floating point support.
17141716
auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
17151717
if (!C)
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
; RUN: opt -S -passes='require<libcall-lowering-info>,expand-ir-insts,atomic-expand' %s -o - | FileCheck %s
2+
3+
; volatile atomicrmw shouldn't be converted to a fence
4+
; CHECK: %0 = atomicrmw volatile and ptr %addr, i32 -1 seq_cst
5+
6+
target triple = "x86_64-pc-windows-msvc"
7+
8+
define dso_local void @access_via_interlocked(ptr noundef %addr) {
9+
entry:
10+
%0 = atomicrmw volatile and ptr %addr, i32 -1 seq_cst, align 4
11+
ret void
12+
}
13+

0 commit comments

Comments
 (0)