Skip to content

Commit c93ea4d

Browse files
committed
early feedback
1 parent e63c77f commit c93ea4d

1 file changed

Lines changed: 29 additions & 22 deletions

File tree

src/snmalloc/global/memcpy.h

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -31,25 +31,31 @@ namespace snmalloc
3131

3232
/**
3333
* Copy a single element where source and destination may overlap.
34-
* Uses __builtin_memmove which the compiler can optimize to register-width
35-
* loads/stores while correctly handling overlap. We cannot use
36-
* __builtin_memcpy_inline (ASan treats it as memcpy and flags overlap)
37-
* or struct copy (compiler lowers *d = *s to a memcpy call, same problem).
34+
*
35+
* Reads src into a fresh stack temporary, then writes the temporary to
36+
* dst. The temporary aliases neither src nor dst, so each half is a
37+
* non-overlapping copy: ASan stays happy, and we avoid __builtin_memmove
38+
* (which is permitted to lower back to a memmove call and could recurse
39+
* into our own memmove when this header is used to implement that libc
40+
* symbol). For the sizes we actually instantiate (1, sizeof(uint64_t),
41+
* 16) every supported toolchain compiles the struct assignments below
42+
* to register-width loads/stores rather than a library call.
43+
*
44+
* On StrictProvenance/CHERI this helper is only invoked with Size == 1
45+
* (via block_copy_move<1>/block_reverse_copy<1>); the pointer-preserving
46+
* paths copy through Ptr2/void** directly, so capability tags are
47+
* preserved by the surrounding code, not here.
3848
*/
3949
template<size_t Size>
4050
SNMALLOC_FAST_PATH_INLINE void copy_one_move(void* dst, const void* src)
4151
{
42-
#if __has_builtin(__builtin_memmove)
43-
__builtin_memmove(dst, src, Size);
44-
#else
45-
// Fallback: byte-by-byte copy through a temporary buffer to avoid
46-
// the compiler generating a memcpy call for struct assignment.
47-
char tmp[Size];
48-
for (size_t i = 0; i < Size; ++i)
49-
tmp[i] = static_cast<const char*>(src)[i];
50-
for (size_t i = 0; i < Size; ++i)
51-
static_cast<char*>(dst)[i] = tmp[i];
52-
#endif
52+
struct Block
53+
{
54+
char data[Size];
55+
};
56+
57+
Block tmp = *static_cast<const Block*>(src);
58+
*static_cast<Block*>(dst) = tmp;
5359
}
5460

5561
/**
@@ -257,7 +263,7 @@ namespace snmalloc
257263
* Reverse copy for overlapping memmove where dst > src.
258264
* Caller guarantees len > 0 and dst != src.
259265
*/
260-
static void* move(void* dst, const void* src, size_t len)
266+
static void* reverse_move(void* dst, const void* src, size_t len)
261267
{
262268
auto orig_dst = dst;
263269
block_reverse_copy<LargestRegisterSize>(dst, src, len);
@@ -507,7 +513,7 @@ namespace snmalloc
507513
* to a pointer-aligned address in the other, so integer-only
508514
* reverse copy is safe.
509515
*/
510-
static void* move(void* dst, const void* src, size_t len)
516+
static void* reverse_move(void* dst, const void* src, size_t len)
511517
{
512518
auto orig_dst = dst;
513519

@@ -701,7 +707,7 @@ namespace snmalloc
701707
* No rep movsb in reverse (ERMS doesn't support DF=1), so use
702708
* SSE-width block_reverse_copy.
703709
*/
704-
static void* move(void* dst, const void* src, size_t len)
710+
static void* reverse_move(void* dst, const void* src, size_t len)
705711
{
706712
auto orig_dst = dst;
707713
block_reverse_copy<LargestRegisterSize>(dst, src, len);
@@ -770,7 +776,7 @@ namespace snmalloc
770776
/**
771777
* Reverse copy for overlapping memmove where dst > src.
772778
*/
773-
static void* move(void* dst, const void* src, size_t len)
779+
static void* reverse_move(void* dst, const void* src, size_t len)
774780
{
775781
auto orig_dst = dst;
776782
block_reverse_copy<LargestRegisterSize>(dst, src, len);
@@ -823,8 +829,9 @@ namespace snmalloc
823829

824830
/**
825831
* Snmalloc checked memmove. Handles overlapping source and destination
826-
* by selecting forward copy (Arch::copy) or reverse copy (Arch::move)
827-
* as appropriate.
832+
* by selecting Arch::copy (no overlap), Arch::forward_move (dst < src,
833+
* overlap-safe forward) or Arch::reverse_move (dst > src, overlap-safe
834+
* reverse) as appropriate.
828835
*/
829836
template<
830837
bool Checked,
@@ -849,7 +856,7 @@ namespace snmalloc
849856
{
850857
if ((dst_addr - src_addr) >= len)
851858
return Arch::copy(dst, src, len); // no overlap
852-
return Arch::move(dst, src, len); // reverse copy
859+
return Arch::reverse_move(dst, src, len); // reverse copy
853860
}
854861
// dst_addr < src_addr (dst == src already handled)
855862
if ((src_addr - dst_addr) >= len)

0 commit comments

Comments
 (0)