@@ -31,25 +31,31 @@ namespace snmalloc
3131
3232 /* *
3333 * Copy a single element where source and destination may overlap.
34- * Uses __builtin_memmove which the compiler can optimize to register-width
35- * loads/stores while correctly handling overlap. We cannot use
36- * __builtin_memcpy_inline (ASan treats it as memcpy and flags overlap)
37- * or struct copy (compiler lowers *d = *s to a memcpy call, same problem).
34+ *
35+ * Reads src into a fresh stack temporary, then writes the temporary to
36+ * dst. The temporary aliases neither src nor dst, so each half is a
37+ * non-overlapping copy: ASan stays happy, and we avoid __builtin_memmove
38+ * (which is permitted to lower back to a memmove call and could recurse
39+ * into our own memmove when this header is used to implement that libc
40+ * symbol). For the sizes we actually instantiate (1, sizeof(uint64_t),
41+ * 16) every supported toolchain compiles the struct assignments below
42+ * to register-width loads/stores rather than a library call.
43+ *
44+ * On StrictProvenance/CHERI this helper is only invoked with Size == 1
45+ * (via block_copy_move<1>/block_reverse_copy<1>); the pointer-preserving
46+ * paths copy through Ptr2/void** directly, so capability tags are
47+ * preserved by the surrounding code, not here.
3848 */
3949 template <size_t Size>
4050 SNMALLOC_FAST_PATH_INLINE void copy_one_move (void * dst, const void * src)
4151 {
42- #if __has_builtin(__builtin_memmove)
43- __builtin_memmove (dst, src, Size);
44- #else
45- // Fallback: byte-by-byte copy through a temporary buffer to avoid
46- // the compiler generating a memcpy call for struct assignment.
47- char tmp[Size];
48- for (size_t i = 0 ; i < Size; ++i)
49- tmp[i] = static_cast <const char *>(src)[i];
50- for (size_t i = 0 ; i < Size; ++i)
51- static_cast <char *>(dst)[i] = tmp[i];
52- #endif
52+ struct Block
53+ {
54+ char data[Size];
55+ };
56+
57+ Block tmp = *static_cast <const Block*>(src);
58+ *static_cast <Block*>(dst) = tmp;
5359 }
5460
5561 /* *
@@ -257,7 +263,7 @@ namespace snmalloc
257263 * Reverse copy for overlapping memmove where dst > src.
258264 * Caller guarantees len > 0 and dst != src.
259265 */
260- static void * move (void * dst, const void * src, size_t len)
266+ static void * reverse_move (void * dst, const void * src, size_t len)
261267 {
262268 auto orig_dst = dst;
263269 block_reverse_copy<LargestRegisterSize>(dst, src, len);
@@ -507,7 +513,7 @@ namespace snmalloc
507513 * to a pointer-aligned address in the other, so integer-only
508514 * reverse copy is safe.
509515 */
510- static void * move (void * dst, const void * src, size_t len)
516+ static void * reverse_move (void * dst, const void * src, size_t len)
511517 {
512518 auto orig_dst = dst;
513519
@@ -701,7 +707,7 @@ namespace snmalloc
701707 * No rep movsb in reverse (ERMS doesn't support DF=1), so use
702708 * SSE-width block_reverse_copy.
703709 */
704- static void * move (void * dst, const void * src, size_t len)
710+ static void * reverse_move (void * dst, const void * src, size_t len)
705711 {
706712 auto orig_dst = dst;
707713 block_reverse_copy<LargestRegisterSize>(dst, src, len);
@@ -770,7 +776,7 @@ namespace snmalloc
770776 /* *
771777 * Reverse copy for overlapping memmove where dst > src.
772778 */
773- static void * move (void * dst, const void * src, size_t len)
779+ static void * reverse_move (void * dst, const void * src, size_t len)
774780 {
775781 auto orig_dst = dst;
776782 block_reverse_copy<LargestRegisterSize>(dst, src, len);
@@ -823,8 +829,9 @@ namespace snmalloc
823829
824830 /* *
825831 * Snmalloc checked memmove. Handles overlapping source and destination
826- * by selecting forward copy (Arch::copy) or reverse copy (Arch::move)
827- * as appropriate.
832+ * by selecting Arch::copy (no overlap), Arch::forward_move (dst < src,
833+ * overlap-safe forward) or Arch::reverse_move (dst > src, overlap-safe
834+ * reverse) as appropriate.
828835 */
829836 template <
830837 bool Checked,
@@ -849,7 +856,7 @@ namespace snmalloc
849856 {
850857 if ((dst_addr - src_addr) >= len)
851858 return Arch::copy (dst, src, len); // no overlap
852- return Arch::move (dst, src, len); // reverse copy
859+ return Arch::reverse_move (dst, src, len); // reverse copy
853860 }
854861 // dst_addr < src_addr (dst == src already handled)
855862 if ((src_addr - dst_addr) >= len)
0 commit comments