Skip to content

Commit cb9d0bb

Browse files
committed
1
1 parent 814c2cd commit cb9d0bb

8 files changed

Lines changed: 109 additions & 294 deletions

File tree

.github/workflows/ci.yml

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,15 @@ jobs:
2020
runs-on: ubuntu-latest
2121
steps:
2222
- uses: actions/checkout@v4
23+
with:
24+
fetch-depth: 0
2325
- name: Install clang-format
2426
run: sudo apt-get install -y -qq clang-format-18
25-
- name: Run clang-format
26-
run: find taskflowlite \( -name '*.hpp' -o -name '*.cpp' \) | xargs clang-format-18 --dry-run --Werror
27+
- name: Run clang-format (changed files only, advisory)
28+
run: |
29+
git diff origin/main --name-only --diff-filter=AM \
30+
'taskflowlite/**/*.hpp' 'taskflowlite/**/*.cpp' 'test/**/*.cpp' 'examples/**/*.cpp' | \
31+
xargs -r clang-format-18 --dry-run --Wno-error
2732
2833
# clang-tidy (PR only)
2934
clang-tidy:

.github/workflows/macos.yml

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,16 @@ jobs:
99
runs-on: macos-latest
1010
steps:
1111
- uses: actions/checkout@v4
12-
- name: Ensure Xcode 16 (for C++20 <stop_token>)
12+
- name: Select latest Xcode (for C++20 <stop_token>)
1313
run: |
14-
XCODE=$(ls -d /Applications/Xcode_16*.app 2>/dev/null | sort -Vr | head -1)
15-
if [ -n "$XCODE" ]; then sudo xcode-select -s "$XCODE"; fi
16-
xcode-select -p
14+
LATEST=$(ls -d /Applications/Xcode_*.app 2>/dev/null | sort -Vr | head -1)
15+
if [ -n "$LATEST" ]; then
16+
sudo xcode-select -s "$LATEST"
17+
echo "Switched to $(xcode-select -p)"
18+
else
19+
echo "No Xcode found, using default: $(xcode-select -p)"
20+
fi
21+
clang++ --version
1722
- name: Configure
1823
run: cmake -S . -B build -DCMAKE_BUILD_TYPE=Debug -DTASKFLOWLITE_BUILD_TESTS=ON -DTASKFLOWLITE_SANITIZER=ASAN
1924
- name: Build
@@ -27,11 +32,16 @@ jobs:
2732
runs-on: macos-latest
2833
steps:
2934
- uses: actions/checkout@v4
30-
- name: Ensure Xcode 16 (for C++20 <stop_token>)
35+
- name: Select latest Xcode (for C++20 <stop_token>)
3136
run: |
32-
XCODE=$(ls -d /Applications/Xcode_16*.app 2>/dev/null | sort -Vr | head -1)
33-
if [ -n "$XCODE" ]; then sudo xcode-select -s "$XCODE"; fi
34-
xcode-select -p
37+
LATEST=$(ls -d /Applications/Xcode_*.app 2>/dev/null | sort -Vr | head -1)
38+
if [ -n "$LATEST" ]; then
39+
sudo xcode-select -s "$LATEST"
40+
echo "Switched to $(xcode-select -p)"
41+
else
42+
echo "No Xcode found, using default: $(xcode-select -p)"
43+
fi
44+
clang++ --version
3545
- name: Configure
3646
run: cmake -S . -B build -DCMAKE_BUILD_TYPE=Debug -DTASKFLOWLITE_BUILD_TESTS=ON -DTASKFLOWLITE_SANITIZER=TSAN
3747
- name: Build
@@ -50,11 +60,16 @@ jobs:
5060
runs-on: macos-latest
5161
steps:
5262
- uses: actions/checkout@v4
53-
- name: Ensure Xcode 16 (for C++20 <stop_token>)
63+
- name: Select latest Xcode (for C++20 <stop_token>)
5464
run: |
55-
XCODE=$(ls -d /Applications/Xcode_16*.app 2>/dev/null | sort -Vr | head -1)
56-
if [ -n "$XCODE" ]; then sudo xcode-select -s "$XCODE"; fi
57-
xcode-select -p
65+
LATEST=$(ls -d /Applications/Xcode_*.app 2>/dev/null | sort -Vr | head -1)
66+
if [ -n "$LATEST" ]; then
67+
sudo xcode-select -s "$LATEST"
68+
echo "Switched to $(xcode-select -p)"
69+
else
70+
echo "No Xcode found, using default: $(xcode-select -p)"
71+
fi
72+
clang++ --version
5873
- name: Configure
5974
run: cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DTASKFLOWLITE_BUILD_TESTS=ON
6075
- name: Build

CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,13 @@ if(TFL_IS_TOP_LEVEL)
185185
-Wno-unused-variable
186186
$<$<CONFIG:Release>:-O3 -march=native>
187187
)
188+
189+
# x86-64 cmpxchg16b 硬件普遍支持,但 GCC 需要 -mcx16 才暴露 __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16
190+
# FreeStack (128-bit DWCAS) 需要 cmpxchg16b 支持
191+
if((CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(AMD64)" OR
192+
CMAKE_SYSTEM_PROCESSOR MATCHES "(i[3456]86)"))
193+
target_compile_options(tfl_internal_flags INTERFACE -mcx16)
194+
endif()
188195
elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
189196
target_compile_options(tfl_internal_flags INTERFACE
190197
/W4 /Zc:preprocessor

taskflowlite/core/free_stack.hpp

Lines changed: 14 additions & 223 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/// @file free_stack.hpp
1+
/// @file free_stack.hpp
22
/// @brief 无锁侵入式栈 FreeStack —— Work 内存池的并发原语 (ABA-safe Treiber 栈)。
33
/// @author wicyn
44
/// @contact https://github.com/wicyn
@@ -21,86 +21,8 @@
2121

2222
namespace tfl {
2323

24-
namespace detail {
25-
26-
// ============================================================================
27-
// 平台能力探测
28-
// ============================================================================
29-
30-
// ---- HWASAN / ASAN 归一化 (GCC 用 __SANITIZE_*, Clang 用 __has_feature) --
31-
#if defined(__has_feature)
32-
# if __has_feature(hwaddress_sanitizer)
33-
# define TFL_HAS_HWASAN 1
34-
# endif
35-
# if __has_feature(address_sanitizer)
36-
# define TFL_HAS_ASAN 1
37-
# endif
38-
#endif
39-
#if defined(__SANITIZE_HWADDRESS__)
40-
# define TFL_HAS_HWASAN 1
41-
#endif
42-
#if defined(__SANITIZE_ADDRESS__)
43-
# define TFL_HAS_ASAN 1
44-
#endif
45-
46-
/// @brief 平台是否必须用 128-bit (指针可能 > 48-bit 或高位被污染)。
47-
/// @details LA57 / LVA / HWASAN / ASAN / MTE / PAC 等会占用指针高位, 48-bit 打包不安全。
48-
inline constexpr bool kRequires128 =
49-
#if defined(TFL_PLATFORM_LA57) || \
50-
defined(TFL_PLATFORM_ARM64_LVA) || \
51-
defined(TFL_HAS_HWASAN) || \
52-
defined(TFL_HAS_ASAN) || \
53-
defined(__ARM_FEATURE_MEMORY_TAGGING) || \
54-
defined(__ARM_FEATURE_PAC_DEFAULT)
55-
true
56-
#else
57-
false
58-
#endif
59-
;
60-
61-
/// @brief DWCAS 是否硬件可用 (编译期判定)。
62-
///
63-
/// @details 不依赖 std::atomic<16B>::is_always_lock_free —— 它在某些 ABI 下
64-
/// 即使硬件支持也返回 false (libstdc++ 走 libatomic 路径)。
65-
/// 直接看编译器宏更准。
66-
inline constexpr bool kHasLockFree128 =
67-
#if defined(__x86_64__) || defined(_M_X64)
68-
#if defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16)
69-
true
70-
#elif defined(_MSC_VER)
71-
true
72-
#else
73-
false
74-
#endif
75-
#elif defined(__aarch64__)
76-
#if defined(__ARM_FEATURE_ATOMICS)
77-
true
78-
#else
79-
false
80-
#endif
81-
#else
82-
false
83-
#endif
84-
;
85-
86-
8724
// ============================================================================
88-
// Tagged128 —— 16 字节 tagged pointer 布局
89-
// ============================================================================
90-
91-
/// @brief 16 字节对齐的 tagged pointer (ptr 8B + tag 8B), 供 DWCAS 单指令比较。
92-
struct alignas(16) Tagged128 {
93-
void* ptr;
94-
std::size_t tag;
95-
};
96-
static_assert(sizeof(Tagged128) == 16);
97-
static_assert(std::is_trivially_copyable_v<Tagged128>);
98-
99-
} // namespace detail
100-
101-
102-
// ============================================================================
103-
// FreeStack128 —— 16 字节 tagged pointer 版本 (优先)
25+
// FreeStack
10426
// ============================================================================
10527

10628
/// @brief ABA-safe 无锁 LIFO 栈(128-bit tagged pointer DWCAS 实现)。
@@ -109,10 +31,10 @@ static_assert(std::is_trivially_copyable_v<Tagged128>);
10931
/// 需 x86-64 cmpxchg16b 或 ARM64 LSE CASP 硬件支持。整体 alignas(2*cache_line_size) 避免伪共享。
11032
///
11133
/// @note chunk 处于 "storage available, no object" 状态,前 sizeof(void*) 字节可安全复用为链接指针。
112-
class alignas(cache_line_size * 2) FreeStack128 : public Immovable<FreeStack128> {
34+
class alignas(cache_line_size * 2) FreeStack : public Immovable<FreeStack> {
11335
public:
11436
/// @brief 构造空栈 —— 头指针置为 {nullptr, 0}。
115-
FreeStack128() noexcept {
37+
FreeStack() noexcept {
11638
m_head.store(Tagged{nullptr, 0}, std::memory_order_relaxed);
11739
}
11840

@@ -151,154 +73,23 @@ class alignas(cache_line_size * 2) FreeStack128 : public Immovable<FreeStack128>
15173
}
15274

15375
private:
154-
using Tagged = detail::Tagged128;
155-
156-
/// @brief head 单独占一个 cache line, 避免与外部字段伪共享。
157-
alignas(cache_line_size) std::atomic<Tagged> m_head;
158-
};
159-
160-
161-
// ============================================================================
162-
// FreeStack48 —— 48-bit ptr + 16-bit tag 打包版本 (回退)
163-
// ============================================================================
164-
165-
/// @brief ABA-safe 无锁 LIFO 栈(48-bit ptr + 16-bit tag 打包为 64-bit CAS)。
166-
///
167-
/// 单条 cmpxchg 完成,任何 x86-64/ARM64 均 always_lock_free,延迟更低。
168-
/// 仅适用于指针 <= 48-bit 的平台(非 LA57/LVA/HWASAN/ASAN/MTE/PAC)。
169-
///
170-
/// @note 2^16 tag 在 Work 池规模下不会触发 ABA 回卷。
171-
class alignas(cache_line_size * 2) FreeStack48 : public Immovable<FreeStack48> {
172-
public:
173-
/// @brief 构造空栈 —— 头指针置为 0 (48-bit packed 表示)。
174-
FreeStack48() noexcept {
175-
m_head.store(0, std::memory_order_relaxed);
176-
}
177-
178-
/// @brief 检查栈是否为空 (快照, 并发下可能立即过时)。
179-
/// @return 头指针为 null 时返回 true。
180-
[[nodiscard]] bool empty() const noexcept {
181-
return Tagged::ptr_of(m_head.load(std::memory_order_relaxed)) == nullptr;
182-
}
183-
184-
/// @brief 推入一个 chunk 到栈顶 (release-CAS)。
185-
/// @param p 待推入 chunk 起始地址; 必须满足前提且地址 < 2^48。
186-
TFL_FORCE_INLINE void push(void* p) noexcept {
187-
std::uint64_t curr = m_head.load(std::memory_order_relaxed);
188-
while (true) {
189-
ChunkLink::store(p, Tagged::ptr_of(curr));
190-
const std::uint64_t next = Tagged::pack(p, Tagged::tag_of(curr) + 1);
191-
if (m_head.compare_exchange_weak(curr, next, std::memory_order_release, std::memory_order_relaxed)) {
192-
return;
193-
}
194-
// CAS 失败: curr 已自动重载到最新值, 下轮重新链接重试
195-
}
196-
}
197-
198-
/// @brief 从栈顶弹出一个 chunk (acquire-CAS); 空则返回 nullptr。
199-
/// @return chunk 起始地址, 或 nullptr (栈空)。
200-
[[nodiscard]] TFL_FORCE_INLINE void* pop() noexcept {
201-
std::uint64_t curr = m_head.load(std::memory_order_acquire);
202-
while (true) {
203-
void* top = Tagged::ptr_of(curr);
204-
if (top == nullptr) return nullptr;
205-
// top 可能已被别的线程 pop 后再 push 回来, ChunkLink::load
206-
// 读到的 link 也许 "旧", 但 tag 不匹配会让 CAS 失败,
207-
// 旧 link 不会污染 head —— ABA 由 tag 兜底
208-
void* link = ChunkLink::load(top);
209-
const std::uint64_t next = Tagged::pack(link, Tagged::tag_of(curr) + 1);
210-
if (m_head.compare_exchange_weak(curr, next, std::memory_order_release, std::memory_order_acquire)) {
211-
return top;
212-
}
213-
}
214-
}
215-
216-
private:
217-
// ========================================================================
218-
// Tagged —— 48-bit ptr (低) | 16-bit tag (高) 打包/解包
219-
// ========================================================================
220-
221-
/// @brief 把 (ptr, tag) 压成 64-bit, 绕过 16-byte DWCAS 的 ABI 依赖。
222-
///
223-
/// @details 全部为 static 纯函数, Tagged 本身无状态 —— 仅作命名空间用, 无运行时开销。
224-
struct Tagged {
225-
/// @brief 容器总位宽 —— uint64_t 标准保证 64 位。
226-
static constexpr unsigned TOTAL_BITS = sizeof(std::uint64_t) * char_bits;
227-
/// @brief 平台契约: x86_64 / AArch64 用户态规范地址, 有效虚地址 <= 48 位。
228-
/// 若未来需支持 LA57 / 52-bit AArch64, 改此处即可, TAG_BITS 自动收缩。
229-
static constexpr unsigned PTR_BITS = 48;
230-
static constexpr unsigned TAG_BITS = TOTAL_BITS - PTR_BITS;
23176

232-
static constexpr unsigned TAG_SHIFT = PTR_BITS;
233-
static constexpr std::uint64_t PTR_MASK = (std::uint64_t{1} << PTR_BITS) - 1;
234-
static constexpr std::uint64_t TAG_MASK = ((std::uint64_t{1} << TAG_BITS) - 1) << TAG_SHIFT;
77+
// ============================================================================
78+
// Tagged —— 16 字节 tagged pointer 布局
79+
// ============================================================================
23580

236-
static_assert(TOTAL_BITS == 64, "Tagged depends on 64-bit uint64_t");
237-
static_assert(PTR_BITS + TAG_BITS == TOTAL_BITS);
238-
static_assert(PTR_BITS > 0 && TAG_BITS > 0);
239-
240-
/// @brief 从 64-bit 值中提取指针 (低 48 位)。
241-
static void* ptr_of(std::uint64_t v) noexcept {
242-
return reinterpret_cast<void*>(v & PTR_MASK);
243-
}
244-
/// @brief 从 64-bit 值中提取 tag (高 16 位)。
245-
static std::uint64_t tag_of(std::uint64_t v) noexcept {
246-
return v >> TAG_SHIFT;
247-
}
248-
/// @brief 打包 (ptr, tag) -> uint64_t。
249-
/// @param p 必须在低 48 位 canonical 范围内。
250-
/// @param tag 必须 < 2^16。
251-
static std::uint64_t pack(void* p, std::uint64_t tag) noexcept {
252-
const auto pi = reinterpret_cast<std::uintptr_t>(p);
253-
assert((pi & ~PTR_MASK) == 0); ///< 指针契约检查
254-
assert(tag < (std::uint64_t{1} << TAG_BITS)); ///< tag 越界对称检查
255-
return (tag << TAG_SHIFT) | pi;
256-
}
257-
/// @brief 检查指针是否在低 48 位 canonical 范围内。
258-
static bool is_canonical(void* p) noexcept {
259-
return (reinterpret_cast<std::uintptr_t>(p) & ~PTR_MASK) == 0;
260-
}
81+
/// @brief 16 字节对齐的 tagged pointer (ptr 8B + tag 8B), 供 DWCAS 单指令比较。
82+
struct alignas(16) Tagged {
83+
void* ptr;
84+
std::size_t tag;
26185
};
86+
static_assert(sizeof(Tagged) == 16);
87+
static_assert(std::is_trivially_copyable_v<Tagged>);
26288

263-
static_assert(sizeof(void*) == 8, "FreeStack48 assumes 64-bit pointer (x86-64 / ARM64)");
264-
static_assert(std::atomic<std::uint64_t>::is_always_lock_free, "FreeStack48 requires lock-free 8-byte atomics");
26589

26690
/// @brief head 单独占一个 cache line, 避免与外部字段伪共享。
267-
alignas(cache_line_size) std::atomic<std::uint64_t> m_head;
91+
alignas(cache_line_size) std::atomic<Tagged> m_head;
26892
};
26993

27094

271-
// ============================================================================
272-
// FreeStack —— 编译期平台选择
273-
// ============================================================================
274-
275-
#if defined(TFL_FREESTACK_FORCE_128BIT) && defined(TFL_FREESTACK_FORCE_48BIT)
276-
# error "TFL_FREESTACK_FORCE_128BIT and TFL_FREESTACK_FORCE_48BIT are mutually exclusive"
277-
#endif
278-
279-
#if defined(TFL_FREESTACK_FORCE_128BIT)
280-
281-
static_assert(detail::kHasLockFree128,
282-
"TFL_FREESTACK_FORCE_128BIT requires hardware DWCAS support "
283-
"(compile with -mcx16 / -march=armv8.1-a+lse / /arch:AVX)");
284-
using FreeStack = FreeStack128;
285-
286-
#elif defined(TFL_FREESTACK_FORCE_48BIT)
287-
288-
static_assert(!detail::kRequires128,
289-
"TFL_FREESTACK_FORCE_48BIT is unsafe on this platform "
290-
"(LA57 / LVA / HWASAN / MTE / PAC pollutes pointer high bits)");
291-
using FreeStack = FreeStack48;
292-
293-
#else
294-
295-
static_assert(!(detail::kRequires128 && !detail::kHasLockFree128),
296-
"Platform requires FreeStack128 (high pointer bits used) "
297-
"but lock-free 128-bit CAS is unavailable on this target");
298-
299-
/// @brief 平台最优 FreeStack 别名: 128-bit CAS 可用时选 FreeStack128,否则回退 FreeStack48。
300-
using FreeStack = std::conditional_t<detail::kRequires128 || detail::kHasLockFree128, FreeStack128, FreeStack48>;
301-
302-
#endif
303-
30495
} // namespace tfl

taskflowlite/core/runtime.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -512,7 +512,7 @@ inline void Runtime::submit(Task task) {
512512
template <typename Gh>
513513
requires graph_holder<Gh>
514514
inline void Runtime::submit(Gh& gh) {
515-
auto& graph = detail::to_graph(detail::unwrap(gh));
515+
auto& graph = detail::to_graph(gh);
516516
if (graph.empty()) {
517517
return;
518518
}

0 commit comments

Comments
 (0)