Skip to content

Commit 3961197

Browse files
committed
refactor(dflash): rename namespace dflash27b → dflash::common
Mechanical rename per weicj's review on PR #237 — the legacy namespace name baked the first backend (Qwen3-27B) into shared code. Renaming to dflash::common removes the backend leak from the substrate so future backends plug into a neutral namespace.

Scope:
- namespace dflash27b → namespace dflash::common
- dflash27b::* → dflash::common::*
- CMake static lib dflash27b → dflash_common
- CMake project(dflash27b) → project(dflash)
- Private CMake vars _dflash27b_* → _dflash_*
- Stale comment references

Out of scope (deferred to a follow-up):
- Public C header dflash/include/dflash27b.h
- C symbol dflash27b_last_error()
- Preprocessor macros DFLASH27B_*
- Env vars DFLASH27B_*

Build note: CUDA 12.6 + GCC 13.3 has a known _Float128 conflict during CUDA host-compiler ID detection. Workaround: -DCMAKE_CUDA_HOST_COMPILER=/usr/bin/g++-11

No behavior change. Clean build green. Symbol mangling confirmed as N6dflash6common* via nm; zero residual dflash27b symbols outside the deferred public C ABI.
1 parent 42f36f1 commit 3961197

141 files changed

Lines changed: 1126 additions & 437 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

dflash/CMakeLists.txt

Lines changed: 93 additions & 93 deletions
Large diffs are not rendered by default.

dflash/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ Full `bench_llm.py` suite on Qwen3.6-27B UD-Q4_K_XL, 10 prompts, n_gen=256, RTX
210210
and dispatches by arch:
211211

212212
- `qwen35` / `qwen36` → existing DFlash + DDTree pipeline (no change).
213-
- `laguna` → `dflash27b::run_laguna_daemon()` (no spec-decode, no DDTree).
213+
- `laguna` → `dflash::common::run_laguna_daemon()` (no spec-decode, no DDTree).
214214

215215
The daemon stdin/stream-fd protocol is identical, so `scripts/server.py`
216216
drives both arches end-to-end. The only thing the user changes is `--target`.

dflash/scripts/server.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ def _extra_daemon_has_target_sharding(extra: list[str] | None) -> bool:
132132
# Architecture strings stored in `general.architecture` of every GGUF this
133133
# server can drive. test_dflash dispatches by GGUF arch internally:
134134
# qwen35 / qwen36 -> existing DFlash + DDTree pipeline
135-
# laguna -> dflash27b::run_laguna_daemon() (no spec-decode)
135+
# laguna -> dflash::common::run_laguna_daemon() (no spec-decode)
136136
# server.py just needs to omit --draft + the DFlash/DDTree flags when the
137137
# arch doesn't support speculative decoding yet.
138138
_QWEN35_ARCHES = {"qwen35", "qwen36"}
@@ -843,7 +843,7 @@ async def _openai_compat_error_handler(_request: Request, exc: OpenAICompatError
843843

844844
if arch in _LAGUNA_ARCHES:
845845
# test_dflash detects arch=laguna from the GGUF and dispatches
846-
# internally to dflash27b::run_laguna_daemon(). No --draft, no
846+
# internally to dflash::common::run_laguna_daemon(). No --draft, no
847847
# --fast-rollback, no --ddtree (no Laguna spec-decode draft yet).
848848
# Tokens stream as int32 LE on stream_fd terminated by -1, byte-
849849
# identical to the qwen35 path so SSE/stream consumers stay shared.

dflash/src/bsa_launcher.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ template<typename T, int Headdim, bool Is_causal>
2828
void run_mha_fwd_block_(Flash_fwd_params &params, cudaStream_t stream);
2929
}
3030

31-
namespace dflash27b {
31+
namespace dflash::common {
3232
namespace flashprefill {
3333

3434
namespace {
@@ -275,4 +275,4 @@ fail:
275275
}
276276

277277
} // namespace flashprefill
278-
} // namespace dflash27b
278+
} // namespace dflash::common

dflash/src/bsa_launcher_hip.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
#include <cstdint>
2020
#include <cstdlib> // size_t
2121

22-
namespace dflash27b {
22+
namespace dflash::common {
2323
namespace flashprefill {
2424

2525
// Defined in flashprefill_kernels.hip.cu.
@@ -108,4 +108,4 @@ extern "C" int launch_bsa_sparse_flash_forward_bf16(
108108
}
109109

110110
} // namespace flashprefill
111-
} // namespace dflash27b
111+
} // namespace dflash::common

dflash/src/common/attn_masks.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
#include <cstdint>
1111
#include <vector>
1212

13-
namespace dflash27b {
13+
namespace dflash::common {
1414

1515
// Minimum alignment required by ggml flash_attn_ext for mask rows.
1616
static constexpr int KQ_MASK_PAD = 32;
@@ -75,4 +75,4 @@ inline void build_tree_mask(const DDTree & tree, int past_length,
7575
}
7676
}
7777

78-
} // namespace dflash27b
78+
} // namespace dflash::common

dflash/src/common/backend_factory.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
#include <cstdio>
1212

13-
namespace dflash27b {
13+
namespace dflash::common {
1414

1515
std::string detect_arch(const char * model_path) {
1616
auto info = inspect_gguf_model_info(model_path);
@@ -107,4 +107,4 @@ std::unique_ptr<ModelBackend> create_backend(const BackendArgs & args) {
107107
}
108108
}
109109

110-
} // namespace dflash27b
110+
} // namespace dflash::common

dflash/src/common/backend_factory.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
#include <memory>
1717
#include <string>
1818

19-
namespace dflash27b {
19+
namespace dflash::common {
2020

2121
// ─── Backend creation arguments ─────────────────────────────────────────
2222
// A superset of all per-arch config fields. The factory reads only those
@@ -62,4 +62,4 @@ std::unique_ptr<ModelBackend> create_backend(const BackendArgs & args);
6262
// Useful for early dispatch (e.g. printing which backend will be used).
6363
std::string detect_arch(const char * model_path);
6464

65-
} // namespace dflash27b
65+
} // namespace dflash::common

dflash/src/common/daemon_loop.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
#define ssize_t long
2626
#endif
2727

28-
namespace dflash27b {
28+
namespace dflash::common {
2929

3030
// ── DaemonIO ────────────────────────────────────────────────────────────
3131

@@ -424,4 +424,4 @@ int run_daemon(ModelBackend & backend, const DaemonLoopArgs & args) {
424424
return 0;
425425
}
426426

427-
} // namespace dflash27b
427+
} // namespace dflash::common

dflash/src/common/daemon_loop.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
#include "model_backend.h"
1313

14-
namespace dflash27b {
14+
namespace dflash::common {
1515

1616
struct DaemonLoopArgs {
1717
int stream_fd = -1;
@@ -23,4 +23,4 @@ struct DaemonLoopArgs {
2323
// commands until `quit`, `exit`, or EOF. Returns 0 on clean shutdown.
2424
int run_daemon(ModelBackend & backend, const DaemonLoopArgs & args);
2525

26-
} // namespace dflash27b
26+
} // namespace dflash::common

0 commit comments

Comments
 (0)