Luce-Org
diff --git a/‎dflash/CMakeLists.txt‎
Lines changed: 93 additions & 93 deletions b/‎dflash/CMakeLists.txt‎
Lines changed: 93 additions & 93 deletions
diff --git a/‎dflash/README.md‎
Lines changed: 1 addition & 1 deletion b/‎dflash/README.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎dflash/scripts/server.py‎
Lines changed: 2 additions & 2 deletions b/‎dflash/scripts/server.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎dflash/src/bsa_launcher.cu‎
Lines changed: 2 additions & 2 deletions b/‎dflash/src/bsa_launcher.cu‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎dflash/src/bsa_launcher_hip.cu‎
Lines changed: 2 additions & 2 deletions b/‎dflash/src/bsa_launcher_hip.cu‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎dflash/src/common/attn_masks.h‎
Lines changed: 2 additions & 2 deletions b/‎dflash/src/common/attn_masks.h‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎dflash/src/common/backend_factory.cpp‎
Lines changed: 2 additions & 2 deletions b/‎dflash/src/common/backend_factory.cpp‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎dflash/src/common/backend_factory.h‎
Lines changed: 2 additions & 2 deletions b/‎dflash/src/common/backend_factory.h‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎dflash/src/common/daemon_loop.cpp‎
Lines changed: 2 additions & 2 deletions b/‎dflash/src/common/daemon_loop.cpp‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎dflash/src/common/daemon_loop.h‎
Lines changed: 2 additions & 2 deletions b/‎dflash/src/common/daemon_loop.h‎
Lines changed: 2 additions & 2 deletions
@@ -210,7 +210,7 @@ Full `bench_llm.py` suite on Qwen3.6-27B UD-Q4_K_XL, 10 prompts, n_gen=256, RTX
 and dispatches by arch:
 
   - `qwen35` / `qwen36` → existing DFlash + DDTree pipeline (no change).
-  - `laguna` → `dflash27b::run_laguna_daemon()` (no spec-decode, no DDTree).
+  - `laguna` → `dflash::common::run_laguna_daemon()` (no spec-decode, no DDTree).
 
 The daemon stdin/stream-fd protocol is identical, so `scripts/server.py`
 drives both arches end-to-end. The only thing the user changes is `--target`.
 
@@ -132,7 +132,7 @@ def _extra_daemon_has_target_sharding(extra: list[str] | None) -> bool:
 # Architecture strings stored in `general.architecture` of every GGUF this
 # server can drive. test_dflash dispatches by GGUF arch internally:
 #   qwen35 / qwen36  -> existing DFlash + DDTree pipeline
-#   laguna           -> dflash27b::run_laguna_daemon() (no spec-decode)
+#   laguna           -> dflash::common::run_laguna_daemon() (no spec-decode)
 # server.py just needs to omit --draft + the DFlash/DDTree flags when the
 # arch doesn't support speculative decoding yet.
 _QWEN35_ARCHES = {"qwen35", "qwen36"}
@@ -843,7 +843,7 @@ async def _openai_compat_error_handler(_request: Request, exc: OpenAICompatError
 
     if arch in _LAGUNA_ARCHES:
         # test_dflash detects arch=laguna from the GGUF and dispatches
-        # internally to dflash27b::run_laguna_daemon(). No --draft, no
+        # internally to dflash::common::run_laguna_daemon(). No --draft, no
         # --fast-rollback, no --ddtree (no Laguna spec-decode draft yet).
         # Tokens stream as int32 LE on stream_fd terminated by -1, byte-
         # identical to the qwen35 path so SSE/stream consumers stay shared.
 
@@ -28,7 +28,7 @@ template<typename T, int Headdim, bool Is_causal>
 void run_mha_fwd_block_(Flash_fwd_params &params, cudaStream_t stream);
 }
 
-namespace dflash27b {
+namespace dflash::common {
 namespace flashprefill {
 
 namespace {
@@ -275,4 +275,4 @@ fail:
 }
 
 }  // namespace flashprefill
-}  // namespace dflash27b
+}  // namespace dflash::common
@@ -19,7 +19,7 @@
 #include <cstdint>
 #include <cstdlib>          // size_t
 
-namespace dflash27b {
+namespace dflash::common {
 namespace flashprefill {
 
 // Defined in flashprefill_kernels.hip.cu.
@@ -108,4 +108,4 @@ extern "C" int launch_bsa_sparse_flash_forward_bf16(
 }
 
 }  // namespace flashprefill
-}  // namespace dflash27b
+}  // namespace dflash::common
@@ -10,7 +10,7 @@
 #include <cstdint>
 #include <vector>
 
-namespace dflash27b {
+namespace dflash::common {
 
 // Minimum alignment required by ggml flash_attn_ext for mask rows.
 static constexpr int KQ_MASK_PAD = 32;
@@ -75,4 +75,4 @@ inline void build_tree_mask(const DDTree & tree, int past_length,
     }
 }
 
-}  // namespace dflash27b
+}  // namespace dflash::common
@@ -10,7 +10,7 @@
 
 #include <cstdio>
 
-namespace dflash27b {
+namespace dflash::common {
 
 std::string detect_arch(const char * model_path) {
     auto info = inspect_gguf_model_info(model_path);
@@ -107,4 +107,4 @@ std::unique_ptr<ModelBackend> create_backend(const BackendArgs & args) {
     }
 }
 
-}  // namespace dflash27b
+}  // namespace dflash::common
@@ -16,7 +16,7 @@
 #include <memory>
 #include <string>
 
-namespace dflash27b {
+namespace dflash::common {
 
 // ─── Backend creation arguments ─────────────────────────────────────────
 // A superset of all per-arch config fields. The factory reads only those
@@ -62,4 +62,4 @@ std::unique_ptr<ModelBackend> create_backend(const BackendArgs & args);
 // Useful for early dispatch (e.g. printing which backend will be used).
 std::string detect_arch(const char * model_path);
 
-}  // namespace dflash27b
+}  // namespace dflash::common
@@ -25,7 +25,7 @@
 #define ssize_t long
 #endif
 
-namespace dflash27b {
+namespace dflash::common {
 
 // ── DaemonIO ────────────────────────────────────────────────────────────
 
@@ -424,4 +424,4 @@ int run_daemon(ModelBackend & backend, const DaemonLoopArgs & args) {
     return 0;
 }
 
-}  // namespace dflash27b
+}  // namespace dflash::common
@@ -11,7 +11,7 @@
 
 #include "model_backend.h"
 
-namespace dflash27b {
+namespace dflash::common {
 
 struct DaemonLoopArgs {
     int stream_fd = -1;
@@ -23,4 +23,4 @@ struct DaemonLoopArgs {
 // commands until `quit`, `exit`, or EOF.  Returns 0 on clean shutdown.
 int run_daemon(ModelBackend & backend, const DaemonLoopArgs & args);
 
-}  // namespace dflash27b
+}  // namespace dflash::common
Original file line number	Diff line number	Diff line change
`@@ -28,7 +28,7 @@ template<typename T, int Headdim, bool Is_causal>`
`28`	`28`	`void run_mha_fwd_block_(Flash_fwd_params &params, cudaStream_t stream);`
`29`	`29`	`}`
`30`	`30`
`31`		`-namespace dflash27b {`
	`31`	`+namespace dflash::common {`
`32`	`32`	`namespace flashprefill {`
`33`	`33`
`34`	`34`	`namespace {`
`@@ -275,4 +275,4 @@ fail:`
`275`	`275`	`}`
`276`	`276`
`277`	`277`	`} // namespace flashprefill`
`278`		`-} // namespace dflash27b`
	`278`	`+} // namespace dflash::common`
Original file line number	Diff line number	Diff line change
`@@ -10,7 +10,7 @@`
`10`	`10`	`#include <cstdint>`
`11`	`11`	`#include <vector>`
`12`	`12`
`13`		`-namespace dflash27b {`
	`13`	`+namespace dflash::common {`
`14`	`14`
`15`	`15`	`// Minimum alignment required by ggml flash_attn_ext for mask rows.`
`16`	`16`	`static constexpr int KQ_MASK_PAD = 32;`
`@@ -75,4 +75,4 @@ inline void build_tree_mask(const DDTree & tree, int past_length,`
`75`	`75`	`}`
`76`	`76`	`}`
`77`	`77`
`78`		`-} // namespace dflash27b`
	`78`	`+} // namespace dflash::common`