cleanup code

Ceng23333 · Ceng23333 · commit e0ba49f9ae2b · 2026-04-08T09:12:04.000Z
Signed-off-by: Ceng23333 <441651826@qq.com>
diff --git a/examples/collect_metrics_longtext_decode.py b/examples/collect_metrics_longtext_decode.py
@@ -26,12 +26,6 @@
 from typing import Any, Callable, Dict, List, Optional, Tuple
 
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../python"))
-try:
-    from infllmv2_loader import preload_infllmv2_if_available
-
-    preload_infllmv2_if_available()
-except Exception:
-    pass
 
 
 def _poll_gpu_mem_mib(stop: threading.Event, gpu_index: int, out: List[int]) -> None:
diff --git a/examples/compare_inference_speed.py b/examples/compare_inference_speed.py
@@ -26,16 +26,6 @@
 
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../python"))
 
-try:
-    # Best-effort InfLLM-v2 preload to avoid requiring LD_PRELOAD in
-    # profiling tools like nsys. Safe when infllm_v2 is absent.
-    from infllmv2_loader import preload_infllmv2_if_available as _preload_infllmv2_if_available
-except Exception:  # pragma: no cover - defensive import guard
-    _preload_infllmv2_if_available = None
-
-if _preload_infllmv2_if_available is not None:
-    _preload_infllmv2_if_available()
-
 def _build_chat_input_ids(tokenizer, prompt: str):
     conversation = [{"role": "user", "content": prompt}]
     text = tokenizer.apply_chat_template(
diff --git a/examples/eval_tasks/mmlu_pro_val/mmlu_pro_val_step_trace.py b/examples/eval_tasks/mmlu_pro_val/mmlu_pro_val_step_trace.py
@@ -12,10 +12,6 @@
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-from infinilm.infllmv2_loader import preload_infllmv2_if_available
-
-preload_infllmv2_if_available()
-
 import infinicore
 from infinilm.cache import StaticKVCacheConfig
 from infinilm.distributed import DistConfig
diff --git a/examples/metrics_16k_prefill.md b/examples/metrics_16k_prefill.md
@@ -101,7 +101,7 @@ nsys stats --report nvtx_sum --format table ${OUT}/infinilm_prefill_16k.nsys-rep
 
 When profiling with `nsys`, setting `LD_PRELOAD` to the `infllm_v2` extension can break `nsys` itself (loader errors from PyTorch's `libtorch_python.so`). To make `nsys profile ... python ...` work reliably, we preload the InfLLM-v2 `.so` **inside Python** (RTLD_GLOBAL) before importing `infinicore`, so that `libinfinicore_cpp_api.so` can resolve `mha_varlen_fwd` / `mha_fwd_kvcache` without using `LD_PRELOAD`.
 
-- **Added helper**: `InfiniLM/python/infllmv2_loader.py`
+- **Note**: InfLLM-v2 is now linked normally by the InfiniCore build, so the Python-side preload helper (`infllmv2_loader.py`) is no longer required.
 - **Wired into scripts** (preload before `import infinicore`):
   - `InfiniLM/examples/compare_inference_speed.py`
   - `InfiniLM/examples/profile_prefill_infinilm_torchprof.py`
diff --git a/examples/minicpm_sala_logits_sanity.py b/examples/minicpm_sala_logits_sanity.py
@@ -6,10 +6,6 @@
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-from infllmv2_loader import preload_infllmv2_if_available
-
-preload_infllmv2_if_available()
-
 import infinicore
 from infinilm.distributed import DistConfig
 from infinilm.infer_engine import InferEngine
diff --git a/examples/profile_prefill_infinilm_torchprof.py b/examples/profile_prefill_infinilm_torchprof.py
@@ -5,10 +5,6 @@
 import torch
 from transformers import AutoTokenizer
 
-from infllmv2_loader import preload_infllmv2_if_available
-
-preload_infllmv2_if_available()
-
 import infinicore
 from infinilm.distributed import DistConfig
 from infinilm.infer_engine import InferEngine
diff --git a/python/infinilm/__init__.py b/python/infinilm/__init__.py
@@ -1,9 +1,3 @@
-from .infllmv2_loader import preload_infllmv2_if_available
-
-# InfiniCore may depend on InfLLM-v2 symbols (mha_varlen_fwd); load extension
-# RTLD_GLOBAL before any submodule imports infinicore.
-preload_infllmv2_if_available()
-
 from .models import AutoLlamaModel
 from . import distributed
 from . import cache
diff --git a/python/infinilm/infllmv2_loader.py b/python/infinilm/infllmv2_loader.py
diff --git a/python/infllmv2_loader.py b/python/infllmv2_loader.py