@@ -84,31 +84,6 @@ def setup_common_extension() -> CMakeExtension:
8484 cusolvermp_dir = os .getenv ("CUSOLVERMP_HOME" , "/usr" )
8585 cmake_flags .append (f"-DCUSOLVERMP_DIR={ cusolvermp_dir } " )
8686
87- # NCCL EP (Hopper+): on by default; auto-skipped when no arch >= 90 is
88- # targeted. Set NVTE_WITH_NCCL_EP=0 to force off.
89- nccl_ep_env = os .getenv ("NVTE_WITH_NCCL_EP" )
90- nccl_ep_explicit = nccl_ep_env is not None
91- build_with_nccl_ep = bool (int (nccl_ep_env if nccl_ep_explicit else "1" ))
92- if build_with_nccl_ep :
93- arch_tokens = [a .strip () for a in str (archs or "" ).split (";" ) if a .strip ()]
94- has_hopper_or_newer = any (
95- t .lower () == "native" or (t .rstrip ("af" ).isdigit () and int (t .rstrip ("af" )) >= 90 )
96- for t in arch_tokens
97- )
98- if not has_hopper_or_newer :
99- if nccl_ep_explicit :
100- raise RuntimeError (
101- f"NVTE_WITH_NCCL_EP=1 was set but NVTE_CUDA_ARCHS ('{ archs } ') "
102- "contains no arch >= 90. NCCL EP requires Hopper or newer."
103- )
104- print (f"[NCCL EP] No arch >= 90 in NVTE_CUDA_ARCHS ('{ archs } '); skipping build." )
105- build_with_nccl_ep = False
106- if build_with_nccl_ep :
107- nccl_home = build_nccl_ep_submodule ()
108- cmake_flags .append (f"-DNCCL_INCLUDE_DIR={ nccl_home } /include" )
109- else :
110- cmake_flags .append ("-DNVTE_WITH_NCCL_EP=OFF" )
111-
11287 # Add custom CMake arguments from environment variable
11388 nvte_cmake_extra_args = os .getenv ("NVTE_CMAKE_EXTRA_ARGS" )
11489 if nvte_cmake_extra_args :
@@ -155,138 +130,6 @@ def setup_requirements() -> Tuple[List[str], List[str]]:
155130 return [remove_dups (reqs ) for reqs in [install_reqs , test_reqs ]]
156131
157132
def _discover_nccl_home() -> str:
    """Locate the install prefix of core NCCL (``nccl.h`` plus ``libnccl.so``).

    Probes are tried in this order and the first hit wins:

    1. ``NCCL_HOME`` from the environment, accepted when it contains
       ``include/nccl.h``. If it is set but the header is missing, a warning
       is printed and probing continues.
    2. The directory of the ``nvidia.nccl`` package (pip-installed NCCL).
    3. The fixed prefixes ``/opt/nvidia/nccl``, ``/usr/local/nccl``, ``/usr``.
    4. ``ldconfig -p``: for each ``libnccl.so`` entry, the two directories
       above the library's own directory are checked for ``include/nccl.h``.

    Returns:
        The selected prefix as a string.

    Raises:
        RuntimeError: If none of the probes finds a usable prefix.
    """
    lib_names = ("libnccl.so", "libnccl.so.2")
    # Include Debian/Ubuntu multiarch subdirs (e.g. lib/aarch64-linux-gnu).
    lib_subdirs = ("lib", "lib64", "lib/aarch64-linux-gnu", "lib/x86_64-linux-gnu")

    def has_header(prefix: Path) -> bool:
        return (prefix / "include" / "nccl.h").exists()

    def has_header_and_lib(prefix: Path) -> bool:
        return has_header(prefix) and any(
            (prefix / sub / name).exists() for sub in lib_subdirs for name in lib_names
        )

    env_home = os.environ.get("NCCL_HOME")
    if env_home:
        if has_header(Path(env_home)):
            return env_home
        print(
            f"[NCCL EP] WARNING: NCCL_HOME='{env_home}' is set but "
            f"'{env_home}/include/nccl.h' was not found; falling back to system probes."
        )

    # pip-installed NCCL (nvidia-nccl-cu* wheel) lives under nvidia/nccl in
    # site-packages and has no top-level include/lib layout.
    try:
        import importlib.util

        spec = importlib.util.find_spec("nvidia.nccl")
        if spec is not None and spec.submodule_search_locations:
            pip_root = Path(next(iter(spec.submodule_search_locations)))
            if has_header_and_lib(pip_root):
                return str(pip_root)
    except (ImportError, ValueError):
        # Best-effort probe: a missing/broken parent package is not an error.
        pass

    for cand in ("/opt/nvidia/nccl", "/usr/local/nccl", "/usr"):
        if has_header_and_lib(Path(cand)):
            return cand

    # ldconfig is optional; any failure to run it (missing, not executable,
    # non-zero exit) simply means this probe yields nothing.
    try:
        listing = subprocess.check_output(
            ["ldconfig", "-p"], stderr=subprocess.DEVNULL
        ).decode(errors="replace")
    except (subprocess.CalledProcessError, OSError):
        listing = ""

    for line in listing.splitlines():
        if "libnccl.so" not in line or "=>" not in line:
            continue
        lib_path = Path(line.split("=>")[-1].strip())
        lib_dir_parent = lib_path.parent.parent
        # Walk upward so multiarch layouts (.../lib/<triplet>/libnccl.so)
        # resolve to the prefix that contains include/nccl.h.
        for root in (lib_dir_parent, lib_dir_parent.parent):
            if has_header(root):
                return str(root)

    raise RuntimeError(
        "Could not locate NCCL core (nccl.h + libnccl.so). Set NCCL_HOME to the install prefix."
    )
211-
212-
def _nccl_ep_gencode_archs(arch_spec: str) -> List[str]:
    """Expand a ';'-separated arch list into unique bare SM numbers >= 90."""
    selected: List[str] = []

    def keep(candidate: str) -> None:
        if candidate.isdigit() and int(candidate) >= 90 and candidate not in selected:
            selected.append(candidate)

    for token in (part.strip() for part in arch_spec.split(";")):
        if not token:
            continue
        if token.lower() != "native":
            # Drop a trailing arch-specific suffix ("90a", "100f") before comparing.
            keep(token.rstrip("af"))
            continue
        # "native": ask the driver which compute capabilities the host has.
        try:
            smi_out = subprocess.check_output(
                ["nvidia-smi", "--query-gpu=compute_cap", "--format=csv,noheader"],
                stderr=subprocess.DEVNULL,
            ).decode()
        except (subprocess.CalledProcessError, OSError) as e:
            raise RuntimeError(
                "NVTE_CUDA_ARCHS=native requires nvidia-smi to resolve the host arch."
            ) from e
        for row in smi_out.splitlines():
            # One line per GPU, e.g. "9.0" -> "90".
            keep(row.strip().replace(".", ""))
    return selected


def build_nccl_ep_submodule() -> str:
    """Build libnccl_ep.a from the 3rdparty/nccl submodule and return NCCL_HOME.

    The set of target architectures is taken from ``cuda_archs()``, restricted
    to SM >= 90, with ``native`` expanded via ``nvidia-smi``. The resulting
    ``-gencode`` string is stored in a stamp file next to the library; the
    library is rebuilt when it is missing or when the stamp differs from the
    current gencode string (an existing library is cleaned first).

    Returns:
        The NCCL install prefix found by ``_discover_nccl_home()``.

    Raises:
        RuntimeError: If the submodule has no Makefile, if no requested arch
            is >= 90, or if ``native`` was requested and ``nvidia-smi`` could
            not be run.
        subprocess.CalledProcessError: If a ``make`` invocation fails.
    """
    nccl_root = current_file_path / "3rdparty" / "nccl"
    if not (nccl_root / "Makefile").exists():
        raise RuntimeError(
            f"NCCL submodule not found at {nccl_root}. "
            "Run `git submodule update --init --recursive`."
        )

    build_dir = nccl_root / "build"
    nccl_ep_lib = build_dir / "lib" / "libnccl_ep.a"
    gencode_stamp = build_dir / "lib" / "libnccl_ep.gencode"

    # Caller gates on arch >= 90 or "native"; expand "native" to the host's
    # actual sm_XX so the build stamp distinguishes machines.
    requested_archs = cuda_archs()
    arch_list = _nccl_ep_gencode_archs(str(requested_archs or ""))
    if not arch_list:
        raise RuntimeError(
            "NCCL EP requires Hopper or newer (SM >= 90); none found in"
            f" NVTE_CUDA_ARCHS={requested_archs!r}. Re-run with NVTE_WITH_NCCL_EP=0 to skip the NCCL"
            " EP build (the rest of TE still builds)."
        )
    # No whitespace inside a flag: each -gencode entry must stay one token.
    gencode = " ".join(f"-gencode=arch=compute_{a},code=sm_{a}" for a in arch_list)

    # NCCL EP needs the core NCCL headers + libnccl.so; write NCCL EP build
    # outputs to the submodule's local build/ tree.
    nccl_home = _discover_nccl_home()
    env = os.environ.copy()
    env["NVCC_GENCODE"] = gencode
    env["NCCL_HOME"] = nccl_home
    env["NCCL_EP_BUILDDIR"] = str(build_dir)

    lib_present = nccl_ep_lib.exists()
    prev_gencode = gencode_stamp.read_text().strip() if gencode_stamp.exists() else None
    if lib_present and prev_gencode == gencode:
        # Up to date for this exact set of architectures.
        return nccl_home

    if lib_present:
        # Stale library built for a different arch set: clean before rebuilding.
        print(
            f"[NCCL EP] gencode changed ('{prev_gencode}' -> '{gencode}'); "
            "rebuilding libnccl_ep.a"
        )
        subprocess.check_call(
            ["make", "-C", "contrib/nccl_ep", "clean"],
            cwd=str(nccl_root),
            env=env,
        )

    nproc = os.cpu_count() or 8
    print(f"[NCCL EP] Building libnccl_ep.a (gencode='{gencode}')")
    subprocess.check_call(
        ["make", "-j", str(nproc), "-C", "contrib/nccl_ep", "lib"],
        cwd=str(nccl_root),
        env=env,
    )
    # Record the stamp only after a successful build.
    gencode_stamp.parent.mkdir(parents=True, exist_ok=True)
    gencode_stamp.write_text(gencode)

    return nccl_home
288-
289-
290133def git_check_submodules () -> None :
291134 """
292135 Attempt to checkout git submodules automatically during setup.
0 commit comments