Skip to content

Commit 30f7e8b

Browse files
committed
feat: add --fast-build dev-iteration build mode (prototype)
New 'Fast' build type for fast edit-rebuild-run iteration (e.g. GPU print debugging). It matches none of the Release-only (IPO, -march=native) or Debug/RelDebug-only (MFC_DEBUG, -gpu=debug) conditional blocks, so it inherits none of them; it adds only a light -O1. On NVHPC GPU builds it autodetects the node's single compute capability (via nvidia-smi) and overrides the multi-arch MFC_CUDA_CC, with MFC_FAST_ARCH as a login-node escape hatch. Measured (NVHPC 24.5, RTX 6000 cc75, generic simulation, 8 cores): clean build 641s (Release fat 5-arch) -> 170s (3.8x); hot-module 385s (Release fat 5-arch) -> 79s (4.9x). Verified: builds with no IPO/MFC_DEBUG and runs a 1D case on GPU to exit 0. Adds fast_build to MFCConfig (auto --fast-build/--no-fast-build, own slug); bumps lock version to 9 for the new config field.
1 parent 4a73bf3 commit 30f7e8b

4 files changed

Lines changed: 73 additions & 6 deletions

File tree

CMakeLists.txt

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ if (MFC_ALL)
4040
endif()
4141

4242
# Validate CMAKE_BUILD_TYPE to catch typos (CMake is case-sensitive).
43-
set(_VALID_BUILD_TYPES "Debug" "Release" "RelDebug" "")
43+
set(_VALID_BUILD_TYPES "Debug" "Release" "RelDebug" "Fast" "")
4444
if (NOT CMAKE_BUILD_TYPE IN_LIST _VALID_BUILD_TYPES)
45-
message(FATAL_ERROR "Unknown CMAKE_BUILD_TYPE '${CMAKE_BUILD_TYPE}'. Valid: Debug, RelDebug, Release")
45+
message(FATAL_ERROR "Unknown CMAKE_BUILD_TYPE '${CMAKE_BUILD_TYPE}'. Valid: Debug, RelDebug, Release, Fast")
4646
endif()
4747

4848
# RelDebug: a lighter debug mode for CI. Compiler-specific blocks below add the
@@ -51,6 +51,13 @@ set(CMAKE_C_FLAGS_RELDEBUG "-g" CACHE STRING "")
5151
set(CMAKE_CXX_FLAGS_RELDEBUG "-g" CACHE STRING "")
5252
set(CMAKE_Fortran_FLAGS_RELDEBUG "-g" CACHE STRING "")
5353

54+
# Fast: fast-iteration dev builds (e.g. GPU print-debugging). Deliberately matches
55+
# none of the Release-only (IPO, -march=native) or Debug/RelDebug-only (MFC_DEBUG,
56+
# -gpu=debug) conditional blocks below, so it inherits none of them - just a light -O1.
57+
set(CMAKE_C_FLAGS_FAST "-O1" CACHE STRING "")
58+
set(CMAKE_CXX_FLAGS_FAST "-O1" CACHE STRING "")
59+
set(CMAKE_Fortran_FLAGS_FAST "-O1" CACHE STRING "")
60+
5461
if (MFC_SINGLE_PRECISION)
5562
add_compile_definitions(MFC_SINGLE_PRECISION)
5663
else()
@@ -330,6 +337,14 @@ if (CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "RelDebug")
330337
add_compile_definitions(MFC_DEBUG)
331338
endif()
332339

340+
# Fast: light optimization for dev iteration. Like Debug/RelDebug, the real opt
341+
# flag is injected here too (the CMAKE_*_FLAGS_FAST cache vars above already default to -O1).
342+
# -O1 keeps compile time low while giving acceptable runtime; no MFC_DEBUG, so
343+
# device routines stay free of host-only debug aborts and compile without IPO.
344+
if (CMAKE_BUILD_TYPE STREQUAL "Fast")
345+
add_compile_options($<$<COMPILE_LANGUAGE:Fortran>:-O1>)
346+
endif()
347+
333348

334349

335350
# HANDLE_SOURCES: Given a target (herein <target>):

toolchain/mfc/build.py

Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,53 @@
2727
_MAKE_PROGRESS_RE = re.compile(r"^\[\s*(\d+)%\]\s+(.*)$")
2828

2929

30+
def _cmake_build_type() -> str:
    """Translate the CLI build-mode flags into a CMAKE_BUILD_TYPE name.

    Flags are consulted in order (--debug, --reldebug, --fast-build) and the
    first one that is set wins; "Release" is returned when none is set.
    """
    # (build type, positional arguments forwarded to ARG). Only fast_build is
    # looked up with an explicit False default. Lookups happen lazily, in
    # order, and stop at the first truthy flag.
    precedence = (
        ("Debug", ("debug",)),
        ("RelDebug", ("reldebug",)),
        ("Fast", ("fast_build", False)),
    )
    for build_type, lookup in precedence:
        if ARG(*lookup):
            return build_type
    return "Release"
39+
40+
41+
def _apply_fast_build_gpu_arch() -> None:
    """Under --fast-build on an NVHPC GPU build, restrict device codegen to a
    single compute capability (the node's GPU), overriding the multi-arch
    MFC_CUDA_CC that the module files set. CMake reads $ENV{MFC_CUDA_CC}.

    Cray/AMD GPU builds don't use MFC_CUDA_CC (they are already single-arch via
    craype-accel/--offload-arch), so this only acts when MFC_CUDA_CC is set.

    Resolution order for the single arch:
      1. MFC_FAST_ARCH, if set to a non-blank value (escape hatch for login
         nodes without a GPU); it is exported as-is apart from stripping
         surrounding whitespace.
      2. The first compute capability reported by nvidia-smi, with the dot
         removed (e.g. "7.5" -> "75").

    Side effects:
        Overwrites os.environ["MFC_CUDA_CC"] when an arch is resolved.

    Raises:
        MFCException: if no override is given and nvidia-smi is missing, times
            out, exits non-zero, or reports no well-formed compute capability.
    """
    if not ARG("fast_build", False) or ARG("gpu") == gpuConfigOptions.NONE.value:
        return
    if not os.environ.get("MFC_CUDA_CC"):  # not an NVHPC node; nothing to do
        return

    # Escape hatch for login nodes. A blank/whitespace-only value counts as unset
    # so it cannot blank out MFC_CUDA_CC.
    override = os.environ.get("MFC_FAST_ARCH", "").strip()
    if override:
        os.environ["MFC_CUDA_CC"] = override
        return

    try:
        result = subprocess.run(
            ["nvidia-smi", "--query-gpu=compute_cap", "--format=csv,noheader"],
            capture_output=True,
            text=True,
            timeout=10,
            check=False,
        )
    except (OSError, subprocess.SubprocessError):
        result = None

    caps = []
    # Only trust stdout from a successful run, and only lines shaped like
    # "<major>.<minor>". Without this, any diagnostic text that nvidia-smi
    # writes to stdout would be exported verbatim as the target arch.
    if result is not None and result.returncode == 0:
        for ln in result.stdout.splitlines():
            cap = ln.strip()
            if re.fullmatch(r"\d+\.\d+", cap):
                caps.append(cap.replace(".", ""))

    if not caps:
        raise MFCException("--fast-build: could not detect a local GPU compute capability " "(no GPU visible via nvidia-smi). Run on a GPU node, or set " "MFC_FAST_ARCH=<cc> (e.g. MFC_FAST_ARCH=90).")

    # One line is reported per visible GPU; the first GPU's capability is used.
    os.environ["MFC_CUDA_CC"] = caps[0]
75+
76+
3077
def _run_build_with_progress(command: typing.List[str], target_name: str, streaming: bool = False) -> subprocess.CompletedProcess:
3178
"""
3279
Run a build command with a progress bar that parses ninja output.
@@ -367,6 +414,10 @@ def is_buildable(self) -> bool:
367414
def configure(self, case: Case):
368415
if ARG("debug") and ARG("reldebug"):
369416
raise MFCException("--debug and --reldebug are mutually exclusive.")
417+
if ARG("fast_build", False) and (ARG("debug") or ARG("reldebug")):
418+
raise MFCException("--fast-build is mutually exclusive with --debug/--reldebug.")
419+
420+
_apply_fast_build_gpu_arch()
370421

371422
build_dirpath = self.get_staging_dirpath(case)
372423
cmake_dirpath = self.get_cmake_dirpath()
@@ -386,9 +437,9 @@ def configure(self, case: Case):
386437
# build the configured targets. This is mostly useful for debugging.
387438
# See: https://cmake.org/cmake/help/latest/variable/CMAKE_EXPORT_COMPILE_COMMANDS.html.
388439
"-DCMAKE_EXPORT_COMPILE_COMMANDS=ON",
389-
# Set build type (Debug, RelDebug, or Release).
440+
# Set build type (Debug, RelDebug, Fast, or Release).
390441
# See: https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html
391-
f"-DCMAKE_BUILD_TYPE={'Debug' if ARG('debug') else 'RelDebug' if ARG('reldebug') else 'Release'}",
442+
f"-DCMAKE_BUILD_TYPE={_cmake_build_type()}",
392443
# Used by FIND_PACKAGE (/FindXXX) to search for packages, with the
393444
# second highest level of priority, still letting users manually
394445
# specify <PackageName>_ROOT, which has precedence over CMAKE_PREFIX_PATH.
@@ -468,7 +519,7 @@ def build(self, case: input.MFCInputFile):
468519
"--parallel",
469520
ARG("jobs"),
470521
"--config",
471-
"Debug" if ARG("debug") else "RelDebug" if ARG("reldebug") else "Release",
522+
_cmake_build_type(),
472523
]
473524

474525
verbosity = ARG("verbose")

toolchain/mfc/lock.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from .printer import cons
66
from .state import MFCConfig
77

8-
MFC_LOCK_CURRENT_VERSION: int = 8
8+
MFC_LOCK_CURRENT_VERSION: int = 9
99

1010

1111
@dataclasses.dataclass

toolchain/mfc/state.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ class MFCConfig:
1616
gpu: str = gpuConfigOptions.NONE.value
1717
debug: bool = False
1818
reldebug: bool = False
19+
fast_build: bool = False
1920
gcov: bool = False
2021
unified: bool = False
2122
single: bool = False

0 commit comments

Comments
 (0)