Skip to content

Commit 6b9f30a

Browse files
shoikedakurbeco
authored and committed
MiniDxNN v0.3.0
1 parent b9b943a commit 6b9f30a

63 files changed

Lines changed: 16961 additions & 2297 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.gitignore

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,8 @@ build/*
5050
!/build/.gitkeep
5151

5252
# Runtime
53-
unittest/runtime/*
54-
!/unittest/runtime/.gitkeep
53+
third_party/runtime/*
54+
!/third_party/runtime/.gitkeep
5555

5656
# CMake
5757
CMakeLists.txt.user

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ include(${CMAKE_CURRENT_LIST_DIR}/cmake/project.cmake)
1515
include(${CMAKE_CURRENT_LIST_DIR}/cmake/utility.cmake)
1616

1717
set(project_desc "A minimal DirectX-based neural network library")
18-
project(MiniDXNN DESCRIPTION ${project_desc} VERSION 0.2.0 LANGUAGES CXX)
18+
project(MiniDXNN DESCRIPTION ${project_desc} VERSION 0.3.0 LANGUAGES CXX)
1919

2020
# Configure project options (build flags, sanitizers, etc.)
2121
setProjectOptions()

NOTICE.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,12 @@ and contains third-party components under various open source licenses.
1515
* [GoogleTest](https://github.com/google/googletest)
1616
- **used for:** To introduce testing framework
1717
- **license:** BSD-3-Clause license
18+
* [stb_image / stb_image_write](https://github.com/nothings/stb)
19+
- **used for:** PNG image loading and writing
20+
- **license:** MIT License / Public Domain
21+
22+
## Resources
23+
24+
* [tokyo-skyline.png](resources/image/tokyo-skyline.png)
25+
- **source:** [Tokyo Skyline at Night with Tokyo Tower — Pexels](https://www.pexels.com/photo/tokyo-skyline-at-night-with-tokyo-tower-31558042/)
26+
- **license:** [Pexels License](https://www.pexels.com/license/) — Free for personal and commercial use, no attribution required

README.md

Lines changed: 27 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,21 @@
1-
# MiniDXNN — MLP Inference & Training on DirectX 12 with Cooperative Vector
1+
# MiniDXNN — MLP Inference & Training on DirectX 12 with LinAlg Matrix
22

33
![CMake build on Windows](../../actions/workflows/cmake.yaml/badge.svg)
44

55
<p align="center"><img src="./docs/minidxnn-logo.png" width="200"></p>
66

7-
An implementation of **MLP** (Multi-Layer Perceptron) inference and training using DirectX 12 [Cooperative Vector][coop-vec-spec]. This library demonstrates GPU-accelerated neural network inference and training with cutting-edge shader features.
7+
An implementation of **MLP** (Multi-Layer Perceptron) inference and training using DirectX 12 [LinAlg Matrix][linalg-spec]. This library demonstrates GPU-accelerated neural network inference and training with cutting-edge shader features.
88

9-
- 🚀 **High Performance**: GPU-accelerated inference and training using [Cooperative Vector][coop-vec-overview]
9+
- 🚀 **High Performance**: GPU-accelerated inference and training using [LinAlg Matrix][linalg-overview]
1010
- 🔧 **Flexible Architecture**: Configurable layers, activations, and data types
1111
- 🎯 **Single-header HLSL**: Easy to integrate into any DX12 project
1212

1313
## Requirements
1414

1515
- **OS**: Windows 11 with [Developer Mode][win-dev-mode] enabled
16-
- **GPU**: Supports Shader Model 6.9 and Cooperative Vector in D3D12 (AMD Radeon™ RX 9000 Series GPUs or equivalent NVIDIA)
16+
- **GPU**: Supports Shader Model 6.10 and LinAlg Matrix in D3D12 (AMD Radeon™ RX 9000 Series GPUs or equivalent NVIDIA GPUs)
1717
- **Build**: CMake ≥ 3.21, Visual Studio 2022 (C++20), Windows SDK
18-
- **DX12 Runtime**: [Agility SDK 1.717.1-preview][dx12-agility-sdk-download], [DXC v1.8.2505.1][dx12-dxc-download]
18+
- **DX12 Runtime**: [Agility SDK 1.720-preview][dx12-agility-sdk-download], [DXC v1.10.2605.2][dx12-dxc-download]
1919
- **Python**: Python 3.8+ with PyTorch (optional, for example python training)
2020

2121
## Getting Started
@@ -39,11 +39,13 @@ Example binaries are output to `build/example/Release/`. Run them from `build/ex
3939

4040
## DX12 Setup
4141

42-
⚠️ **Important**: As of early 2026, Cooperative Vector requires experimental feature support.
42+
⚠️ **Important**: As of early 2026, LinAlg Matrix requires experimental feature support.
4343

44-
1. Install a [Cooperative Vector supported driver][coop-vec-driver]
44+
1. Install a [LinAlg Matrix supported driver][linalg-driver]
4545
2. Enable [Experimental Shader Model][dx-experimental-shader-model] with [D3D12EnableExperimentalFeatures][dx-enable-experimental-features] **before** creating the device
46-
3. Compile shaders with **Shader Model 6.9**
46+
3. Compile shaders with **Shader Model 6.10**
47+
48+
For a detailed walkthrough — including feature checks, weight matrix conversion (`GetLinearAlgebraMatrixConversionDestinationInfo` / `ConvertLinearAlgebraMatrix`), bias alignment, and full sample code — see the **[LinAlg Matrix MLP Guide](docs/linalg_matrix_mlp.md)**.
4749

4850
## HLSL Usage
4951

@@ -147,23 +149,26 @@ MiniDXNN/
147149
| **Operations** | Forward pass (inference), backward pass (training with gradient accumulation) |
148150
| **Activations** | Identity, Sigmoid, ReLU, Leaky ReLU (custom activations supported — e.g. Tanh) |
149151
| **Data type** | float16 (`DATA_TYPE_FLOAT16`) — currently the only tested type |
150-
| **Matrix layout** | Row-major (`MATRIX_LAYOUT_ROW_MAJOR`) — currently the only tested layout |
152+
| **Matrix layout** | Row-major, Column-major, Mul-optimal, Outer-product-optimal |
151153

152154
## Examples
153155

154156
| # | Name | Description |
155157
|---|------|-------------|
156158
| 01 | [Texture Inference](./example/01_texture_inference) | Load a pre-trained MLP binary and reconstruct a texture on the GPU |
157159
| 02 | [Texture Training](./example/02_texture_training) | Train an MLP on-GPU to learn a 2D texture pattern, then reconstruct it |
160+
| 03 | [Texture Compression with Input Encoding](./example/03_texture_compression_with_input_encoding) | Train with positional/grid input encoding for higher-quality texture compression |
158161

159162
See [example/README.md](./example/README.md) for step-by-step instructions.
160163

161164
## Documentation
162165

166+
- [LinAlg Matrix MLP Guide](docs/linalg_matrix_mlp.md) — Step-by-step setup, weight conversion, bias alignment, and shader compilation
163167
- [HLSL API Reference](docs/mlp_hlsl.md) — `mlp.hlsl` types, functions, and memory layout
164168
- [Example Guide](example/README.md) — building and running the examples
165-
- [Cooperative Vector Spec][coop-vec-spec] — HLSL specification
166-
- [D3D12 Cooperative Vector Blog][coop-vec-overview] — overview and getting started
169+
- [LinAlg Matrix Spec][linalg-spec] — HLSL specification
170+
- [D3D12 LinAlg Matrix Overview][linalg-overview] — runtime feature support and getting started
171+
- [LinAlg Examples][linalg-examples] — official example code
167172

168173
## License
169174

@@ -177,16 +182,22 @@ Copyright (c) 2026 Advanced Micro Devices, Inc. All rights reserved.
177182
- [gfx](https://github.com/gboisse/gfx) — MIT
178183
- [CLI11](https://github.com/CLIUtils/CLI11) — BSD-3-Clause
179184
- [GoogleTest](https://github.com/google/googletest) — BSD-3-Clause
185+
- [stb_image](https://github.com/nothings/stb) — MIT/Public Domain
180186

181187
See [NOTICE.md](NOTICE.md) for details.
182188

189+
### CMake dependency downloads
190+
191+
When building with GPU support (without `MINIDXNN_CPP_FALLBACK_ONLY`), CMake auto-downloads dependencies to `third_party/gfx_dep/gfx/third_party/`.
192+
183193
---
184194

185-
[coop-vec-spec]: https://github.com/microsoft/hlsl-specs/blob/main/proposals/0029-cooperative-vector.md
186-
[coop-vec-overview]: https://devblogs.microsoft.com/directx/cooperative-vector/
187-
[coop-vec-driver]: https://devblogs.microsoft.com/directx/cooperative-vector/#get-running
195+
[linalg-spec]: https://github.com/microsoft/hlsl-specs/blob/main/proposals/0035-linalg-matrix.md
196+
[linalg-overview]: https://microsoft.github.io/DirectX-Specs/d3d/D3D12LinearAlgebraRuntimeFeatureSupport.html#tier-1-support
197+
[linalg-driver]: https://devblogs.microsoft.com/directx/shader-model-6-10-agilitysdk-720-preview/
198+
[linalg-examples]: https://github.com/llvm-beanz/linalg-examples
188199
[win-dev-mode]: https://learn.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
189200
[dx-enable-experimental-features]: https://learn.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-d3d12enableexperimentalfeatures
190201
[dx-experimental-shader-model]: https://devblogs.microsoft.com/directx/ser/#availability
191-
[dx12-agility-sdk-download]: https://www.nuget.org/api/v2/package/Microsoft.Direct3D.D3D12/1.717.1-preview
192-
[dx12-dxc-download]: https://github.com/microsoft/DirectXShaderCompiler/releases/download/v1.8.2505.1/dxc_2025_07_14.zip
202+
[dx12-agility-sdk-download]: https://devblogs.microsoft.com/directx/directx12agility/
203+
[dx12-dxc-download]: https://github.com/microsoft/DirectXShaderCompiler/releases/tag/v1.10.2605.2

cmake/compiler.cmake

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ function(setCxxCompileFlags target scope)
3737
/options:strict # Strict options checking
3838
)
3939
set(msvc_linker_flags)
40-
set(msvc_definitions)
40+
set(msvc_definitions _CRT_SECURE_NO_WARNINGS) # Suppress deprecated CRT warnings from third-party headers (stb_image)
4141

4242
# GCC compiler flags
4343
set(gcc_flags)
@@ -144,6 +144,7 @@ function(setCxxWarningFlags target scope)
144144
/wd4514 # unreferenced inline function removed (informational)
145145
/wd4625 # copy constructor implicitly deleted (GoogleTest)
146146
/wd4626 # copy assignment implicitly deleted (GoogleTest)
147+
/wd4702 # unreachable code (can be triggered by compile-time branching)
147148
/wd4710 # function not inlined (compiler decision, informational)
148149
/wd4711 # function selected for automatic inline expansion (informational)
149150
/wd4820 # struct padding added (informational)

cmake/utility.cmake

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,34 @@ endfunction(checkTarget)
3838
## dest_path: Destination path for the symlink
3939
## Note: On Windows, developer mode should be enabled for symlink creation without admin privileges
4040
function(createSymlink source_path dest_path)
    # Create a symbolic link at dest_path that points to source_path.
    # COPY_ON_ERROR asks CMake to fall back to copying the source when the
    # link itself cannot be created.
    file(CREATE_LINK "${source_path}" "${dest_path}"
        RESULT link_result
        COPY_ON_ERROR
        SYMBOLIC)

    # RESULT is set to 0 on success and to an error message otherwise.
    # Compare against "0" explicitly rather than testing truthiness, so that
    # an error text which happens to look like a CMake false constant
    # (empty, "...-NOTFOUND", etc.) is still reported as a failure.
    if(NOT "${link_result}" STREQUAL "0")
        message(FATAL_ERROR "Failed to create symlink from ${source_path} to ${dest_path}: ${link_result}")
    endif()
endfunction()
49+
50+
51+
## Copy runtime DLL/SO overrides from third_party/runtime to the target's output directory
## If third_party/runtime contains DLL or SO files, they overwrite the corresponding
## files already copied by TARGET_RUNTIME_DLLS, allowing custom driver/compiler builds.
## One POST_BUILD copy step is registered per override file; when the directory holds
## no matching files, nothing is registered.
## NOTE(review): presumably this must be called after the command that copies
## TARGET_RUNTIME_DLLS has been attached to the target, so the overrides run last - confirm at call sites.
## Args:
##   target: The executable target whose output directory receives the overrides
function(copyRuntimeOverrides target)
    if(NOT TARGET "${target}")
        message(FATAL_ERROR "copyRuntimeOverrides: '${target}' is not an existing target")
    endif()

    # Resolve the override directory relative to this file (cmake/utility.cmake)
    # instead of CMAKE_SOURCE_DIR, so the lookup still lands in this repository
    # when the project is built as a subdirectory of another project.
    cmake_path(SET runtime_dir NORMALIZE "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/../third_party/runtime")

    if(WIN32)
        set(runtime_patterns "${runtime_dir}/*.dll")
    else()
        set(runtime_patterns "${runtime_dir}/*.so" "${runtime_dir}/*.so.*")
    endif()

    # CONFIGURE_DEPENDS re-checks the glob at build time, so override files that
    # are added or removed after the first configure trigger a regeneration.
    file(GLOB runtime_files CONFIGURE_DEPENDS ${runtime_patterns})

    # An empty list simply registers no commands.
    foreach(runtime_file IN LISTS runtime_files)
        cmake_path(GET runtime_file FILENAME filename)
        add_custom_command(TARGET ${target} POST_BUILD
            COMMAND "${CMAKE_COMMAND}" -E copy "${runtime_file}" "$<TARGET_FILE_DIR:${target}>/${filename}"
            COMMENT "Overriding ${filename} from third_party/runtime"
            VERBATIM)
    endforeach()
endfunction()

0 commit comments

Comments
 (0)