|
1 | 1 | # Amplifier.NET |
2 | | -Amplifier allows .NET developers to easily run complex applications with intensive mathematical computation on Intel CPU/GPU, NVIDIA, AMD without writing any additional C kernel code. Write your function in .NET and Amplifier will take care of running it on your favorite hardware. |
3 | 2 |
|
4 | | -Below is the sample Kernel you can write in CSharp |
| 3 | +**Write C#. Run on GPU.** |
| 4 | + |
| 5 | +Amplifier.NET is a GPU computing library that lets .NET developers harness the power of parallel processing on Intel, NVIDIA, and AMD hardware—without writing a single line of C or OpenCL kernel code. |
| 6 | + |
| 7 | +[NuGet package](https://www.nuget.org/packages/Amplifier.NET/) |
| 8 | +[License: MIT](https://opensource.org/licenses/MIT) |
| 9 | + |
| 10 | +## Why Amplifier.NET? |
| 11 | + |
| 12 | +Modern applications demand massive computational power for machine learning, scientific simulations, image processing, and financial modeling. GPUs can process thousands of operations in parallel, but traditionally require specialized knowledge of OpenCL, CUDA, or shader languages. |
| 13 | + |
| 14 | +**Amplifier.NET bridges this gap.** Write your compute kernels in familiar C# syntax, and let Amplifier handle the translation to OpenCL, device management, and memory transfers. Your code runs on any OpenCL-compatible device—from integrated Intel graphics to high-end discrete GPUs. |
| 15 | + |
| 16 | +## Features |
| 17 | + |
| 18 | +- **Pure C# Kernels** — Write GPU compute functions using standard C# syntax |
| 19 | +- **Automatic Translation** — C# code is decompiled and translated to OpenCL C99 at runtime |
| 20 | +- **OpenCL 3.0 Support** — Full support for the latest OpenCL specification including optional features |
| 21 | +- **Cross-Platform** — Works on Windows, Linux, and macOS with any OpenCL driver |
| 22 | +- **Multi-Device** — Enumerate and target specific compute devices (CPU, GPU, FPGA) |
| 23 | +- **Struct Support** — Pass custom structs between host and device |
| 24 | +- **XArray System** — Advanced array types with shape manipulation and automatic memory management |
| 25 | + |
| 26 | +## Quick Start |
| 27 | + |
| 28 | +### Installation |
| 29 | + |
| 30 | +```bash |
| 31 | +dotnet add package Amplifier.NET |
| 32 | +``` |
| 33 | + |
| 34 | +### Your First Kernel |
| 35 | + |
| 36 | +Define a kernel class that extends `OpenCLFunctions`: |
5 | 37 |
|
6 | 38 | ```csharp |
7 | | -[OpenCLKernel] |
8 | | -void add_float([Global]float[] a, [Global] float[] b, [Global]float[] r) |
9 | | -{ |
10 | | - int i = get_global_id(0); |
11 | | - b[i] = 0.5f * b[i]; |
12 | | - r[i] = a[i] + b[i]; |
13 | | -} |
| 39 | +using Amplifier.OpenCL; |
14 | 40 |
|
15 | | -[OpenCLKernel] |
16 | | -void Fill([Global] float[] x, float value) |
| 41 | +public class MyKernels : OpenCLFunctions |
17 | 42 | { |
18 | | - int i = get_global_id(0); |
19 | | - |
20 | | - x[i] = value; |
| 43 | + [OpenCLKernel] |
| 44 | + void VectorAdd([Global] float[] a, [Global] float[] b, [Global] float[] result) |
| 45 | + { |
| 46 | + int i = get_global_id(0); |
| 47 | + result[i] = a[i] + b[i]; |
| 48 | + } |
| 49 | + |
| 50 | + [OpenCLKernel] |
| 51 | + void Scale([Global] float[] data, float factor) |
| 52 | + { |
| 53 | + int i = get_global_id(0); |
| 54 | + data[i] *= factor; |
| 55 | + } |
21 | 56 | } |
22 | 57 | ``` |
23 | 58 |
|
24 | | -Now this kernel will be converted to C99 format which is specific instruction for OpenCL. Let's do some magic to execute the kernel using OpenCL |
| 59 | +### Execute on GPU |
25 | 60 |
|
26 | | -1. Create an instance of OpenCL compiler. You can list all the available devices. |
27 | 61 | ```csharp |
| 62 | +using Amplifier; |
| 63 | + |
| 64 | +// Initialize the compiler and select a device |
28 | 65 | var compiler = new OpenCLCompiler(); |
29 | | -Console.WriteLine("\nList Devices----"); |
30 | | -foreach (var item in compiler.Devices) |
31 | | -{ |
32 | | - Console.WriteLine(item); |
33 | | -} |
| 66 | + |
| 67 | +Console.WriteLine("Available Devices:"); |
| 68 | +foreach (var device in compiler.Devices) |
| 69 | + Console.WriteLine($" {device}"); |
| 70 | + |
| 71 | +compiler.UseDevice(0); // Select first device |
| 72 | +compiler.CompileKernel(typeof(MyKernels)); |
| 73 | + |
| 74 | +// Prepare data |
| 75 | +float[] a = { 1, 2, 3, 4, 5 }; |
| 76 | +float[] b = { 10, 20, 30, 40, 50 }; |
| 77 | +float[] result = new float[5]; |
| 78 | + |
| 79 | +// Execute the VectorAdd kernel |
| 80 | +var exec = compiler.GetExec(); |
| 81 | +exec.VectorAdd(a, b, result); |
| 82 | + |
| 83 | +Console.WriteLine(string.Join(", ", result)); |
| 84 | +// Output: 11, 22, 33, 44, 55 |
34 | 85 | ``` |
35 | 86 |
|
36 | | -2. Select a device by id and load the Sample kernel created. |
| 87 | +## Working with Structs |
| 88 | + |
| 89 | +Amplifier supports custom structs for complex data types: |
| 90 | + |
37 | 91 | ```csharp |
38 | | -compiler.UseDevice(0); |
39 | | -compiler.CompileKernel(typeof(SimpleKernels)); |
| 92 | +using System.Runtime.InteropServices; |
| 93 | + |
| 94 | +[StructLayout(LayoutKind.Sequential)] |
| 95 | +public struct Particle |
| 96 | +{ |
| 97 | + public float X, Y, Z; |
| 98 | + public float VelocityX, VelocityY, VelocityZ; |
| 99 | + public float Mass; |
| 100 | + public int Active; |
| 101 | +} |
40 | 102 |
|
41 | | -Console.WriteLine("\nList Kernels----"); |
42 | | -foreach (var item in compiler.Kernels) |
| 103 | +public class PhysicsKernels : OpenCLFunctions |
43 | 104 | { |
44 | | - Console.WriteLine(item); |
| 105 | + [OpenCLKernel] |
| 106 | + void Integrate([Global][Struct] Particle[] particles, float deltaTime) |
| 107 | + { |
| 108 | + int i = get_global_id(0); |
| 109 | + if (particles[i].Active == 1) |
| 110 | + { |
| 111 | + particles[i].X += particles[i].VelocityX * deltaTime; |
| 112 | + particles[i].Y += particles[i].VelocityY * deltaTime; |
| 113 | + particles[i].Z += particles[i].VelocityZ * deltaTime; |
| 114 | + } |
| 115 | + } |
45 | 116 | } |
| 117 | + |
| 118 | +// Compile with struct types |
| 119 | +compiler.CompileKernel(typeof(PhysicsKernels), typeof(Particle)); |
46 | 120 | ``` |
47 | 121 |
|
48 | | -3. Declare variable and do some operation which will run on any hardware selected like Intel CPU/GPU, NVIDIA, AMD etc. |
| 122 | +## Advanced: XArray for Scientific Computing |
| 123 | + |
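| 124 | +The `XArray` system provides NumPy-like array operations with automatic GPU memory management. The example below assumes `exec` was obtained from `compiler.GetExec()` as in the Quick Start, and that kernels named `Fill` and `MatMul` have already been defined and compiled: |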
| 125 | + |
49 | 126 | ```csharp |
50 | | -Array a = new float[] { 1, 2, 3, 4 }; |
51 | | -Array b = new float[4]; |
52 | | -Array r = new float[4]; |
| 127 | +int M = 1024, N = 1024, K = 512; |
53 | 128 |
|
54 | | -var exec = compiler.GetExec<float>(); |
55 | | -exec.Fill(b, 0.5f); |
56 | | -exec.add_float(a, b, r); |
| 129 | +var a = new InArray(new long[] { M, K }, DType.Float32); |
| 130 | +var b = new InArray(new long[] { K, N }, DType.Float32); |
| 131 | +var c = new OutArray(new long[] { M, N }, DType.Float32); |
57 | 132 |
|
58 | | -Console.WriteLine("\nResult----"); |
59 | | -for(int i = 0;i<r.Length;i++) |
60 | | -{ |
61 | | - Console.Write(r.GetValue(i) + " "); |
62 | | -} |
| 133 | +exec.Fill(a, 1.0f); |
| 134 | +exec.Fill(b, 2.0f); |
| 135 | +exec.MatMul(M, N, K, a, b, c); |
| 136 | + |
| 137 | +float[] result = c.ToArray(); |
63 | 138 | ``` |
64 | 139 |
|
65 | | -Result: |
| 140 | +## OpenCL Built-in Functions |
| 141 | + |
| 142 | +Kernels have access to all standard OpenCL functions: |
66 | 143 |
|
67 | | - |
| 144 | +| Category | Functions | |
| 145 | +|----------|-----------| |
| 146 | +| **Work-item** | `get_global_id`, `get_local_id`, `get_group_id`, `get_global_size` | |
| 147 | +| **Math** | `sin`, `cos`, `tan`, `exp`, `log`, `pow`, `sqrt`, `fabs`, `fmin`, `fmax` | |
| 148 | +| **Geometric** | `dot`, `cross`, `length`, `normalize`, `distance` | |
| 149 | +| **Integer** | `abs`, `clamp`, `min`, `max` | |
| 150 | +| **Synchronization** | `barrier`, `mem_fence` | |
68 | 151 |
|
| 152 | +## Performance Tips |
| 153 | + |
| 154 | +1. **Minimize Host-Device Transfers** — Keep data on the GPU between kernel calls |
| 155 | +2. **Use Appropriate Work Sizes** — Match your problem dimensions to the kernel's global size |
| 156 | +3. **Prefer Float over Double** — Many GPUs have limited double-precision performance |
| 157 | +4. **Coalesce Memory Access** — Access contiguous memory addresses for best throughput |
| 158 | +5. **Avoid Branching** — Divergent control flow reduces GPU efficiency |
| 159 | + |
| 160 | +## Supported Platforms |
| 161 | + |
| 162 | +| Platform | Status | |
| 163 | +|----------|--------| |
| 164 | +| Windows (x64) | Fully Supported | |
| 165 | +| Linux (x64) | Fully Supported | |
| 166 | +| macOS | Supported (Intel/AMD GPUs) | |
| 167 | + |
| 168 | +**Tested Hardware:** |
| 169 | +- Intel Iris Xe, UHD Graphics |
| 170 | +- NVIDIA GTX/RTX series |
| 171 | +- AMD Radeon RX series |
69 | 172 |
|
70 | 173 | ## Documentation |
71 | | -* Base: https://deepakkumar1984.github.io/Amplifier.NET/ |
72 | | -* Articles: https://deepakkumar1984.github.io/Amplifier.NET/articles/intro.html |
73 | | -* API Reference: https://deepakkumar1984.github.io/Amplifier.NET/api/Amplifier.html |
74 | 174 |
|
75 | | -## Any contribution is welcome |
76 | | -Please fork the code and suggest improvements by raising PR. Raise issues so that I can make this library robust. |
| 175 | +- **Getting Started**: [Articles](https://deepakkumar1984.github.io/Amplifier.NET/articles/intro.html) |
| 176 | +- **API Reference**: [Documentation](https://deepakkumar1984.github.io/Amplifier.NET/api/Amplifier.html) |
| 177 | +- **Examples**: See the [examples](examples/) directory |
| 178 | + |
| 179 | +## Contributing |
| 180 | + |
| 181 | +Contributions are welcome! Please: |
| 182 | + |
| 183 | +1. Fork the repository |
| 184 | +2. Create a feature branch |
| 185 | +3. Submit a pull request |
| 186 | + |
| 187 | +For bugs or feature requests, please [open an issue](https://github.com/deepakkumar1984/Amplifier.NET/issues). |
| 188 | + |
| 189 | +## License |
| 190 | + |
| 191 | +Amplifier.NET is released under the [MIT License](LICENSE). |
| 192 | + |
| 193 | +--- |
| 194 | + |
| 195 | +**Amplifier.NET** — Unlock the power of GPU computing in pure C#. |
0 commit comments