initial commit

tyh0123 · tyh0123 · commit e2cba04ae036 · 2026-01-05T22:47:01.000-08:00
diff --git a/Exec/GNUmakefile b/Exec/GNUmakefile
@@ -1,5 +1,5 @@
 # AMREX_HOME defines the directory in which we will find all the AMReX code.
-AMREX_HOME ?= ../../amrex
+AMREX_HOME = ../../amrex
 
 DEBUG        = FALSE
 USE_MPI      = TRUE
@@ -12,6 +12,32 @@ USE_FFT      = TRUE
 
 USE_SUNDIALS = FALSE
 
+
+# Pytorch directories 
+ifeq ($(USE_CUDA),TRUE)
+  PYTORCH_ROOT := ../../libtorch_cuda
+else
+  PYTORCH_ROOT := ../../libtorch_cpu
+endif
+TORCH_LIBPATH = $(PYTORCH_ROOT)/lib
+
+ifeq ($(USE_CUDA),TRUE)
+  TORCH_LIBS = -ltorch -ltorch_cpu -lc10 -lc10_cuda -lcuda 
+else
+  TORCH_LIBS = -ltorch -ltorch_cpu -lc10
+endif
+
+INCLUDE_LOCATIONS += $(PYTORCH_ROOT)/include \
+                     $(PYTORCH_ROOT)/include/torch/csrc/api/include
+LIBRARY_LOCATIONS += $(TORCH_LIBPATH)
+
+DEFINES += -D_GLIBCXX_USE_CXX11_ABI=1
+ifeq ($(USE_CUDA),TRUE)
+  LDFLAGS += -Xlinker "--no-as-needed,-rpath $(TORCH_LIBPATH) $(TORCH_LIBS)"
+else
+  LDFLAGS += -Wl,--no-as-needed,-rpath=$(TORCH_LIBPATH) $(TORCH_LIBS)
+endif
+
 include $(AMREX_HOME)/Tools/GNUMake/Make.defs
 
 include ../Source/Make.package
diff --git a/Source/Demag_ml.cpp b/Source/Demag_ml.cpp
@@ -0,0 +1,155 @@
+// <torch/script.h> goes first: MagneX.H declares CalculateH_demag_ML with torch
+// types but only includes the torch header itself in CUDA builds.
+#include <torch/script.h>
+#include "MagneX.H"
+
+#include <type_traits>
+
+using namespace amrex;
+
+namespace {
+
+// Row-major offset of cell (i,j,k) within a box with lower corner lo and extent n
+// (last spatial index fastest). Shared by the pack and unpack kernels so that both
+// always use the same layout.
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+int FlatCellIndex (int i, int j, int k, const IntVect& lo, const IntVect& n) noexcept
+{
+#if AMREX_SPACEDIM == 2
+    amrex::ignore_unused(k);
+    return (j - lo[1]) + (i - lo[0]) * n[1];
+#else
+    return (k - lo[2]) + (j - lo[1]) * n[2] + (i - lo[0]) * n[2] * n[1];
+#endif
+}
+
+} // namespace
+
+/**
+ * Evaluate the demagnetization field with TorchScript modules instead of the solver.
+ *
+ * For each box of Mfield, the three components are packed into a
+ * [1, 3, nx, ny, nz] float32 tensor (nx, ny, nz = box extents), run through
+ * x_norm_module "encode" -> ml_module forward -> y_norm_module "decode", and the
+ * three output channels are copied into H_demagfield on that box.
+ *
+ * \param[in]  Mfield        magnetization components; valid cells are read
+ * \param[in]  x_norm_module TorchScript module exposing an "encode" method
+ * \param[in]  ml_module     TorchScript module whose forward() is the surrogate model
+ * \param[in]  y_norm_module TorchScript module exposing a "decode" method
+ * \param[out] H_demagfield  demagnetization field components; valid cells are written
+ *
+ * NOTE(review): components 0..2 are indexed unconditionally, so this presumably
+ * needs AMREX_SPACEDIM == 3; the decoded output is assumed to be laid out as
+ * [1, 3, nx, ny, nz] -- confirm against the exported model.
+ */
+void CalculateH_demag_ML(const Array<MultiFab, AMREX_SPACEDIM>& Mfield,
+                         torch::jit::script::Module& x_norm_module,
+                         torch::jit::script::Module& ml_module,
+                         torch::jit::script::Module& y_norm_module,
+                         Array<MultiFab, AMREX_SPACEDIM>& H_demagfield)
+{
+    BL_PROFILE_VAR("CalculateH_demag_ML()", CalculateH_demag_ML);
+
+    // Tensor dtype that matches amrex::Real (single- or double-precision builds).
+    const auto real_dtype = std::is_same<Real, double>::value ? torch::kFloat64
+                                                              : torch::kFloat32;
+
+    // Run on the GPU AMReX selected for this rank, or on the CPU in host-only builds.
+#ifdef AMREX_USE_CUDA
+    const torch::Device ml_device(torch::kCUDA, amrex::Gpu::Device::deviceId());
+#else
+    const torch::Device ml_device(torch::kCPU);
+#endif
+
+    // Inference only: do not record an autograd graph.
+    at::NoGradGuard no_grad;
+
+    // No tiling: each iteration handles a whole valid box, so tiles would only
+    // repeat the same inference.
+    for (MFIter mfi(Mfield[0]); mfi.isValid(); ++mfi) {
+
+        const Box bx = mfi.validbox();
+
+        const auto& Mx = Mfield[0].const_array(mfi);
+        const auto& My = Mfield[1].const_array(mfi);
+        const auto& Mz = Mfield[2].const_array(mfi);
+
+        auto Hx_demag = H_demagfield[0].array(mfi);
+        auto Hy_demag = H_demagfield[1].array(mfi);
+        auto Hz_demag = H_demagfield[2].array(mfi);
+
+        const IntVect bx_lo = bx.smallEnd();
+        const IntVect nbox  = bx.size();
+
+        // Tensor extents come from the box, not from a fixed 128 x 128 x 4 grid.
+        const std::int64_t nx = nbox[0];
+        const std::int64_t ny = nbox[1];
+        const std::int64_t nz = (AMREX_SPACEDIM == 3) ? nbox[AMREX_SPACEDIM - 1] : 1;
+        const std::int64_t ncell = nx * ny * nz;
+
+        // Managed buffers: written by the kernel below, then wrapped as host tensors.
+        amrex::Gpu::ManagedVector<Real> aux_Mx(ncell);
+        amrex::Gpu::ManagedVector<Real> aux_My(ncell);
+        amrex::Gpu::ManagedVector<Real> aux_Mz(ncell);
+
+        Real* AMREX_RESTRICT auxPtr_Mx = aux_Mx.dataPtr();
+        Real* AMREX_RESTRICT auxPtr_My = aux_My.dataPtr();
+        Real* AMREX_RESTRICT auxPtr_Mz = aux_Mz.dataPtr();
+
+        // Pack the magnetization of this box into the flat buffers.
+        amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept {
+            const int index = FlatCellIndex(i, j, k, bx_lo, nbox);
+            auxPtr_Mx[index] = Mx(i, j, k);
+            auxPtr_My[index] = My(i, j, k);
+            auxPtr_Mz[index] = Mz(i, j, k);
+        });
+
+        // The buffers must be complete before the host-side tensors read them.
+        amrex::Gpu::streamSynchronize();
+
+        // Wrap the buffers (no copy) as [nx, ny, nz]; layout matches FlatCellIndex.
+        at::Tensor reshaped_Mx = torch::from_blob(auxPtr_Mx, {nx, ny, nz}, real_dtype);
+        at::Tensor reshaped_My = torch::from_blob(auxPtr_My, {nx, ny, nz}, real_dtype);
+        at::Tensor reshaped_Mz = torch::from_blob(auxPtr_Mz, {nx, ny, nz}, real_dtype);
+
+        // Stack into [3, nx, ny, nz], move to the model device as float32, then add
+        // the batch dimension -> [1, 3, nx, ny, nz].
+        at::Tensor final_tensor_M = torch::stack({reshaped_Mx, reshaped_My, reshaped_Mz}, 0);
+        final_tensor_M = final_tensor_M.to(ml_device).to(torch::kFloat32);
+        final_tensor_M = final_tensor_M.unsqueeze(0);
+
+        // Normalize -> model -> denormalize
+        at::Tensor norm_torch    = x_norm_module.get_method("encode")({final_tensor_M}).toTensor();
+        at::Tensor outputs_torch = ml_module.forward({norm_torch}).toTensor();
+        at::Tensor denorm_torch  = y_norm_module.get_method("decode")({outputs_torch}).toTensor();
+
+        // Back to Real precision and contiguous, so raw pointers can index the result.
+        denorm_torch = denorm_torch.to(real_dtype).contiguous();
+        AMREX_ALWAYS_ASSERT_WITH_MESSAGE(denorm_torch.numel() == 3 * ncell,
+            "CalculateH_demag_ML: model output size does not match 3 x (cells in box)");
+
+        // One contiguous channel per H component (batch 0, channel c).
+        at::Tensor denorm_torch_Hx = denorm_torch.select(0, 0).select(0, 0).contiguous();
+        at::Tensor denorm_torch_Hy = denorm_torch.select(0, 0).select(0, 1).contiguous();
+        at::Tensor denorm_torch_Hz = denorm_torch.select(0, 0).select(0, 2).contiguous();
+
+        // Raw data pointers work in both CUDA and host-only builds
+        // (device memory when the tensors live on the GPU).
+        const Real* AMREX_RESTRICT Hx_ptr = denorm_torch_Hx.data_ptr<Real>();
+        const Real* AMREX_RESTRICT Hy_ptr = denorm_torch_Hy.data_ptr<Real>();
+        const Real* AMREX_RESTRICT Hz_ptr = denorm_torch_Hz.data_ptr<Real>();
+
+        // NOTE(review): libtorch launches on its own current stream; this relies on
+        // that work being ordered before kernels on the AMReX stream -- confirm.
+        amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE(int i, int j, int k) noexcept {
+            const int index = FlatCellIndex(i, j, k, bx_lo, nbox);
+            Hx_demag(i, j, k) = Hx_ptr[index];
+            Hy_demag(i, j, k) = Hy_ptr[index];
+            Hz_demag(i, j, k) = Hz_ptr[index];
+        });
+
+        // Finish the copy before this iteration's tensors are destroyed.
+        amrex::Gpu::streamSynchronize();
+    }
+}
diff --git a/Source/MagneX.H b/Source/MagneX.H
@@ -1,5 +1,6 @@
 #ifdef AMREX_USE_CUDA
 #include <cufft.h>
+#include <torch/script.h>
 #else
 #include <fftw3.h>
 #ifdef AMREX_USE_MPI
@@ -184,3 +185,11 @@ void WritePlotfile(MultiFab& Ms,
                    const Geometry& geom,
                    const Real& time,
                    const int& plt_step);
+
+// Demagnetization field evaluated through TorchScript modules; defined in Demag_ml.cpp.
+void CalculateH_demag_ML(const Array< MultiFab, AMREX_SPACEDIM> &   Mfield,
+                         torch::jit::script::Module& x_norm_module,
+                         torch::jit::script::Module& ml_module,
+                         torch::jit::script::Module& y_norm_module,
+                        Array< MultiFab, AMREX_SPACEDIM> &   H_demagfield);
+                        
diff --git a/Source/MagneX.cpp b/Source/MagneX.cpp
@@ -132,6 +132,10 @@ AMREX_GPU_MANAGED int MagneX::demag_coupling;
 // 0 = FFTW (single-MPI), 1 = heFFTe (distributed)
 AMREX_GPU_MANAGED int MagneX::FFT_solver;
 
+// ML flag
+int MagneX::ml_enable;
+
+
 void InitializeMagneXNamespace() {
 
     BL_PROFILE_VAR("InitializeMagneXNamespace()",InitializeMagneXNameSpace);
@@ -228,6 +232,9 @@ void InitializeMagneXNamespace() {
     restart = -1;
     pp.query("restart",restart);
 
+    ml_enable = 0;
+    pp.query("ml_enable",ml_enable);
+
     diag_type = -1;
     pp.query("diag_type",diag_type);
 
diff --git a/Source/MagneX_namespace.H b/Source/MagneX_namespace.H
@@ -52,6 +52,8 @@ namespace MagneX {
 
     extern int diag_type;
 
+    extern int ml_enable;
+
     extern int timedependent_Hbias;
     extern int timedependent_alpha;
 
diff --git a/Source/Make.package b/Source/Make.package
@@ -15,3 +15,4 @@ CEXE_headers += CartesianAlgorithm_K.H
 CEXE_headers += Demagnetization.H
 CEXE_headers += MagneX.H
 CEXE_headers += MagneX_namespace.H
+CEXE_sources += Demag_ml.cpp
diff --git a/Source/main.cpp b/Source/main.cpp
@@ -1,15 +1,17 @@
 #include "MagneX.H"
 #include "Demagnetization.H"
-
+#include <torch/script.h>
 #include <AMReX_MultiFab.H>
 #include <AMReX_VisMF.H>
-
+#include <AMReX_ParmParse.H>
 #ifdef AMREX_USE_SUNDIALS
 #include <AMReX_TimeIntegrator.H>
 #endif
 
 #include <cmath>
 
+#include <ATen/cuda/CUDAContext.h>  // for at::cuda::setDevice
+#include <c10/cuda/CUDAGuard.h>
 using namespace amrex;
 using namespace MagneX;
 
@@ -57,6 +59,10 @@ void main_main ()
     Array<MultiFab, AMREX_SPACEDIM> LLG_RHS;
     Array<MultiFab, AMREX_SPACEDIM> LLG_RHS_pre;
     Array<MultiFab, AMREX_SPACEDIM> LLG_RHS_avg;
+    torch::jit::script::Module ml_module;
+    torch::jit::script::Module x_norm_module;
+    torch::jit::script::Module y_norm_module;
+
 
     // Declare variables for hysteresis
     Real normalized_Mx;
@@ -100,6 +106,42 @@ void main_main ()
 
     }
 
+    // **********************************
+    // LOAD PYTORCH MODEL (TorchScript), only when ml_enable == 1
+
+    // Make the GPU chosen by AMReX the current libtorch device. Declared at
+    // function scope so it stays in effect for the rest of main_main.
+    int dev_id = amrex::Gpu::Device::deviceId();
+    c10::cuda::CUDAGuard device_guard(dev_id);
+
+    if (ml_enable == 1) {
+        BL_PROFILE("LoadPytorch");
+
+        // Paths of the TorchScript files; required once ML is enabled.
+        std::string ml_model_name;
+        std::string x_normalizer_name;
+        std::string y_normalizer_name;
+        ParmParse pp_ml;
+        pp_ml.get("ml_model_name", ml_model_name);
+        pp_ml.get("x_normalizer_name", x_normalizer_name);
+        pp_ml.get("y_normalizer_name", y_normalizer_name);
+
+        amrex::Print()<<"\n"<<ml_model_name<<"\n";
+        amrex::Print()<<x_normalizer_name<<"\n";
+        amrex::Print()<<y_normalizer_name<<"\n";
+
+        const torch::Device dev(torch::kCUDA, dev_id);
+        try {
+            // Deserialize each ScriptModule directly onto the target device.
+            ml_module = torch::jit::load(ml_model_name, dev);
+            x_norm_module = torch::jit::load(x_normalizer_name, dev);
+            y_norm_module = torch::jit::load(y_normalizer_name, dev);
+        } catch (const c10::Error& e) {
+            amrex::Abort(std::string("Error loading the model: ") + e.what());
+        }
+        Print() << "Model loaded.\n";
+    }
+
     // **********************************
     // SIMULATION SETUP
 
@@ -378,7 +420,12 @@ void main_main ()
 
             // Evolve H_demag
             if (demag_coupling == 1) {
-                demag_solver.CalculateH_demag(Mfield_old, H_demagfield);
+                // demag_solver.CalculateH_demag(Mfield_old, H_demagfield);
+                if (ml_enable == 1) {
+                    CalculateH_demag_ML(Mfield_old, x_norm_module, ml_module, y_norm_module, H_demagfield);
+                } else {
+                    demag_solver.CalculateH_demag(Mfield_old, H_demagfield);
+                }
             }
 
             if (exchange_coupling == 1) {
@@ -496,7 +543,13 @@ void main_main ()
 
                 // Poisson solve and H_demag computation with Mfield
                 if (demag_coupling == 1) {
-                    demag_solver.CalculateH_demag(Mfield, H_demagfield);
+                    // demag_solver.CalculateH_demag(Mfield, H_demagfield);
+                    if (ml_enable == 1) {
+                        CalculateH_demag_ML(Mfield, x_norm_module, ml_module, y_norm_module, H_demagfield);
+                    } else {
+                        demag_solver.CalculateH_demag(Mfield, H_demagfield);
+                    }
+
                 }
 
                 if (exchange_coupling == 1) {
@@ -662,7 +715,12 @@ void main_main ()
                 // H_demag
                 if (demag_coupling == 1) {
                     if (fast_demag==1) {
-                        demag_solver.CalculateH_demag(ar_state, H_demagfield);
+                        // demag_solver.CalculateH_demag(ar_state, H_demagfield);
+                        if (ml_enable == 1) {
+                            CalculateH_demag_ML(ar_state, x_norm_module, ml_module, y_norm_module, H_demagfield);
+                        } else {
+                            demag_solver.CalculateH_demag(ar_state, H_demagfield);
+                        }
                     } else {
                         for (int idim=0; idim<AMREX_SPACEDIM; ++idim) {
                             H_demagfield[idim].setVal(0.);