From 9c1febcaff19b66ce6edd0c175af69eaf271b318 Mon Sep 17 00:00:00 2001 From: dprelogo Date: Mon, 7 Jul 2025 16:29:51 +0100 Subject: [PATCH 01/44] Add NaN print traces --- pypolychord/_pypolychord.cpp | 11 +++++++++++ src/polychord/calculate.f90 | 7 +++++++ src/polychord/chordal_sampling.f90 | 16 ++++++++++++++++ src/polychord/utils.F90 | 12 ++++++++++++ 4 files changed, 46 insertions(+) diff --git a/pypolychord/_pypolychord.cpp b/pypolychord/_pypolychord.cpp index fb727365..5cc5b413 100644 --- a/pypolychord/_pypolychord.cpp +++ b/pypolychord/_pypolychord.cpp @@ -4,6 +4,7 @@ #include "interfaces.hpp" #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #include +#include #ifdef USE_MPI #include @@ -38,6 +39,16 @@ static PyObject *python_loglikelihood = NULL; double loglikelihood(double* theta, int nDims, double* phi, int nDerived) { + // --- START OF ADDED WARNING --- + for (int i = 0; i < nDims; ++i) { + if (std::isnan(theta[i])) { + PySys_WriteStderr("PolyChord TRACE (C++ loglikelihood): NaN detected in parameter vector received from Fortran.\n"); + // We can break after the first one is found. + break; + } + } + // --- END OF ADDED WARNING --- + /* Create a python version of theta */ npy_intp theta_shape[] = {nDims}; PyObject *array_theta = PyArray_SimpleNewFromData(1, theta_shape, NPY_DOUBLE, theta); diff --git a/src/polychord/calculate.f90 b/src/polychord/calculate.f90 index 61d42f2b..853887c9 100644 --- a/src/polychord/calculate.f90 +++ b/src/polychord/calculate.f90 @@ -37,6 +37,13 @@ function prior(cube) result(theta) cube = point(settings%h0:settings%h1) + ! --- START OF ADDED WARNING --- + if (any(isnan(cube))) then + write(stdout_unit,'(A)') 'PolyChord TRACE (calculate_point): NaN detected in hypercube vector before prior transformation.' + flush(stdout_unit) + end if + ! --- END OF ADDED WARNING --- + if ( any(cubemx) ) then theta = 0 logL = settings%logzero diff --git a/src/polychord/chordal_sampling.f90 b/src/polychord/chordal_sampling.f90 index c4ed3fc4..d7d8102d 100644 --- a/src/polychord/chordal_sampling.f90 +++ b/src/polychord/chordal_sampling.f90 @@ -78,6 +78,15 @@ function prior(cube) result(theta) ! Normalise it w = sqrt(dot_product(nhat,nhat)) + + ! --- START OF ADDED WARNING --- + if (w < 1.d-20) then + write(stdout_unit,'(A)') 'PolyChord WARNING: Slice sampling direction vector has zero magnitude (w=0).' + write(stdout_unit,'(A)') ' Division by zero is imminent, resulting in NaN parameters.' + flush(stdout_unit) + end if + ! --- END OF ADDED WARNING --- + nhat = nhat/w w = w * 3d0 !* exp( lgamma(0.5d0 * settings%nDims) - lgamma(1.5d0 + 0.5d0 * settings%nDims) ) * settings%nDims @@ -246,6 +255,13 @@ function prior(cube) result(theta) ! Draw a random point within L and R baby_point(S%h0:S%h1) = x0(S%h0:S%h1)+ (random_real() * (x0Rd+x0Ld) - x0Ld) * nhat + ! --- START OF ADDED WARNING --- + if (any(isnan(baby_point(S%h0:S%h1)))) then + write(stdout_unit,'(A)') 'PolyChord TRACE (slice_sample): NaN detected in new parameter vector immediately after creation.' + flush(stdout_unit) + end if + ! --- END OF ADDED WARNING --- + ! calculate the likelihood call calculate_point(loglikelihood,prior,baby_point,S,n) diff --git a/src/polychord/utils.F90 b/src/polychord/utils.F90 index e6fc74ef..4c34dde1 100644 --- a/src/polychord/utils.F90 +++ b/src/polychord/utils.F90 @@ -645,6 +645,18 @@ function calc_cholesky(a) result(L) if (L(i,i).le.0d0) then ! If the cholesky decomposition does not exist, then set it to ! be a re-scaled identity matrix + + ! --- START OF ADDED WARNING --- + real(dp) :: trace_val + trace_val = trace(a) + if (trace_val <= 1.d-20) then + write(stdout_unit,'(A)') 'PolyChord WARNING: Cholesky decomposition failed.' + write(stdout_unit,'(A, E12.5)') ' Matrix trace is near-zero: ', trace_val + write(stdout_unit,'(A)') ' This will likely lead to a NaN direction vector.' + flush(stdout_unit) + end if + ! --- END OF ADDED WARNING --- + L = identity_matrix(size(a,1)) * sqrt(trace(a)) return else From f03f1929c8043c136992d9815e0703b4690ad047 Mon Sep 17 00:00:00 2001 From: dprelogo Date: Mon, 7 Jul 2025 16:50:38 +0100 Subject: [PATCH 02/44] move variable definition to the beginning --- src/polychord/utils.F90 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/polychord/utils.F90 b/src/polychord/utils.F90 index 4c34dde1..f8d65854 100644 --- a/src/polychord/utils.F90 +++ b/src/polychord/utils.F90 @@ -634,6 +634,8 @@ function calc_cholesky(a) result(L) real(dp), intent(in),dimension(:,:) :: a real(dp), dimension(size(a,1),size(a,2)) :: L integer :: i,j + ! added trace_val to check for NaN + real(dp) :: trace_val ! Set it all to zero to begin with L = 0 @@ -647,7 +649,6 @@ function calc_cholesky(a) result(L) ! be a re-scaled identity matrix ! --- START OF ADDED WARNING --- - real(dp) :: trace_val trace_val = trace(a) if (trace_val <= 1.d-20) then write(stdout_unit,'(A)') 'PolyChord WARNING: Cholesky decomposition failed.' From d90aa543e5ce61a6f32feebb0ad4a8d6ca0937cc Mon Sep 17 00:00:00 2001 From: dprelogo Date: Mon, 7 Jul 2025 17:02:05 +0100 Subject: [PATCH 03/44] add stdout_unit usage --- src/polychord/calculate.f90 | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/polychord/calculate.f90 b/src/polychord/calculate.f90 index 853887c9..b12c446a 100644 --- a/src/polychord/calculate.f90 +++ b/src/polychord/calculate.f90 @@ -5,6 +5,9 @@ module calculate_module subroutine calculate_point(loglikelihood,prior,point,settings,nlike) use settings_module, only: program_settings + ! added to check for NaN in hypercube vector + use utils_module, only: stdout_unit + implicit none interface function loglikelihood(theta,phi) From 9df0aff7f6e4ff018acf0cffea42103ec33a8f61 Mon Sep 17 00:00:00 2001 From: dprelogo Date: Mon, 7 Jul 2025 17:11:10 +0100 Subject: [PATCH 04/44] add stoud_unit to use --- src/polychord/chordal_sampling.f90 | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/polychord/chordal_sampling.f90 b/src/polychord/chordal_sampling.f90 index d7d8102d..c84f0bb4 100644 --- a/src/polychord/chordal_sampling.f90 +++ b/src/polychord/chordal_sampling.f90 @@ -7,6 +7,8 @@ module chordal_module function SliceSampling(loglikelihood,prior,settings,logL,seed_point,cholesky,nlike,num_repeats) result(baby_points) use settings_module, only: program_settings use random_module, only: random_orthonormal_basis,random_real + ! added to check for NaN in hypercube vector, to support stdout + use utils_module, only: stdout_unit implicit none interface @@ -171,7 +173,8 @@ end function generate_nhats !! function slice_sample(loglikelihood,prior,logL,nhat,x0,w,S,n) result(baby_point) use settings_module, only: program_settings - use utils_module, only: distance + ! added stdout_unit to support printing warnings about NaN + use utils_module, only: distance, stdout_unit use random_module, only: random_real use calculate_module, only: calculate_point implicit none From f0be490b4b883b72e97bd2dd5561e20510997534 Mon Sep 17 00:00:00 2001 From: dprelogo Date: Tue, 8 Jul 2025 10:42:55 +0100 Subject: [PATCH 05/44] change write to * --- src/polychord/calculate.f90 | 4 ++-- src/polychord/chordal_sampling.f90 | 22 +++++++++++++++----- src/polychord/utils.F90 | 32 ++++++++++++++++++++++++------ 3 files changed, 45 insertions(+), 13 deletions(-) diff --git a/src/polychord/calculate.f90 b/src/polychord/calculate.f90 index b12c446a..1ded390a 100644 --- a/src/polychord/calculate.f90 +++ b/src/polychord/calculate.f90 @@ -42,8 +42,8 @@ function prior(cube) result(theta) ! --- START OF ADDED WARNING --- if (any(isnan(cube))) then - write(stdout_unit,'(A)') 'PolyChord TRACE (calculate_point): NaN detected in hypercube vector before prior transformation.' - flush(stdout_unit) + write(*,'(A)') 'PolyChord TRACE (calculate_point): NaN detected in hypercube vector before prior transformation.' + flush(6) end if ! --- END OF ADDED WARNING --- diff --git a/src/polychord/chordal_sampling.f90 b/src/polychord/chordal_sampling.f90 index c84f0bb4..f4d0e820 100644 --- a/src/polychord/chordal_sampling.f90 +++ b/src/polychord/chordal_sampling.f90 @@ -74,6 +74,18 @@ function prior(cube) result(theta) ! Transform to the unit hypercube nhats = matmul(cholesky,nhats) + ! --- NEW, CRITICAL TRACE --- + if (any(isnan(nhats))) then + write(*,'(A)') 'PolyChord TRACE (SliceSampling): NaN detected in nhats vector immediately after matmul(cholesky,nhats).' + if (any(isnan(cholesky))) then + write(*,'(A)') ' The source is a NaN cholesky matrix.' + else + write(*,'(A)') ' The source is NOT the cholesky matrix. Problem is in generate_nhats or matmul.' + end if + flush(6) + end if + ! --- END OF TRACE --- + do i_babies=1,size(nhats,2) ! Get a new random direction nhat = nhats(:,i_babies) @@ -83,9 +95,9 @@ function prior(cube) result(theta) ! --- START OF ADDED WARNING --- if (w < 1.d-20) then - write(stdout_unit,'(A)') 'PolyChord WARNING: Slice sampling direction vector has zero magnitude (w=0).' - write(stdout_unit,'(A)') ' Division by zero is imminent, resulting in NaN parameters.' - flush(stdout_unit) + write(*,'(A)') 'PolyChord WARNING: Slice sampling direction vector has zero magnitude (w=0).' + write(*,'(A)') ' Division by zero is imminent, resulting in NaN parameters.' + flush(6) end if ! --- END OF ADDED WARNING --- @@ -260,8 +272,8 @@ function prior(cube) result(theta) ! --- START OF ADDED WARNING --- if (any(isnan(baby_point(S%h0:S%h1)))) then - write(stdout_unit,'(A)') 'PolyChord TRACE (slice_sample): NaN detected in new parameter vector immediately after creation.' - flush(stdout_unit) + write(*,'(A)') 'PolyChord TRACE (slice_sample): NaN detected in new parameter vector immediately after creation.' + flush(6) end if ! --- END OF ADDED WARNING --- diff --git a/src/polychord/utils.F90 b/src/polychord/utils.F90 index f8d65854..eb41e748 100644 --- a/src/polychord/utils.F90 +++ b/src/polychord/utils.F90 @@ -650,15 +650,29 @@ function calc_cholesky(a) result(L) ! --- START OF ADDED WARNING --- trace_val = trace(a) - if (trace_val <= 1.d-20) then - write(stdout_unit,'(A)') 'PolyChord WARNING: Cholesky decomposition failed.' - write(stdout_unit,'(A, E12.5)') ' Matrix trace is near-zero: ', trace_val - write(stdout_unit,'(A)') ' This will likely lead to a NaN direction vector.' - flush(stdout_unit) + if (trace_val < 1.d-20) then + write(*,'(A)') 'PolyChord WARNING: Cholesky decomposition failed.' + if (trace_val < 0.d0) then + write(*,'(A, E12.5)') ' Matrix trace is NEGATIVE: ', trace_val + else + write(*,'(A, E12.5)') ' Matrix trace is near-zero: ', trace_val + end if + write(*,'(A)') ' This will likely lead to a NaN direction vector.' + flush(6) end if ! --- END OF ADDED WARNING --- - L = identity_matrix(size(a,1)) * sqrt(trace(a)) + ! L = identity_matrix(size(a,1)) * sqrt(trace(a)) + + ! Make this sqrt safe for negative inputs + L = identity_matrix(size(a,1)) * sqrt(max(trace_val, 0.d0)) + + ! --- NEW, CRITICAL TRACE --- + if (any(isnan(L))) then + write(*,'(A)') 'PolyChord TRACE (calc_cholesky): NaN detected in Cholesky matrix L immediately after creation from a failed decomposition.' + flush(6) + end if + return else L(i,i)=sqrt(L(i,i)) @@ -670,6 +684,12 @@ function calc_cholesky(a) result(L) end do + ! --- NEW, FINAL SANITY CHECK --- + if (any(isnan(L))) then + write(*,'(A)') 'PolyChord TRACE (calc_cholesky): NaN detected in Cholesky matrix L at function exit.' + flush(6) + endif + end function calc_cholesky function calc_covmat(x,wraparound) result(covmat) From 7a39ed4383967157ac8b40639e4cdb1b31559941 Mon Sep 17 00:00:00 2001 From: dprelogo Date: Tue, 8 Jul 2025 14:31:21 +0100 Subject: [PATCH 06/44] add initial intel llvm makefile --- Makefile | 15 ++++++--- Makefile_intel_llvm | 75 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 5 deletions(-) create mode 100644 Makefile_intel_llvm diff --git a/Makefile b/Makefile index adca6c15..16dc1e0c 100644 --- a/Makefile +++ b/Makefile @@ -25,18 +25,24 @@ export MPI DEBUG # We can autodetect the compiler type on unix systems via the shell. # if you want to override this then just run make with # make COMPILER_TYPE= -# where is gnu or intel +# where is gnu, intel, intel_llvm, or cray ifeq "$(shell which ftn >/dev/null 2>&1; echo $$?)" "0" COMPILER_TYPE=cray -else ifeq "$(shell which ifort >/dev/null 2>&1; echo $$?)" "0" +else ifeq "$(shell which ifx >/dev/null 2>&1; echo $$?)" "0" +# Detected Intel LLVM-based compilers (ifx, icx, icpx) +COMPILER_TYPE=intel_llvm +else ifeq "$(shell which ifort >/dev/null 2>&1; echo $$?)" "0" +# Detected Intel classic compilers (ifort, icc, icpc) COMPILER_TYPE=intel else ifeq "$(shell which gfortran >/dev/null 2>&1; echo $$?)" "0" COMPILER_TYPE=gnu endif -ifeq ($(COMPILER_TYPE),intel) +ifeq ($(COMPILER_TYPE),intel_llvm) +include Makefile_intel_llvm +else ifeq ($(COMPILER_TYPE),intel) include Makefile_intel -else ifeq ($(COMPILER_TYPE),gnu) +else ifeq ($(COMPILER_TYPE),gnu) include Makefile_gnu else ifeq ($(COMPILER_TYPE),cray) include Makefile_cray @@ -118,4 +124,3 @@ veryclean: clean $(addsuffix veryclean,$(CLEANDIRS)) $(RM) *~ build dist pypolychord.egg-info pypolychord/*.pyc pypolychord/__pycache__ __pycache__ pypolychord/lib/*.so $(addsuffix veryclean,$(CLEANDIRS)) : %veryclean: $(MAKE) -C $* veryclean - diff --git a/Makefile_intel_llvm b/Makefile_intel_llvm new file mode 100644 index 00000000..92675711 --- /dev/null +++ b/Makefile_intel_llvm @@ -0,0 +1,75 @@ +# Makefile_intel_llvm (for Intel LLVM-based compilers: ifx, icx, icpx) + +ifeq ($(MPI),1) +# Using specific MPI-enabled Intel LLVM compilers +FC = mpiifx +CC = mpiicx +CXX = mpiicpx +# For shared libraries, include the -v (verbose) flag as per your command +LDSHARED = $(LD) -shared -v +else +# Using standard Intel LLVM compilers +FC = ifx +CXX = icpx +CC = icx +LDSHARED = $(LD) -shared +endif + +# CPP uses the C compiler +CPP = $(CC) +# LD uses the C/C++ compiler as the primary linker driver for mixed-language projects +# This aligns with your `LD=mpiicx` override +LD = $(CC) + +# Archive tool: Always 'ar rv' now that IPO-specific xiar is not used +AR = ar rv + + +# default flags +# -------------- +# fpp : perform preprocessing +# fpic : shared object libraries +# assume noold_maxminloc : Removed, as it might not be relevant or cause issues with ifx/icx +# heap-arrays : allocate local arrays on heap +FFLAGS += -fpp -fpic -heap-arrays +CXXFLAGS += -std=c++11 -fpic +# nofor_main : Removed, as per previous command and often not needed with modern compilers +LDFLAGS += # This line is effectively empty now that -nofor-main is removed + +# Link specific Intel runtime libraries, replacing generic -cxxlib +LDLIBS += -lifcore -lifport -limf -lsvml -liomp5 + +ifeq ($(DEBUG),1) +# Debugging mode +# -------------- +# g : enable gnu debugger compatibility +# O0 : no optimisation +# traceback : create a backtrace on system failure +# check all : all checks (whilst compiling) +# warn all : all warnings (whilst running) +# ftrapuv : Traps uninitialized variables by setting them to very large values +# debug all : additional debugging information +# gen-interfaces : generate an interface block for each routine +# warn-interfaces: warn on these interface blocks +# fpe0 : halts program if dividing by zero, etc +FFLAGS += -g -O0 -traceback -check all,noarg_temp_created -warn all -ftrapuv -debug all -gen-interfaces -warn-interfaces +CXXFLAGS += -g -O0 -traceback -Wcheck -Wremarks -Wall -Weffc++ -ftrapuv -debug all +LDFLAGS += -g +else +# Optimised mode +# -------------- +# ipo : (Removed -ipo and related code, as per your modifications for ifx/icx) +# O3 : maximum optimisation +# no-prec-div : (Removed, as per your modifications) +# static : link intel libraries statically (covered by specific LDLIBS) +# xHost : maximise architecture usage (from $(HOST) variable) +# w : turn off all warnings +# vec-report0 : (Removed, as per your modifications) +# opt-report0 : (Removed, as per your modifications) +FFLAGS += -O3 $(HOST) -w +CXXFLAGS += -O3 $(HOST) -w + +# The ifdef IPO block for AR is removed as IPO itself is removed for this config. +# AR remains the default 'ar rv'. + +endif \ No newline at end of file From 02ab42bcdbdc2d3fd8f495e9f41fa8158f90ff2c Mon Sep 17 00:00:00 2001 From: dprelogo Date: Tue, 8 Jul 2025 14:38:11 +0100 Subject: [PATCH 07/44] add less aggressive optimisation --- Makefile_intel_llvm | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/Makefile_intel_llvm b/Makefile_intel_llvm index 92675711..d912f499 100644 --- a/Makefile_intel_llvm +++ b/Makefile_intel_llvm @@ -5,23 +5,20 @@ ifeq ($(MPI),1) FC = mpiifx CC = mpiicx CXX = mpiicpx -# For shared libraries, include the -v (verbose) flag as per your command -LDSHARED = $(LD) -shared -v else # Using standard Intel LLVM compilers FC = ifx CXX = icpx CC = icx -LDSHARED = $(LD) -shared endif +LDSHARED = $(LD) -shared # CPP uses the C compiler CPP = $(CC) # LD uses the C/C++ compiler as the primary linker driver for mixed-language projects -# This aligns with your `LD=mpiicx` override LD = $(CC) -# Archive tool: Always 'ar rv' now that IPO-specific xiar is not used +# Archive tool AR = ar rv @@ -29,14 +26,12 @@ AR = ar rv # -------------- # fpp : perform preprocessing # fpic : shared object libraries -# assume noold_maxminloc : Removed, as it might not be relevant or cause issues with ifx/icx # heap-arrays : allocate local arrays on heap FFLAGS += -fpp -fpic -heap-arrays CXXFLAGS += -std=c++11 -fpic -# nofor_main : Removed, as per previous command and often not needed with modern compilers -LDFLAGS += # This line is effectively empty now that -nofor-main is removed +LDFLAGS += -# Link specific Intel runtime libraries, replacing generic -cxxlib +# Link specific Intel runtime libraries LDLIBS += -lifcore -lifport -limf -lsvml -liomp5 ifeq ($(DEBUG),1) @@ -58,18 +53,11 @@ LDFLAGS += -g else # Optimised mode # -------------- -# ipo : (Removed -ipo and related code, as per your modifications for ifx/icx) -# O3 : maximum optimisation -# no-prec-div : (Removed, as per your modifications) -# static : link intel libraries statically (covered by specific LDLIBS) -# xHost : maximise architecture usage (from $(HOST) variable) -# w : turn off all warnings -# vec-report0 : (Removed, as per your modifications) -# opt-report0 : (Removed, as per your modifications) -FFLAGS += -O3 $(HOST) -w -CXXFLAGS += -O3 $(HOST) -w - -# The ifdef IPO block for AR is removed as IPO itself is removed for this config. -# AR remains the default 'ar rv'. +# O2 : A high but safe level of optimisation. +# fp-model precise: Disables value-unsafe floating-point optimisations. This is crucial for numerical stability and cross-compiler consistency. +# xHost : maximise architecture usage (from $(HOST) variable) +# warn all : Enable all warnings instead of suppressing them with -w. +FFLAGS += -O2 -fp-model precise $(HOST) -warn all +CXXFLAGS += -O2 -fp-model precise $(HOST) -Wall endif \ No newline at end of file From c750529991f1432dd6d32f1e0f834f6a843dc822 Mon Sep 17 00:00:00 2001 From: dprelogo Date: Tue, 8 Jul 2025 14:55:18 +0100 Subject: [PATCH 08/44] add print function for fortran --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 16dc1e0c..c6bc6410 100644 --- a/Makefile +++ b/Makefile @@ -111,6 +111,8 @@ print_CC: @echo $(CC) print_CXX: @echo $(CXX) +print_FC: + @echo $(FC) CLEANDIRS = $(POLYCHORD_DIR) $(PYPOLYCHORD_DIR) $(LIKELIHOOD_DIR) $(BIN_DIR) $(LIB_DIR) $(DRIVERS_DIR) .PHONY: clean veryclean print_CC print_CXX $(addsuffix clean,$(CLEANDIRS)) $(addsuffix veryclean,$(CLEANDIRS)) From 112a7077b5c713107c6a9942ad3056adb8130e61 Mon Sep 17 00:00:00 2001 From: dprelogo Date: Wed, 9 Jul 2025 11:01:01 +0100 Subject: [PATCH 09/44] remove flush --- src/polychord/calculate.f90 | 1 - src/polychord/chordal_sampling.f90 | 3 --- src/polychord/utils.F90 | 3 --- 3 files changed, 7 deletions(-) diff --git a/src/polychord/calculate.f90 b/src/polychord/calculate.f90 index 1ded390a..5b24f006 100644 --- a/src/polychord/calculate.f90 +++ b/src/polychord/calculate.f90 @@ -43,7 +43,6 @@ function prior(cube) result(theta) ! --- START OF ADDED WARNING --- if (any(isnan(cube))) then write(*,'(A)') 'PolyChord TRACE (calculate_point): NaN detected in hypercube vector before prior transformation.' - flush(6) end if ! --- END OF ADDED WARNING --- diff --git a/src/polychord/chordal_sampling.f90 b/src/polychord/chordal_sampling.f90 index f4d0e820..b5af21d1 100644 --- a/src/polychord/chordal_sampling.f90 +++ b/src/polychord/chordal_sampling.f90 @@ -82,7 +82,6 @@ function prior(cube) result(theta) else write(*,'(A)') ' The source is NOT the cholesky matrix. Problem is in generate_nhats or matmul.' end if - flush(6) end if ! --- END OF TRACE --- @@ -97,7 +96,6 @@ function prior(cube) result(theta) if (w < 1.d-20) then write(*,'(A)') 'PolyChord WARNING: Slice sampling direction vector has zero magnitude (w=0).' write(*,'(A)') ' Division by zero is imminent, resulting in NaN parameters.' - flush(6) end if ! --- END OF ADDED WARNING --- @@ -273,7 +271,6 @@ function prior(cube) result(theta) ! --- START OF ADDED WARNING --- if (any(isnan(baby_point(S%h0:S%h1)))) then write(*,'(A)') 'PolyChord TRACE (slice_sample): NaN detected in new parameter vector immediately after creation.' - flush(6) end if ! --- END OF ADDED WARNING --- diff --git a/src/polychord/utils.F90 b/src/polychord/utils.F90 index eb41e748..5379a637 100644 --- a/src/polychord/utils.F90 +++ b/src/polychord/utils.F90 @@ -658,7 +658,6 @@ function calc_cholesky(a) result(L) write(*,'(A, E12.5)') ' Matrix trace is near-zero: ', trace_val end if write(*,'(A)') ' This will likely lead to a NaN direction vector.' - flush(6) end if ! --- END OF ADDED WARNING --- @@ -670,7 +669,6 @@ function calc_cholesky(a) result(L) ! --- NEW, CRITICAL TRACE --- if (any(isnan(L))) then write(*,'(A)') 'PolyChord TRACE (calc_cholesky): NaN detected in Cholesky matrix L immediately after creation from a failed decomposition.' - flush(6) end if return @@ -687,7 +685,6 @@ function calc_cholesky(a) result(L) ! --- NEW, FINAL SANITY CHECK --- if (any(isnan(L))) then write(*,'(A)') 'PolyChord TRACE (calc_cholesky): NaN detected in Cholesky matrix L at function exit.' - flush(6) endif end function calc_cholesky From c7115b2fa1f9d757b715b6c4c16829c80760fe9e Mon Sep 17 00:00:00 2001 From: dprelogo Date: Wed, 9 Jul 2025 11:14:29 +0100 Subject: [PATCH 10/44] add two more traces --- src/polychord/mpi_utils.F90 | 6 ++++++ src/polychord/nested_sampling.F90 | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/src/polychord/mpi_utils.F90 b/src/polychord/mpi_utils.F90 index e5bd84bc..3bb11ad4 100644 --- a/src/polychord/mpi_utils.F90 +++ b/src/polychord/mpi_utils.F90 @@ -435,6 +435,12 @@ subroutine throw_babies(baby_points,nlike,epoch,mpi_information) integer, intent(in) :: epoch !> The epoch the babies were generated in type(mpi_bundle), intent(in) :: mpi_information + ! --- START OF ADDED TRACE --- + if (any(isnan(baby_points))) then + write(*,'(A, I0, A)') 'PolyChord TRACE (throw_babies): Worker rank ', mpi_information%rank, ' is about to send a NaN baby_points array to the root.' + endif + ! --- END OF ADDED TRACE --- + call MPI_SEND( &! baby_points, &! size(baby_points,1)*size(baby_points,2),&! diff --git a/src/polychord/nested_sampling.F90 b/src/polychord/nested_sampling.F90 index 276fe2e2..bac976a8 100644 --- a/src/polychord/nested_sampling.F90 +++ b/src/polychord/nested_sampling.F90 @@ -444,6 +444,10 @@ end subroutine dumper else !(myrank/=root) + ! --- START OF HELLO WORLD TEST --- + write(*, '(A, I0, A)') 'Worker rank ', mpi_information%rank, ' reporting for duty.' + ! --- END OF HELLO WORLD TEST --- + ! These are the worker tasks ! -------------------------- ! From 08e97efc3806504dca1a9d833d740966ea906381 Mon Sep 17 00:00:00 2001 From: dprelogo Date: Wed, 9 Jul 2025 12:22:58 +0100 Subject: [PATCH 11/44] add logs --- src/polychord/nested_sampling.F90 | 6 ++++++ src/polychord/random_utils.F90 | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/src/polychord/nested_sampling.F90 b/src/polychord/nested_sampling.F90 index bac976a8..8918782a 100644 --- a/src/polychord/nested_sampling.F90 +++ b/src/polychord/nested_sampling.F90 @@ -489,6 +489,12 @@ end subroutine dumper time1 = time() slice_time = slice_time + time1-time0 + ! --- NEW TRACE --- + if (any(isnan(baby_points))) then + write(*,'(A, I0, A)') 'PolyChord TRACE (NestedSampling): Worker rank ', mpi_information%rank, ' received NaN array from SliceSampling.' + endif + ! --- END TRACE --- + ! 3) Send the baby points back call throw_babies(baby_points,nlike,worker_epoch,mpi_information) diff --git a/src/polychord/random_utils.F90 b/src/polychord/random_utils.F90 index 1192606e..f27dcbe1 100644 --- a/src/polychord/random_utils.F90 +++ b/src/polychord/random_utils.F90 @@ -290,6 +290,13 @@ function random_direction(nDims) random_direction = random_gaussian(nDims) ! Calculate the modulus squared random_direction2 = dot_product(random_direction,random_direction) + + ! --- START OF ADDED TRACE (Non-invasive) --- + if (random_direction2 <= 1.d-30) then + write(*, '(A)') 'PolyChord TRACE (random_direction): Generated a near-zero-length random vector. This could cause issues if not handled.' + flush(6) + endif + ! --- END OF ADDED TRACE --- end do ! normalise the vector From a061d68c0ec4babf9f078c7891ee41894c0b457e Mon Sep 17 00:00:00 2001 From: dprelogo Date: Mon, 14 Jul 2025 12:58:07 +0100 Subject: [PATCH 12/44] inject intentional NaN --- src/polychord/generate.F90 | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/polychord/generate.F90 b/src/polychord/generate.F90 index fc8e3a25..631bb457 100644 --- a/src/polychord/generate.F90 +++ b/src/polychord/generate.F90 @@ -155,6 +155,13 @@ function prior(cube) result(theta) ! Generate a random coordinate live_point(settings%h0:settings%h1) = random_reals(settings%nDims) + ! --- START OF INTENTIONAL NaN INJECTION (FOR TESTING) --- + if (RTI%nlive(1) == 10) then + write(*,*) 'TEST: Intentionally injecting a NaN into the hypercube vector.' + live_point(settings%h0) = 0.0_dp / 0.0_dp + end if + ! --- END OF INTENTIONAL NaN INJECTION --- + ! Compute physical coordinates, likelihoods and derived parameters time0 = time() call calculate_point( loglikelihood, prior, live_point, settings, nlike) From 54cd7463fca18558af5670b138cb09d2cefaabb5 Mon Sep 17 00:00:00 2001 From: dprelogo Date: Mon, 14 Jul 2025 15:30:29 +0100 Subject: [PATCH 13/44] inject NaN on purpose --- src/polychord/generate.F90 | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/polychord/generate.F90 b/src/polychord/generate.F90 index 631bb457..7c794193 100644 --- a/src/polychord/generate.F90 +++ b/src/polychord/generate.F90 @@ -113,6 +113,8 @@ function prior(cube) result(theta) integer :: nlike ! number of likelihood calls integer :: nprior, ndiscarded integer :: ngenerated ! use to track order points are generated in + ! --- NEW COUNTER FOR NaN INJECTION TEST --- + integer :: calculation_counter real(dp) :: time0,time1,total_time real(dp),dimension(size(settings%grade_dims)) :: speed @@ -121,6 +123,7 @@ function prior(cube) result(theta) ! Initialise number of likelihood calls to zero here nlike = 0 ngenerated = 1 + calculation_counter = 0 ! Initialize the new counter if(is_root(mpi_information)) then @@ -156,9 +159,10 @@ function prior(cube) result(theta) live_point(settings%h0:settings%h1) = random_reals(settings%nDims) ! --- START OF INTENTIONAL NaN INJECTION (FOR TESTING) --- - if (RTI%nlive(1) == 10) then - write(*,*) 'TEST: Intentionally injecting a NaN into the hypercube vector.' - live_point(settings%h0) = 0.0_dp / 0.0_dp + calculation_counter = calculation_counter + 1 + if (calculation_counter == 5) then + write(*,*) 'TEST (Linear Mode): Intentionally injecting a NaN via sqrt(-1.0).' + live_point(settings%h0) = sqrt(-1.0_dp) end if ! --- END OF INTENTIONAL NaN INJECTION --- @@ -256,6 +260,14 @@ function prior(cube) result(theta) ! The workers simply generate and send points until they're told to stop by the administrator do while(live_point_needed(live_point,mpi_information)) + ! --- START OF INTENTIONAL NaN INJECTION (FOR TESTING) --- + calculation_counter = calculation_counter + 1 + if (mpi_information%rank == 1 .and. calculation_counter == 5) then + write(*,*) 'TEST (Parallel Mode, Rank 1): Intentionally injecting a NaN via sqrt(-1.0).' + live_point(settings%h0) = sqrt(-1.0_dp) + end if + ! --- END OF INTENTIONAL NaN INJECTION --- + time0 = time() call calculate_point( loglikelihood, prior, live_point, settings,nlike) ! Compute physical coordinates, likelihoods and derived parameters ndiscarded=ndiscarded+1 From 9aec875dd4362f4618c2542d70f5a99de0e7e5fd Mon Sep 17 00:00:00 2001 From: dprelogo Date: Mon, 14 Jul 2025 15:30:46 +0100 Subject: [PATCH 14/44] add additional NaN checks --- src/polychord/calculate.f90 | 20 ++++++++++++++++++++ src/polychord/chordal_sampling.f90 | 25 +++++++++++++++++++++++++ src/polychord/nested_sampling.F90 | 10 ++++++++++ 3 files changed, 55 insertions(+) diff --git a/src/polychord/calculate.f90 b/src/polychord/calculate.f90 index 5b24f006..c4d78c23 100644 --- a/src/polychord/calculate.f90 +++ b/src/polychord/calculate.f90 @@ -40,6 +40,12 @@ function prior(cube) result(theta) cube = point(settings%h0:settings%h1) + ! --- NEW AGGRESSIVE CHECK 0 --- + if (any(isnan(point))) then + write(*, '(A)') 'TRACE 0 (calculate_point): NaN detected in the full input POINT vector.' + write(*, '(A, *(F24.15))') ' point = ', point + end if + ! --- START OF ADDED WARNING --- if (any(isnan(cube))) then write(*,'(A)') 'PolyChord TRACE (calculate_point): NaN detected in hypercube vector before prior transformation.' @@ -51,7 +57,21 @@ function prior(cube) result(theta) logL = settings%logzero else where(settings%wraparound) cube = modulo(cube,1d0) + + if (any(isnan(cube))) then + ! --- NEW AGGRESSIVE CHECK 1 --- + write(*,'(A)') 'TRACE 1 (calculate_point): NaN detected in hypercube vector AFTER MODULO.' + write(*, '(A, *(F24.15))') ' cube = ', cube + end if + theta = prior(cube) + + ! --- NEW AGGRESSIVE CHECK 2 (MOST IMPORTANT) --- + if (any(isnan(theta))) then + write(*,'(A)') 'TRACE 2 (calculate_point): NaN detected in physical vector THETA before likelihood call.' + write(*, '(A, *(F24.15))') ' theta = ', theta + end if + logL = loglikelihood(theta,phi) end if diff --git a/src/polychord/chordal_sampling.f90 b/src/polychord/chordal_sampling.f90 index b5af21d1..c33b88c6 100644 --- a/src/polychord/chordal_sampling.f90 +++ b/src/polychord/chordal_sampling.f90 @@ -65,6 +65,15 @@ function prior(cube) result(theta) ! Start the baby point at the seed point previous_point = seed_point + ! --- NEW AGGRESSIVE CHECK 5 --- + if (any(isnan(seed_point))) then + write(*, '(A)') 'TRACE 5 (SliceSampling): NaN detected in the input seed_point.' + write(*, '(A, *(F24.15))') ' seed_point = ', seed_point + end if + if (any(isnan(cholesky))) then + write(*, '(A)') 'TRACE 5 (SliceSampling): NaN detected in the input cholesky matrix.' + end if + ! Initialise the likelihood counter at 0 nlike = 0 @@ -225,6 +234,15 @@ function prior(cube) result(theta) ! The lower bound real(dp), dimension(S%nTotal) :: L + ! --- NEW AGGRESSIVE CHECK 3 --- + if (any(isnan(x0))) then + write(*, '(A)') 'TRACE 3 (slice_sample): NaN detected in the input seed point x0.' + write(*, '(A, *(F24.15))') ' x0 = ', x0 + end if + if (any(isnan(nhat))) then + write(*, '(A)') 'TRACE 3 (slice_sample): NaN detected in the input direction vector nhat.' + write(*, '(A, *(F24.15))') ' nhat = ', nhat + end if real(dp) :: temp_random @@ -265,6 +283,13 @@ function prior(cube) result(theta) ! Find the distance between x0 and R x0Rd= distance(x0(S%h0:S%h1),R(S%h0:S%h1), [(.false., i=1,S%nDims)]) + ! --- NEW AGGRESSIVE CHECK 4 --- + if (isnan(x0Ld) .or. isnan(x0Rd)) then + write(*, '(A)') 'TRACE 4 (slice_sample): NaN detected in boundary distances.' + write(*, '(A, F24.15)') ' x0Ld = ', x0Ld + write(*, '(A, F24.15)') ' x0Rd = ', x0Rd + end if + ! Draw a random point within L and R baby_point(S%h0:S%h1) = x0(S%h0:S%h1)+ (random_real() * (x0Rd+x0Ld) - x0Ld) * nhat diff --git a/src/polychord/nested_sampling.F90 b/src/polychord/nested_sampling.F90 index 8918782a..6a09b807 100644 --- a/src/polychord/nested_sampling.F90 +++ b/src/polychord/nested_sampling.F90 @@ -247,6 +247,16 @@ end subroutine dumper ! Choose the cholesky decomposition for the cluster cholesky = RTI%cholesky(:,:,cluster_id) + ! --- NEW AGGRESSIVE CHECK 6 --- + if (any(isnan(seed_point))) then + write(*, '(A, I0)') 'TRACE 6 (NestedSampling): NaN seed_point chosen from cluster ', cluster_id + write(*, '(A, *(F24.15))') ' seed_point = ', seed_point + end if + if (any(isnan(cholesky))) then + write(*, '(A, I0)') 'TRACE 6 (NestedSampling): NaN cholesky matrix chosen from cluster ', cluster_id + ! Not printing the matrix as it's large, its presence is enough. + end if + ! Get the loglikelihood contour we're generating from logL = RTI%logLp(cluster_id) From aeaecd2edac8328841c9fe65a6085c1a076156b7 Mon Sep 17 00:00:00 2001 From: dprelogo Date: Mon, 14 Jul 2025 22:04:15 +0100 Subject: [PATCH 15/44] put check after declaration --- src/polychord/chordal_sampling.f90 | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/polychord/chordal_sampling.f90 b/src/polychord/chordal_sampling.f90 index c33b88c6..59a8f528 100644 --- a/src/polychord/chordal_sampling.f90 +++ b/src/polychord/chordal_sampling.f90 @@ -234,6 +234,11 @@ function prior(cube) result(theta) ! The lower bound real(dp), dimension(S%nTotal) :: L + real(dp) :: temp_random + + integer :: i_step, i + real(dp) :: x0Rd, x0Ld + ! --- NEW AGGRESSIVE CHECK 3 --- if (any(isnan(x0))) then write(*, '(A)') 'TRACE 3 (slice_sample): NaN detected in the input seed point x0.' @@ -244,11 +249,7 @@ function prior(cube) result(theta) write(*, '(A, *(F24.15))') ' nhat = ', nhat end if - real(dp) :: temp_random - - integer :: i_step, i - real(dp) :: x0Rd, x0Ld - + ! Select initial start and end points temp_random = random_real() L(S%h0:S%h1) = x0(S%h0:S%h1) - temp_random * w * nhat From 4380549c6972262678d2c48d139d3456a98dc74c Mon Sep 17 00:00:00 2001 From: dprelogo Date: Mon, 14 Jul 2025 22:09:03 +0100 Subject: [PATCH 16/44] push sqrt to a variable --- src/polychord/generate.F90 | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/polychord/generate.F90 b/src/polychord/generate.F90 index 7c794193..d39f2ef1 100644 --- a/src/polychord/generate.F90 +++ b/src/polychord/generate.F90 @@ -115,6 +115,8 @@ function prior(cube) result(theta) integer :: ngenerated ! use to track order points are generated in ! --- NEW COUNTER FOR NaN INJECTION TEST --- integer :: calculation_counter + ! --- NEW VARIABLE FOR NaN INJECTION TEST --- + real(dp) :: nan_generator real(dp) :: time0,time1,total_time real(dp),dimension(size(settings%grade_dims)) :: speed @@ -162,7 +164,8 @@ function prior(cube) result(theta) calculation_counter = calculation_counter + 1 if (calculation_counter == 5) then write(*,*) 'TEST (Linear Mode): Intentionally injecting a NaN via sqrt(-1.0).' - live_point(settings%h0) = sqrt(-1.0_dp) + nan_generator = -1.0_dp + live_point(settings%h0) = sqrt(nan_generator) end if ! --- END OF INTENTIONAL NaN INJECTION --- @@ -264,7 +267,8 @@ function prior(cube) result(theta) calculation_counter = calculation_counter + 1 if (mpi_information%rank == 1 .and. calculation_counter == 5) then write(*,*) 'TEST (Parallel Mode, Rank 1): Intentionally injecting a NaN via sqrt(-1.0).' - live_point(settings%h0) = sqrt(-1.0_dp) + nan_generator = -1.0_dp + live_point(settings%h0) = sqrt(nan_generator) end if ! --- END OF INTENTIONAL NaN INJECTION --- From 8fcd73f83e9bbf99b9ef973d02ea2a70a2bee1e9 Mon Sep 17 00:00:00 2001 From: dprelogo Date: Tue, 15 Jul 2025 09:20:40 +0100 Subject: [PATCH 17/44] add explicit isnan test --- src/polychord/generate.F90 | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/polychord/generate.F90 b/src/polychord/generate.F90 index d39f2ef1..c108f2a4 100644 --- a/src/polychord/generate.F90 +++ b/src/polychord/generate.F90 @@ -166,6 +166,9 @@ function prior(cube) result(theta) write(*,*) 'TEST (Linear Mode): Intentionally injecting a NaN via sqrt(-1.0).' nan_generator = -1.0_dp live_point(settings%h0) = sqrt(nan_generator) + if (any(isnan(live_point))) then + write(*,*) 'TEST (Linear Mode): NaN detected in live_point, isnan works.' + end if end if ! --- END OF INTENTIONAL NaN INJECTION --- @@ -269,6 +272,9 @@ function prior(cube) result(theta) write(*,*) 'TEST (Parallel Mode, Rank 1): Intentionally injecting a NaN via sqrt(-1.0).' nan_generator = -1.0_dp live_point(settings%h0) = sqrt(nan_generator) + if (any(isnan(live_point))) then + write(*,*) 'TEST (Parallel Mode, Rank 1): NaN detected in live_point, isnan works.' + end if end if ! --- END OF INTENTIONAL NaN INJECTION --- From 29f2d6bec61ee9ec1c6776804499180853312f15 Mon Sep 17 00:00:00 2001 From: dprelogo Date: Tue, 15 Jul 2025 11:03:52 +0100 Subject: [PATCH 18/44] change from isnan --- src/polychord/generate.F90 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/polychord/generate.F90 b/src/polychord/generate.F90 index c108f2a4..75f22abb 100644 --- a/src/polychord/generate.F90 +++ b/src/polychord/generate.F90 @@ -166,8 +166,8 @@ function prior(cube) result(theta) write(*,*) 'TEST (Linear Mode): Intentionally injecting a NaN via sqrt(-1.0).' nan_generator = -1.0_dp live_point(settings%h0) = sqrt(nan_generator) - if (any(isnan(live_point))) then - write(*,*) 'TEST (Linear Mode): NaN detected in live_point, isnan works.' + if (any(live_point /= live_point)) then + write(*,*) 'TEST (Linear Mode): NaN detected in live_point, /= works.' end if end if ! --- END OF INTENTIONAL NaN INJECTION --- @@ -272,8 +272,8 @@ function prior(cube) result(theta) write(*,*) 'TEST (Parallel Mode, Rank 1): Intentionally injecting a NaN via sqrt(-1.0).' nan_generator = -1.0_dp live_point(settings%h0) = sqrt(nan_generator) - if (any(isnan(live_point))) then - write(*,*) 'TEST (Parallel Mode, Rank 1): NaN detected in live_point, isnan works.' + if (any(live_point /= live_point)) then + write(*,*) 'TEST (Parallel Mode, Rank 1): NaN detected in live_point, /= works.' end if end if ! --- END OF INTENTIONAL NaN INJECTION --- From c2d9a24dd1fd91f1d7594f55e8f2d3ebd1d959be Mon Sep 17 00:00:00 2001 From: dprelogo Date: Tue, 15 Jul 2025 12:02:17 +0100 Subject: [PATCH 19/44] change to iee_is_nan --- src/polychord/generate.F90 | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/polychord/generate.F90 b/src/polychord/generate.F90 index 75f22abb..a6a2197b 100644 --- a/src/polychord/generate.F90 +++ b/src/polychord/generate.F90 @@ -4,6 +4,7 @@ !! * GenerateLivePoints module generate_module use utils_module, only: dp + use, intrinsic :: ieee_arithmetic implicit none @@ -166,8 +167,10 @@ function prior(cube) result(theta) write(*,*) 'TEST (Linear Mode): Intentionally injecting a NaN via sqrt(-1.0).' nan_generator = -1.0_dp live_point(settings%h0) = sqrt(nan_generator) - if (any(live_point /= live_point)) then - write(*,*) 'TEST (Linear Mode): NaN detected in live_point, /= works.' + if (any(iee_is_nan(live_point))) then + write(*,*) 'TEST (Linear Mode): NaN detected in live_point, iee_is_nan works.' + else + write(*,*) 'TEST (Linear Mode): NaN not detected in live_point, iee_is_nan failed.' end if end if ! --- END OF INTENTIONAL NaN INJECTION --- @@ -272,8 +275,10 @@ function prior(cube) result(theta) write(*,*) 'TEST (Parallel Mode, Rank 1): Intentionally injecting a NaN via sqrt(-1.0).' nan_generator = -1.0_dp live_point(settings%h0) = sqrt(nan_generator) - if (any(live_point /= live_point)) then - write(*,*) 'TEST (Parallel Mode, Rank 1): NaN detected in live_point, /= works.' + if (any(iee_is_nan(live_point))) then + write(*,*) 'TEST (Parallel Mode, Rank 1): NaN detected in live_point, iee_is_nan works.' + else + write(*,*) 'TEST (Parallel Mode, Rank 1): NaN not detected in live_point, iee_is_nan failed.' end if end if ! --- END OF INTENTIONAL NaN INJECTION --- From 256bfb3146b6ba1a12fb2b8e2ee11b3a386a00a3 Mon Sep 17 00:00:00 2001 From: dprelogo Date: Tue, 15 Jul 2025 12:05:27 +0100 Subject: [PATCH 20/44] correct typo --- src/polychord/generate.F90 | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/polychord/generate.F90 b/src/polychord/generate.F90 index a6a2197b..872843ed 100644 --- a/src/polychord/generate.F90 +++ b/src/polychord/generate.F90 @@ -167,10 +167,10 @@ function prior(cube) result(theta) write(*,*) 'TEST (Linear Mode): Intentionally injecting a NaN via sqrt(-1.0).' nan_generator = -1.0_dp live_point(settings%h0) = sqrt(nan_generator) - if (any(iee_is_nan(live_point))) then - write(*,*) 'TEST (Linear Mode): NaN detected in live_point, iee_is_nan works.' + if (any(ieee_is_nan(live_point))) then + write(*,*) 'TEST (Linear Mode): NaN detected in live_point, ieee_is_nan works.' else - write(*,*) 'TEST (Linear Mode): NaN not detected in live_point, iee_is_nan failed.' + write(*,*) 'TEST (Linear Mode): NaN not detected in live_point, ieee_is_nan failed.' end if end if ! --- END OF INTENTIONAL NaN INJECTION --- @@ -275,10 +275,10 @@ function prior(cube) result(theta) write(*,*) 'TEST (Parallel Mode, Rank 1): Intentionally injecting a NaN via sqrt(-1.0).' nan_generator = -1.0_dp live_point(settings%h0) = sqrt(nan_generator) - if (any(iee_is_nan(live_point))) then - write(*,*) 'TEST (Parallel Mode, Rank 1): NaN detected in live_point, iee_is_nan works.' + if (any(ieee_is_nan(live_point))) then + write(*,*) 'TEST (Parallel Mode, Rank 1): NaN detected in live_point, ieee_is_nan works.' else - write(*,*) 'TEST (Parallel Mode, Rank 1): NaN not detected in live_point, iee_is_nan failed.' + write(*,*) 'TEST (Parallel Mode, Rank 1): NaN not detected in live_point, ieee_is_nan failed.' end if end if ! --- END OF INTENTIONAL NaN INJECTION --- From 5d6536ad89a6ce48370a73cb505ff0320397c5a5 Mon Sep 17 00:00:00 2001 From: dprelogo Date: Tue, 15 Jul 2025 12:39:25 +0100 Subject: [PATCH 21/44] add scalar check --- src/polychord/generate.F90 | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/polychord/generate.F90 b/src/polychord/generate.F90 index 872843ed..f0b72d77 100644 --- a/src/polychord/generate.F90 +++ b/src/polychord/generate.F90 @@ -275,6 +275,15 @@ function prior(cube) result(theta) write(*,*) 'TEST (Parallel Mode, Rank 1): Intentionally injecting a NaN via sqrt(-1.0).' nan_generator = -1.0_dp live_point(settings%h0) = sqrt(nan_generator) + + write(*,*) 'DEBUG: The injected value is: ', live_point(settings%h0) + + if (ieee_is_nan(live_point(settings%h0))) then + write(*,*) 'DEBUG: Scalar check PASSED. The value is a NaN.' + else + write(*,*) 'DEBUG: Scalar check FAILED. The value is NOT a NaN.' + end if + if (any(ieee_is_nan(live_point))) then write(*,*) 'TEST (Parallel Mode, Rank 1): NaN detected in live_point, ieee_is_nan works.' else From 725a2b42ba2f676f5d9e6cc9077a015545088d22 Mon Sep 17 00:00:00 2001 From: dprelogo Date: Tue, 15 Jul 2025 13:08:40 +0100 Subject: [PATCH 22/44] add no-finite-math flag --- Makefile_gnu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile_gnu b/Makefile_gnu index f8200644..1c2f7035 100644 --- a/Makefile_gnu +++ b/Makefile_gnu @@ -63,7 +63,7 @@ else # Optimised mode # -------------- # Ofast : maximum optimisation -FFLAGS += -Ofast +FFLAGS += -Ofast -fno-finite-math-only # no-finit-math-only is needed for detectinng NaNs CXXFLAGS += -Ofast CXFLAGS += -Ofast endif From abb0c71bb7f4338a7aec619528903031b837a837 Mon Sep 17 00:00:00 2001 From: dprelogo Date: Tue, 15 Jul 2025 14:11:24 +0100 Subject: [PATCH 23/44] turn off intentional NaN injection --- src/polychord/generate.F90 | 80 +++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/src/polychord/generate.F90 b/src/polychord/generate.F90 index f0b72d77..fe8e64aa 100644 --- a/src/polychord/generate.F90 +++ b/src/polychord/generate.F90 @@ -114,10 +114,10 @@ function prior(cube) result(theta) integer :: nlike ! number of likelihood calls integer :: nprior, ndiscarded integer :: ngenerated ! use to track order points are generated in - ! --- NEW COUNTER FOR NaN INJECTION TEST --- - integer :: calculation_counter - ! --- NEW VARIABLE FOR NaN INJECTION TEST --- - real(dp) :: nan_generator + ! ! --- NEW COUNTER FOR NaN INJECTION TEST --- + ! integer :: calculation_counter + ! ! --- NEW VARIABLE FOR NaN INJECTION TEST --- + ! real(dp) :: nan_generator real(dp) :: time0,time1,total_time real(dp),dimension(size(settings%grade_dims)) :: speed @@ -161,19 +161,19 @@ function prior(cube) result(theta) ! Generate a random coordinate live_point(settings%h0:settings%h1) = random_reals(settings%nDims) - ! --- START OF INTENTIONAL NaN INJECTION (FOR TESTING) --- - calculation_counter = calculation_counter + 1 - if (calculation_counter == 5) then - write(*,*) 'TEST (Linear Mode): Intentionally injecting a NaN via sqrt(-1.0).' - nan_generator = -1.0_dp - live_point(settings%h0) = sqrt(nan_generator) - if (any(ieee_is_nan(live_point))) then - write(*,*) 'TEST (Linear Mode): NaN detected in live_point, ieee_is_nan works.' - else - write(*,*) 'TEST (Linear Mode): NaN not detected in live_point, ieee_is_nan failed.' - end if - end if - ! --- END OF INTENTIONAL NaN INJECTION --- + ! ! --- START OF INTENTIONAL NaN INJECTION (FOR TESTING) --- + ! calculation_counter = calculation_counter + 1 + ! if (calculation_counter == 5) then + ! write(*,*) 'TEST (Linear Mode): Intentionally injecting a NaN via sqrt(-1.0).' + ! nan_generator = -1.0_dp + ! live_point(settings%h0) = sqrt(nan_generator) + ! if (any(ieee_is_nan(live_point))) then + ! write(*,*) 'TEST (Linear Mode): NaN detected in live_point, ieee_is_nan works.' + ! else + ! write(*,*) 'TEST (Linear Mode): NaN not detected in live_point, ieee_is_nan failed.' + ! end if + ! end if + ! ! --- END OF INTENTIONAL NaN INJECTION --- ! Compute physical coordinates, likelihoods and derived parameters time0 = time() @@ -268,29 +268,29 @@ function prior(cube) result(theta) ! The workers simply generate and send points until they're told to stop by the administrator - do while(live_point_needed(live_point,mpi_information)) - ! --- START OF INTENTIONAL NaN INJECTION (FOR TESTING) --- - calculation_counter = calculation_counter + 1 - if (mpi_information%rank == 1 .and. calculation_counter == 5) then - write(*,*) 'TEST (Parallel Mode, Rank 1): Intentionally injecting a NaN via sqrt(-1.0).' - nan_generator = -1.0_dp - live_point(settings%h0) = sqrt(nan_generator) - - write(*,*) 'DEBUG: The injected value is: ', live_point(settings%h0) - - if (ieee_is_nan(live_point(settings%h0))) then - write(*,*) 'DEBUG: Scalar check PASSED. The value is a NaN.' - else - write(*,*) 'DEBUG: Scalar check FAILED. The value is NOT a NaN.' - end if - - if (any(ieee_is_nan(live_point))) then - write(*,*) 'TEST (Parallel Mode, Rank 1): NaN detected in live_point, ieee_is_nan works.' - else - write(*,*) 'TEST (Parallel Mode, Rank 1): NaN not detected in live_point, ieee_is_nan failed.' - end if - end if - ! --- END OF INTENTIONAL NaN INJECTION --- + ! do while(live_point_needed(live_point,mpi_information)) + ! ! --- START OF INTENTIONAL NaN INJECTION (FOR TESTING) --- + ! calculation_counter = calculation_counter + 1 + ! if (mpi_information%rank == 1 .and. calculation_counter == 5) then + ! write(*,*) 'TEST (Parallel Mode, Rank 1): Intentionally injecting a NaN via sqrt(-1.0).' + ! nan_generator = -1.0_dp + ! live_point(settings%h0) = sqrt(nan_generator) + + ! write(*,*) 'DEBUG: The injected value is: ', live_point(settings%h0) + + ! if (ieee_is_nan(live_point(settings%h0))) then + ! write(*,*) 'DEBUG: Scalar check PASSED. The value is a NaN.' + ! else + ! write(*,*) 'DEBUG: Scalar check FAILED. The value is NOT a NaN.' + ! end if + + ! if (any(ieee_is_nan(live_point))) then + ! write(*,*) 'TEST (Parallel Mode, Rank 1): NaN detected in live_point, ieee_is_nan works.' + ! else + ! write(*,*) 'TEST (Parallel Mode, Rank 1): NaN not detected in live_point, ieee_is_nan failed.' + ! end if + ! end if + ! ! --- END OF INTENTIONAL NaN INJECTION --- time0 = time() call calculate_point( loglikelihood, prior, live_point, settings,nlike) ! Compute physical coordinates, likelihoods and derived parameters From 01c39a7f378b35c5c43532e5ab4fe7ad01b67db7 Mon Sep 17 00:00:00 2001 From: dprelogo Date: Tue, 15 Jul 2025 14:16:38 +0100 Subject: [PATCH 24/44] remove flush --- src/polychord/random_utils.F90 | 1 - 1 file changed, 1 deletion(-) diff --git a/src/polychord/random_utils.F90 b/src/polychord/random_utils.F90 index f27dcbe1..53ed0a22 100644 --- a/src/polychord/random_utils.F90 +++ b/src/polychord/random_utils.F90 @@ -294,7 +294,6 @@ function random_direction(nDims) ! --- START OF ADDED TRACE (Non-invasive) --- if (random_direction2 <= 1.d-30) then write(*, '(A)') 'PolyChord TRACE (random_direction): Generated a near-zero-length random vector. This could cause issues if not handled.' - flush(6) endif ! --- END OF ADDED TRACE --- end do From 7f9a3f99c99a54aff45f5153a5bf1e53779b6d3d Mon Sep 17 00:00:00 2001 From: dprelogo Date: Tue, 15 Jul 2025 14:17:29 +0100 Subject: [PATCH 25/44] switch to ieee_is_nan --- src/polychord/calculate.f90 | 9 +++++---- src/polychord/chordal_sampling.f90 | 17 +++++++++-------- src/polychord/mpi_utils.F90 | 3 ++- src/polychord/nested_sampling.F90 | 7 ++++--- src/polychord/utils.F90 | 5 +++-- 5 files changed, 23 insertions(+), 18 deletions(-) diff --git a/src/polychord/calculate.f90 b/src/polychord/calculate.f90 index c4d78c23..7aae0000 100644 --- a/src/polychord/calculate.f90 +++ b/src/polychord/calculate.f90 @@ -1,5 +1,6 @@ module calculate_module use utils_module, only: dp + use, intrinsic :: ieee_arithmetic implicit none contains @@ -41,13 +42,13 @@ function prior(cube) result(theta) cube = point(settings%h0:settings%h1) ! --- NEW AGGRESSIVE CHECK 0 --- - if (any(isnan(point))) then + if (any(ieee_is_nan(point))) then write(*, '(A)') 'TRACE 0 (calculate_point): NaN detected in the full input POINT vector.' write(*, '(A, *(F24.15))') ' point = ', point end if ! --- START OF ADDED WARNING --- - if (any(isnan(cube))) then + if (any(ieee_is_nan(cube))) then write(*,'(A)') 'PolyChord TRACE (calculate_point): NaN detected in hypercube vector before prior transformation.' end if ! --- END OF ADDED WARNING --- @@ -58,7 +59,7 @@ function prior(cube) result(theta) else where(settings%wraparound) cube = modulo(cube,1d0) - if (any(isnan(cube))) then + if (any(ieee_is_nan(cube))) then ! --- NEW AGGRESSIVE CHECK 1 --- write(*,'(A)') 'TRACE 1 (calculate_point): NaN detected in hypercube vector AFTER MODULO.' write(*, '(A, *(F24.15))') ' cube = ', cube @@ -67,7 +68,7 @@ function prior(cube) result(theta) theta = prior(cube) ! --- NEW AGGRESSIVE CHECK 2 (MOST IMPORTANT) --- - if (any(isnan(theta))) then + if (any(ieee_is_nan(theta))) then write(*,'(A)') 'TRACE 2 (calculate_point): NaN detected in physical vector THETA before likelihood call.' write(*, '(A, *(F24.15))') ' theta = ', theta end if diff --git a/src/polychord/chordal_sampling.f90 b/src/polychord/chordal_sampling.f90 index 59a8f528..e481a35c 100644 --- a/src/polychord/chordal_sampling.f90 +++ b/src/polychord/chordal_sampling.f90 @@ -1,5 +1,6 @@ module chordal_module use utils_module, only: dp + use, intrinsic :: ieee_arithmetic implicit none contains @@ -66,11 +67,11 @@ function prior(cube) result(theta) previous_point = seed_point ! --- NEW AGGRESSIVE CHECK 5 --- - if (any(isnan(seed_point))) then + if (any(ieee_is_nan(seed_point))) then write(*, '(A)') 'TRACE 5 (SliceSampling): NaN detected in the input seed_point.' write(*, '(A, *(F24.15))') ' seed_point = ', seed_point end if - if (any(isnan(cholesky))) then + if (any(ieee_is_nan(cholesky))) then write(*, '(A)') 'TRACE 5 (SliceSampling): NaN detected in the input cholesky matrix.' end if @@ -84,9 +85,9 @@ function prior(cube) result(theta) nhats = matmul(cholesky,nhats) ! --- NEW, CRITICAL TRACE --- - if (any(isnan(nhats))) then + if (any(ieee_is_nan(nhats))) then write(*,'(A)') 'PolyChord TRACE (SliceSampling): NaN detected in nhats vector immediately after matmul(cholesky,nhats).' - if (any(isnan(cholesky))) then + if (any(ieee_is_nan(cholesky))) then write(*,'(A)') ' The source is a NaN cholesky matrix.' else write(*,'(A)') ' The source is NOT the cholesky matrix. Problem is in generate_nhats or matmul.' @@ -240,11 +241,11 @@ function prior(cube) result(theta) real(dp) :: x0Rd, x0Ld ! --- NEW AGGRESSIVE CHECK 3 --- - if (any(isnan(x0))) then + if (any(ieee_is_nan(x0))) then write(*, '(A)') 'TRACE 3 (slice_sample): NaN detected in the input seed point x0.' write(*, '(A, *(F24.15))') ' x0 = ', x0 end if - if (any(isnan(nhat))) then + if (any(ieee_is_nan(nhat))) then write(*, '(A)') 'TRACE 3 (slice_sample): NaN detected in the input direction vector nhat.' write(*, '(A, *(F24.15))') ' nhat = ', nhat end if @@ -285,7 +286,7 @@ function prior(cube) result(theta) x0Rd= distance(x0(S%h0:S%h1),R(S%h0:S%h1), [(.false., i=1,S%nDims)]) ! --- NEW AGGRESSIVE CHECK 4 --- - if (isnan(x0Ld) .or. isnan(x0Rd)) then + if (ieee_is_nan(x0Ld) .or. ieee_is_nan(x0Rd)) then write(*, '(A)') 'TRACE 4 (slice_sample): NaN detected in boundary distances.' write(*, '(A, F24.15)') ' x0Ld = ', x0Ld write(*, '(A, F24.15)') ' x0Rd = ', x0Rd @@ -295,7 +296,7 @@ function prior(cube) result(theta) baby_point(S%h0:S%h1) = x0(S%h0:S%h1)+ (random_real() * (x0Rd+x0Ld) - x0Ld) * nhat ! --- START OF ADDED WARNING --- - if (any(isnan(baby_point(S%h0:S%h1)))) then + if (any(ieee_is_nan(baby_point(S%h0:S%h1)))) then write(*,'(A)') 'PolyChord TRACE (slice_sample): NaN detected in new parameter vector immediately after creation.' end if ! --- END OF ADDED WARNING --- diff --git a/src/polychord/mpi_utils.F90 b/src/polychord/mpi_utils.F90 index 3bb11ad4..8fdb0833 100644 --- a/src/polychord/mpi_utils.F90 +++ b/src/polychord/mpi_utils.F90 @@ -1,5 +1,6 @@ module mpi_module use utils_module, only: dp, normal_fb + use, intrinsic :: ieee_arithmetic implicit none #ifdef MPI @@ -436,7 +437,7 @@ subroutine throw_babies(baby_points,nlike,epoch,mpi_information) type(mpi_bundle), intent(in) :: mpi_information ! --- START OF ADDED TRACE --- - if (any(isnan(baby_points))) then + if (any(ieee_is_nan(baby_points))) then write(*,'(A, I0, A)') 'PolyChord TRACE (throw_babies): Worker rank ', mpi_information%rank, ' is about to send a NaN baby_points array to the root.' endif ! --- END OF ADDED TRACE --- diff --git a/src/polychord/nested_sampling.F90 b/src/polychord/nested_sampling.F90 index 6a09b807..88dccb60 100644 --- a/src/polychord/nested_sampling.F90 +++ b/src/polychord/nested_sampling.F90 @@ -1,5 +1,6 @@ module nested_sampling_module use utils_module, only: dp + use, intrinsic :: ieee_arithmetic #ifdef MPI use mpi_module, only: get_mpi_information,mpi_bundle,is_root,linear_mode,catch_babies,throw_babies,throw_seed,catch_seed,broadcast_integers,mpi_synchronise @@ -248,11 +249,11 @@ end subroutine dumper cholesky = RTI%cholesky(:,:,cluster_id) ! --- NEW AGGRESSIVE CHECK 6 --- - if (any(isnan(seed_point))) then + if (any(ieee_is_nan(seed_point))) then write(*, '(A, I0)') 'TRACE 6 (NestedSampling): NaN seed_point chosen from cluster ', cluster_id write(*, '(A, *(F24.15))') ' seed_point = ', seed_point end if - if (any(isnan(cholesky))) then + if (any(ieee_is_nan(cholesky))) then write(*, '(A, I0)') 'TRACE 6 (NestedSampling): NaN cholesky matrix chosen from cluster ', cluster_id ! Not printing the matrix as it's large, its presence is enough. end if @@ -500,7 +501,7 @@ end subroutine dumper slice_time = slice_time + time1-time0 ! --- NEW TRACE --- - if (any(isnan(baby_points))) then + if (any(ieee_is_nan(baby_points))) then write(*,'(A, I0, A)') 'PolyChord TRACE (NestedSampling): Worker rank ', mpi_information%rank, ' received NaN array from SliceSampling.' endif ! --- END TRACE --- diff --git a/src/polychord/utils.F90 b/src/polychord/utils.F90 index 5379a637..b8f229ef 100644 --- a/src/polychord/utils.F90 +++ b/src/polychord/utils.F90 @@ -1,5 +1,6 @@ module utils_module + use, intrinsic :: ieee_arithmetic implicit none #ifdef MPI include 'mpif.h' @@ -667,7 +668,7 @@ function calc_cholesky(a) result(L) L = identity_matrix(size(a,1)) * sqrt(max(trace_val, 0.d0)) ! --- NEW, CRITICAL TRACE --- - if (any(isnan(L))) then + if (any(ieee_is_nan(L))) then write(*,'(A)') 'PolyChord TRACE (calc_cholesky): NaN detected in Cholesky matrix L immediately after creation from a failed decomposition.' end if @@ -683,7 +684,7 @@ function calc_cholesky(a) result(L) end do ! --- NEW, FINAL SANITY CHECK --- - if (any(isnan(L))) then + if (any(ieee_is_nan(L))) then write(*,'(A)') 'PolyChord TRACE (calc_cholesky): NaN detected in Cholesky matrix L at function exit.' endif From 66523ae6868d50ea0d0b999388fdc2c373a9eea7 Mon Sep 17 00:00:00 2001 From: dprelogo Date: Tue, 15 Jul 2025 14:25:17 +0100 Subject: [PATCH 26/44] fix typos --- src/polychord/generate.F90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/polychord/generate.F90 b/src/polychord/generate.F90 index fe8e64aa..2be6cec8 100644 --- a/src/polychord/generate.F90 +++ b/src/polychord/generate.F90 @@ -126,7 +126,7 @@ function prior(cube) result(theta) ! Initialise number of likelihood calls to zero here nlike = 0 ngenerated = 1 - calculation_counter = 0 ! Initialize the new counter + ! calculation_counter = 0 ! Initialize the new counter if(is_root(mpi_information)) then @@ -268,7 +268,7 @@ function prior(cube) result(theta) ! The workers simply generate and send points until they're told to stop by the administrator - ! do while(live_point_needed(live_point,mpi_information)) + do while(live_point_needed(live_point,mpi_information)) ! ! --- START OF INTENTIONAL NaN INJECTION (FOR TESTING) --- ! calculation_counter = calculation_counter + 1 ! if (mpi_information%rank == 1 .and. calculation_counter == 5) then From d2732e7a7858b20ec09257ea339eef67dde33ae2 Mon Sep 17 00:00:00 2001 From: dprelogo Date: Thu, 31 Jul 2025 12:36:36 +0100 Subject: [PATCH 27/44] turn off agressive trace 0 --- src/polychord/calculate.f90 | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/polychord/calculate.f90 b/src/polychord/calculate.f90 index 7aae0000..5cbdc9d0 100644 --- a/src/polychord/calculate.f90 +++ b/src/polychord/calculate.f90 @@ -41,11 +41,11 @@ function prior(cube) result(theta) cube = point(settings%h0:settings%h1) - ! --- NEW AGGRESSIVE CHECK 0 --- - if (any(ieee_is_nan(point))) then - write(*, '(A)') 'TRACE 0 (calculate_point): NaN detected in the full input POINT vector.' - write(*, '(A, *(F24.15))') ' point = ', point - end if + ! ! --- NEW AGGRESSIVE CHECK 0 --- + ! if (any(ieee_is_nan(point))) then + ! write(*, '(A)') 'TRACE 0 (calculate_point): NaN detected in the full input POINT vector.' + ! write(*, '(A, *(F24.15))') ' point = ', point + ! end if ! --- START OF ADDED WARNING --- if (any(ieee_is_nan(cube))) then From ac5d30e5f9f6318d6a09f32c918a1e38b1d53018 Mon Sep 17 00:00:00 2001 From: dprelogo Date: Thu, 31 Jul 2025 15:10:05 +0100 Subject: [PATCH 28/44] add minimal variance regularizer to cholesky decomposition --- src/polychord/utils.F90 | 39 +++++++++------------------------------ 1 file changed, 9 insertions(+), 30 deletions(-) diff --git a/src/polychord/utils.F90 b/src/polychord/utils.F90 index b8f229ef..310fde36 100644 --- a/src/polychord/utils.F90 +++ b/src/polychord/utils.F90 @@ -633,48 +633,27 @@ end function abovetol function calc_cholesky(a) result(L) implicit none real(dp), intent(in),dimension(:,:) :: a + real(dp), dimension(size(a,1),size(a,2)) :: a_reg real(dp), dimension(size(a,1),size(a,2)) :: L integer :: i,j - ! added trace_val to check for NaN - real(dp) :: trace_val + real(dp), parameter :: min_variance = 1.d-20 ! Set it all to zero to begin with L = 0 + ! Regularise the matrix to avoid numerical issues + a_reg = a + identity_matrix(size(a,1)) * min_variance + ! Zero out the upper half do i=1,size(a,1) - - L(i,i)= a(i,i) - sum(L(i,:i-1)**2) + L(i,i)= a_reg(i,i) - sum(L(i,:i-1)**2) if (L(i,i).le.0d0) then - ! If the cholesky decomposition does not exist, then set it to + ! If the cholesky decomposition still does not exist, then set it to ! be a re-scaled identity matrix - - ! --- START OF ADDED WARNING --- - trace_val = trace(a) - if (trace_val < 1.d-20) then - write(*,'(A)') 'PolyChord WARNING: Cholesky decomposition failed.' - if (trace_val < 0.d0) then - write(*,'(A, E12.5)') ' Matrix trace is NEGATIVE: ', trace_val - else - write(*,'(A, E12.5)') ' Matrix trace is near-zero: ', trace_val - end if - write(*,'(A)') ' This will likely lead to a NaN direction vector.' - end if - ! --- END OF ADDED WARNING --- - - ! L = identity_matrix(size(a,1)) * sqrt(trace(a)) - - ! Make this sqrt safe for negative inputs - L = identity_matrix(size(a,1)) * sqrt(max(trace_val, 0.d0)) - - ! --- NEW, CRITICAL TRACE --- - if (any(ieee_is_nan(L))) then - write(*,'(A)') 'PolyChord TRACE (calc_cholesky): NaN detected in Cholesky matrix L immediately after creation from a failed decomposition.' - end if - + L = identity_matrix(size(a,1)) * sqrt(min_variance) return else - L(i,i)=sqrt(L(i,i)) + L(i,i) = sqrt(L(i,i)) end if do j=i+1,size(a,1) From d5395f4be0d2aa49af9bc6bd22eb64884ac0cd09 Mon Sep 17 00:00:00 2001 From: dprelogo Date: Thu, 31 Jul 2025 15:17:38 +0100 Subject: [PATCH 29/44] fix typo --- src/polychord/utils.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/polychord/utils.F90 b/src/polychord/utils.F90 index 310fde36..e0e901e5 100644 --- a/src/polychord/utils.F90 +++ b/src/polychord/utils.F90 @@ -657,7 +657,7 @@ function calc_cholesky(a) result(L) end if do j=i+1,size(a,1) - L(j,i) = (a(i,j) - sum(L(i,:i-1)*L(j,:i-1)))/L(i,i) + L(j,i) = (a_reg(i,j) - sum(L(i,:i-1)*L(j,:i-1)))/L(i,i) end do end do From b85e2a3e58dac8057660fb2f18236724a39fc36c Mon Sep 17 00:00:00 2001 From: dprelogo Date: Wed, 13 Aug 2025 16:23:40 +0100 Subject: [PATCH 30/44] ignore Claude output --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 165d67f9..514ca5e7 100644 --- a/.gitignore +++ b/.gitignore @@ -33,3 +33,4 @@ pypolychord/.ld_preload.sh # Chains folder (test results only) chains/* .ipynb_checkpoints/ +CLAUDE.md \ No newline at end of file From 1cf4df2f946c785276aaf88148b13bb45199cb70 Mon Sep 17 00:00:00 2001 From: dprelogo Date: Wed, 13 Aug 2025 16:27:39 +0100 Subject: [PATCH 31/44] cache regularise function and print matrices if NaN is detected --- src/polychord/utils.F90 | 86 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 85 insertions(+), 1 deletion(-) diff --git a/src/polychord/utils.F90 b/src/polychord/utils.F90 index e0e901e5..c98c3022 100644 --- a/src/polychord/utils.F90 +++ b/src/polychord/utils.F90 @@ -630,7 +630,7 @@ function abovetol (change,current_sum) end function abovetol - function calc_cholesky(a) result(L) + function calc_cholesky_regularised(a) result(L) implicit none real(dp), intent(in),dimension(:,:) :: a real(dp), dimension(size(a,1),size(a,2)) :: a_reg @@ -665,6 +665,90 @@ function calc_cholesky(a) result(L) ! --- NEW, FINAL SANITY CHECK --- if (any(ieee_is_nan(L))) then write(*,'(A)') 'PolyChord TRACE (calc_cholesky): NaN detected in Cholesky matrix L at function exit.' + write(*,'(A)') 'Input matrix a:' + do i = 1, size(a,1) + write(*,'(*(E15.8,1X))') a(i,:) + end do + write(*,'(A)') 'Output matrix L:' + do i = 1, size(L,1) + write(*,'(*(E15.8,1X))') L(i,:) + end do + endif + + end function calc_cholesky_regularised + + function calc_cholesky(a) result(L) + implicit none + real(dp), intent(in),dimension(:,:) :: a + real(dp), dimension(size(a,1),size(a,2)) :: L + integer :: i,j + ! added trace_val to check for NaN + real(dp) :: trace_val + + ! Set it all to zero to begin with + L = 0 + + ! Zero out the upper half + do i=1,size(a,1) + + L(i,i)= a(i,i) - sum(L(i,:i-1)**2) + if (L(i,i).le.0d0) then + ! If the cholesky decomposition does not exist, then set it to + ! be a re-scaled identity matrix + + ! --- START OF ADDED WARNING --- + trace_val = trace(a) + if (trace_val < 1.d-20) then + write(*,'(A)') 'PolyChord WARNING: Cholesky decomposition failed.' + if (trace_val < 0.d0) then + write(*,'(A, E12.5)') ' Matrix trace is NEGATIVE: ', trace_val + else + write(*,'(A, E12.5)') ' Matrix trace is near-zero: ', trace_val + end if + write(*,'(A)') ' This will likely lead to a NaN direction vector.' + end if + ! --- END OF ADDED WARNING --- + + ! L = identity_matrix(size(a,1)) * sqrt(trace(a)) + + ! Make this sqrt safe for negative inputs + L = identity_matrix(size(a,1)) * sqrt(max(trace_val, 0.d0)) + + ! --- NEW, CRITICAL TRACE --- + if (any(ieee_is_nan(L))) then + write(*,'(A)') 'PolyChord TRACE (calc_cholesky): NaN detected in Cholesky matrix L immediately after creation from a failed decomposition.' + write(*,'(A)') 'Input matrix a:' + do j = 1, size(a,1) + write(*,'(*(E15.8,1X))') a(j,:) + end do + write(*,'(A)') 'Output matrix L:' + do j = 1, size(L,1) + write(*,'(*(E15.8,1X))') L(j,:) + end do + end if + + return + else + L(i,i)=sqrt(L(i,i)) + end if + + do j=i+1,size(a,1) + L(j,i) = (a(i,j) - sum(L(i,:i-1)*L(j,:i-1)))/L(i,i) + end do + + end do + + ! --- NEW, FINAL SANITY CHECK --- + if (any(ieee_is_nan(L))) then + write(*,'(A)') 'PolyChord TRACE (calc_cholesky): NaN detected in Cholesky matrix L at function exit.' + write(*,'(A)') 'Input matrix a:' + do i = 1, size(a,1) + write(*,'(*(E15.8,1X))') a(i,:) + end do + write(*,'(A)') 'Output matrix L:' + do i = 1, size(L,1) + write(*,'(*(E15.8,1X))') L(i,:) + end do endif end function calc_cholesky From 604966594b413626eb362fd776655bfe76bd3a6f Mon Sep 17 00:00:00 2001 From: dprelogo Date: Mon, 27 Oct 2025 14:06:35 +0000 Subject: [PATCH 32/44] Add NaN debug messages --- src/polychord/nested_sampling.F90 | 12 +++++++ src/polychord/run_time_info.f90 | 42 ++++++++++++++++++++--- src/polychord/utils.F90 | 55 ++++++++++++++++++++++++++++--- 3 files changed, 101 insertions(+), 8 deletions(-) diff --git a/src/polychord/nested_sampling.F90 b/src/polychord/nested_sampling.F90 index 88dccb60..accc24c2 100644 --- a/src/polychord/nested_sampling.F90 +++ b/src/polychord/nested_sampling.F90 @@ -268,6 +268,12 @@ end subroutine dumper ! Generate a new set of points within the likelihood bound of the late point baby_points = SliceSampling(loglikelihood,prior,settings,logL,seed_point,cholesky,nlike,num_repeats) + + ! === DEBUG PRIORITY 3: Check for NaN from SliceSampling === + if (any(ieee_is_nan(baby_points))) then + write(*,'(A)') 'DEBUG (NestedSampling): ERROR! NaN in baby_points from SliceSampling (linear mode) (Failure Point 4 upstream)' + endif + baby_points(settings%b0,:) = logL ! Note the moment it is born at #ifdef MPI else if(settings%synchronous) then @@ -303,6 +309,12 @@ end subroutine dumper ! Recieve any new baby points from any worker currently sending worker_id = catch_babies(baby_points,nlike,worker_epoch,mpi_information) + ! === DEBUG PRIORITY 3: Check for NaN from worker === + if (any(ieee_is_nan(baby_points))) then + write(*,'(A,I3,A)') 'DEBUG (NestedSampling): ERROR! NaN in baby_points from worker ', worker_id, & + ' (Failure Point 4 upstream)' + endif + ! and throw seeding information back to worker (true => keep going) call throw_seed(seed_point,cholesky,logL,mpi_information,worker_id,administrator_epoch,.true.) diff --git a/src/polychord/run_time_info.f90 b/src/polychord/run_time_info.f90 index 903446f0..128eb521 100644 --- a/src/polychord/run_time_info.f90 +++ b/src/polychord/run_time_info.f90 @@ -600,19 +600,53 @@ end function delete_cluster subroutine calculate_covmats(settings,RTI) use settings_module, only: program_settings - use utils_module, only: calc_cholesky, calc_covmat + use utils_module, only: calc_cholesky, calc_covmat, dp use array_module, only: concat + use, intrinsic :: ieee_arithmetic implicit none type(program_settings), intent(in) :: settings !> Program settings type(run_time_info),intent(inout) :: RTI !> Run time information integer :: i_cluster ! cluster iterator + ! === DEBUG PRIORITY 1: Variables for debugging === + integer :: num_points + real(dp), allocatable, dimension(:,:) :: points_to_check + ! For each cluster: do i_cluster = 1,RTI%ncluster - RTI%covmat(:,:,i_cluster) = calc_covmat(concat(RTI%live(settings%h0:settings%h1,:RTI%nlive(i_cluster),i_cluster),& - RTI%phantom(settings%h0:settings%h1,:RTI%nphantom(i_cluster),i_cluster)),& - settings%wraparound) + ! === DEBUG PRIORITY 1: Check inputs to calc_covmat === + if (RTI%nlive(i_cluster) + RTI%nphantom(i_cluster) > 0) then + points_to_check = concat(RTI%live(settings%h0:settings%h1,:RTI%nlive(i_cluster),i_cluster),& + RTI%phantom(settings%h0:settings%h1,:RTI%nphantom(i_cluster),i_cluster)) + else + allocate(points_to_check(settings%nDims,0)) + end if + num_points = size(points_to_check,2) + + write(*,'(A,I3,A,I5,A,I5,A,I5)') 'DEBUG (calculate_covmats): ENTER for cluster ', i_cluster, & + ', nlive=', RTI%nlive(i_cluster), ', nphantom=', RTI%nphantom(i_cluster), ', total_points=', num_points + + if (any(ieee_is_nan(points_to_check))) then + write(*,'(A,I3,A)') 'DEBUG (calculate_covmats): ERROR! NaN detected in input points for cluster ', & + i_cluster, ' (Failure Point 4)' + endif + + if (num_points < 2) then + write(*,'(A,I3,A)') 'DEBUG (calculate_covmats): WARNING! Insufficient points (n < 2) for cluster ', & + i_cluster, ' (Failure Point 1)' + endif + + ! Original line - now using points_to_check + RTI%covmat(:,:,i_cluster) = calc_covmat(points_to_check, settings%wraparound) + + ! Add after + if (any(ieee_is_nan(RTI%covmat(:,:,i_cluster)))) then + write(*,'(A,I3,A)') 'DEBUG (calculate_covmats): ERROR! NaN in covmat AFTER calc_covmat for cluster ', i_cluster + endif + + if (allocated(points_to_check)) deallocate(points_to_check) + ! Calculate the cholesky decomposition RTI%cholesky(:,:,i_cluster) = calc_cholesky(RTI%covmat(:,:,i_cluster)) end do diff --git a/src/polychord/utils.F90 b/src/polychord/utils.F90 index c98c3022..4d59bf23 100644 --- a/src/polychord/utils.F90 +++ b/src/polychord/utils.F90 @@ -763,25 +763,72 @@ function calc_covmat(x,wraparound) result(covmat) real(dp), dimension(size(x,1),size(x,2)) :: dx real(dp), dimension(size(x,1)) :: mu, circle_mu - integer :: nDims,n + integer :: nDims,n,i + real(dp) :: sum_s, sum_c ! For wraparound checks nDims = size(x,1) n = size(x,2) + ! === DEBUG PRIORITY 2 CHECK 1: Insufficient points === + write(*,'(A,I5)') 'DEBUG (calc_covmat): ENTER with n=', n + if (n < 2) then + write(*,'(A)') 'DEBUG (calc_covmat): ERROR! n < 2, covariance undefined (Failure Point 1)' + ! For now, just report; in Phase 2 we'll add fallback + covmat = 0.0_dp + return + endif + + ! === DEBUG PRIORITY 2 CHECK 2: Wraparound atan2(0,0) === ! Compute the circle mean circle_mu = 0d0 + if (any(wraparound)) then + do i = 1, nDims + if (wraparound(i)) then + sum_s = sum(sin(x(i,:)*TwoPi)) + sum_c = sum(cos(x(i,:)*TwoPi)) + if (abs(sum_s) < 1.d-15 .and. abs(sum_c) < 1.d-15) then + write(*,'(A,I3,A)') 'DEBUG (calc_covmat): WARNING! atan2(0,0) condition for dim ', i, ' (Failure Point 2)' + endif + endif + end do + endif where(wraparound) circle_mu = atan2(sum(sin(x*TwoPi),dim=2),sum(cos(x*TwoPi),dim=2))/TwoPi - ! Compute the mean - dx = x - spread(circle_mu,dim=2,ncopies=n) + if (any(ieee_is_nan(circle_mu))) then + write(*,'(A)') 'DEBUG (calc_covmat): ERROR! NaN in circle_mu after atan2 (Failure Point 2)' + endif + + ! Compute the mean + dx = x - spread(circle_mu,dim=2,ncopies=n) where(spread(wraparound,dim=2,ncopies=n)) dx = dx - nint(dx) mu = modulo(sum(dx,dim=2)/n + circle_mu, 1d0) + if (any(ieee_is_nan(mu))) then + write(*,'(A)') 'DEBUG (calc_covmat): ERROR! NaN in mu after mean calculation' + endif + + ! === DEBUG PRIORITY 2 CHECK 3: Catastrophic cancellation === ! Compute the covariance matrix - dx = x - spread(mu,dim=2,ncopies=n) + dx = x - spread(mu,dim=2,ncopies=n) where(spread(wraparound,dim=2,ncopies=n)) dx = dx - nint(dx) + + ! Check if dx is effectively zero (tight cluster) + if (maxval(abs(dx)) < 1.d-25) then + write(*,'(A,E12.3)') 'DEBUG (calc_covmat): WARNING! Cluster extremely tight, max|dx|=', maxval(abs(dx)), & + ' (Failure Point 3)' + endif + covmat = matmul(dx,transpose(dx))/(n-1) + if (any(ieee_is_nan(covmat))) then + write(*,'(A)') 'DEBUG (calc_covmat): ERROR! NaN in final covmat (likely Failure Point 3)' + endif + + ! Check trace for near-zero variance + if (trace(covmat) < 1.d-30) then + write(*,'(A,E12.3)') 'DEBUG (calc_covmat): WARNING! Trace near-zero: ', trace(covmat), ' (Failure Point 3)' + endif + end function calc_covmat !> This function computes the similarity matrix of an array of data. From bd9d7bee97e718f64edf49071e78c5b63e0545ad Mon Sep 17 00:00:00 2001 From: dprelogo Date: Mon, 27 Oct 2025 14:06:58 +0000 Subject: [PATCH 33/44] ignore .mcp folders --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 514ca5e7..23cee3f6 100644 --- a/.gitignore +++ b/.gitignore @@ -33,4 +33,5 @@ pypolychord/.ld_preload.sh # Chains folder (test results only) chains/* .ipynb_checkpoints/ -CLAUDE.md \ No newline at end of file +CLAUDE.md +.mcp* \ No newline at end of file From ed36f996963b0bcebec29a670e2cc58dea9715e6 Mon Sep 17 00:00:00 2001 From: dprelogo Date: Tue, 28 Oct 2025 12:02:43 +0000 Subject: [PATCH 34/44] Fix NaN issues in covariance and Cholesky calculations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements three numerical stability fixes to prevent NaN errors that occur with high convergence criteria and small clusters: (1) returns regularized identity matrix when n<2 points, (2) avoids atan2(0,0) in wraparound dimensions, and (3) pre-regularizes covariance matrices before Cholesky decomposition. Consolidates calc_cholesky_regularised into calc_cholesky. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/polychord/utils.F90 | 110 +++++++++++----------------------------- 1 file changed, 31 insertions(+), 79 deletions(-) diff --git a/src/polychord/utils.F90 b/src/polychord/utils.F90 index 4d59bf23..e35a38dd 100644 --- a/src/polychord/utils.F90 +++ b/src/polychord/utils.F90 @@ -87,6 +87,9 @@ module utils_module ! which means that we neglect all terms smaller than eps times the ! current sum + !> Minimum variance for regularization of covariance matrices + !! Used when clusters have insufficient points or near-zero variance + real(dp), parameter :: min_variance = 1.d-20 integer,parameter :: flag_blank = -2 integer,parameter :: flag_gestating = -1 @@ -630,89 +633,31 @@ function abovetol (change,current_sum) end function abovetol - function calc_cholesky_regularised(a) result(L) + function calc_cholesky(a) result(L) implicit none real(dp), intent(in),dimension(:,:) :: a real(dp), dimension(size(a,1),size(a,2)) :: a_reg real(dp), dimension(size(a,1),size(a,2)) :: L integer :: i,j - real(dp), parameter :: min_variance = 1.d-20 - - ! Set it all to zero to begin with - L = 0 - ! Regularise the matrix to avoid numerical issues + ! === FIX 3: Pre-regularize the matrix to avoid numerical issues === + ! Add min_variance to diagonal before attempting decomposition + ! This prevents failure for near-singular matrices from tight clusters a_reg = a + identity_matrix(size(a,1)) * min_variance - ! Zero out the upper half - do i=1,size(a,1) - L(i,i)= a_reg(i,i) - sum(L(i,:i-1)**2) - if (L(i,i).le.0d0) then - ! If the cholesky decomposition still does not exist, then set it to - ! be a re-scaled identity matrix - L = identity_matrix(size(a,1)) * sqrt(min_variance) - return - else - L(i,i) = sqrt(L(i,i)) - end if - - do j=i+1,size(a,1) - L(j,i) = (a_reg(i,j) - sum(L(i,:i-1)*L(j,:i-1)))/L(i,i) - end do - - end do - - ! --- NEW, FINAL SANITY CHECK --- - if (any(ieee_is_nan(L))) then - write(*,'(A)') 'PolyChord TRACE (calc_cholesky): NaN detected in Cholesky matrix L at function exit.' - write(*,'(A)') 'Input matrix a:' - do i = 1, size(a,1) - write(*,'(*(E15.8,1X))') a(i,:) - end do - write(*,'(A)') 'Output matrix L:' - do i = 1, size(L,1) - write(*,'(*(E15.8,1X))') L(i,:) - end do - endif - - end function calc_cholesky_regularised - - function calc_cholesky(a) result(L) - implicit none - real(dp), intent(in),dimension(:,:) :: a - real(dp), dimension(size(a,1),size(a,2)) :: L - integer :: i,j - ! added trace_val to check for NaN - real(dp) :: trace_val - ! Set it all to zero to begin with L = 0 ! Zero out the upper half do i=1,size(a,1) - L(i,i)= a(i,i) - sum(L(i,:i-1)**2) + L(i,i)= a_reg(i,i) - sum(L(i,:i-1)**2) if (L(i,i).le.0d0) then - ! If the cholesky decomposition does not exist, then set it to - ! be a re-scaled identity matrix - - ! --- START OF ADDED WARNING --- - trace_val = trace(a) - if (trace_val < 1.d-20) then - write(*,'(A)') 'PolyChord WARNING: Cholesky decomposition failed.' - if (trace_val < 0.d0) then - write(*,'(A, E12.5)') ' Matrix trace is NEGATIVE: ', trace_val - else - write(*,'(A, E12.5)') ' Matrix trace is near-zero: ', trace_val - end if - write(*,'(A)') ' This will likely lead to a NaN direction vector.' - end if - ! --- END OF ADDED WARNING --- - - ! L = identity_matrix(size(a,1)) * sqrt(trace(a)) - - ! Make this sqrt safe for negative inputs - L = identity_matrix(size(a,1)) * sqrt(max(trace_val, 0.d0)) + ! If the cholesky decomposition still does not exist after regularization, + ! use a stronger fallback with guaranteed non-zero value + write(*,'(A)') 'PolyChord WARNING: Cholesky decomposition failed even after regularization.' + write(*,'(A)') ' Using minimal-variance identity matrix fallback.' + L = identity_matrix(size(a,1)) * sqrt(min_variance) ! --- NEW, CRITICAL TRACE --- if (any(ieee_is_nan(L))) then @@ -732,8 +677,8 @@ function calc_cholesky(a) result(L) L(i,i)=sqrt(L(i,i)) end if - do j=i+1,size(a,1) - L(j,i) = (a(i,j) - sum(L(i,:i-1)*L(j,:i-1)))/L(i,i) + do j=i+1,size(a_reg,1) + L(j,i) = (a_reg(i,j) - sum(L(i,:i-1)*L(j,:i-1)))/L(i,i) end do end do @@ -769,30 +714,37 @@ function calc_covmat(x,wraparound) result(covmat) nDims = size(x,1) n = size(x,2) - ! === DEBUG PRIORITY 2 CHECK 1: Insufficient points === + ! === FIX 1: Handle insufficient points (n < 2) === write(*,'(A,I5)') 'DEBUG (calc_covmat): ENTER with n=', n if (n < 2) then - write(*,'(A)') 'DEBUG (calc_covmat): ERROR! n < 2, covariance undefined (Failure Point 1)' - ! For now, just report; in Phase 2 we'll add fallback - covmat = 0.0_dp + write(*,'(A)') 'PolyChord WARNING (calc_covmat): n < 2, returning minimal-variance identity matrix (Fix 1)' + ! Return minimal-variance identity matrix to allow local exploration + ! This is mathematically sound: represents zero correlation and minimal variance + covmat = identity_matrix(nDims) * min_variance return endif - ! === DEBUG PRIORITY 2 CHECK 2: Wraparound atan2(0,0) === - ! Compute the circle mean + ! === FIX 2: Wraparound atan2(0,0) protection === + ! Compute the circle mean with protection against atan2(0,0) circle_mu = 0d0 if (any(wraparound)) then do i = 1, nDims if (wraparound(i)) then sum_s = sum(sin(x(i,:)*TwoPi)) sum_c = sum(cos(x(i,:)*TwoPi)) - if (abs(sum_s) < 1.d-15 .and. abs(sum_c) < 1.d-15) then - write(*,'(A,I3,A)') 'DEBUG (calc_covmat): WARNING! atan2(0,0) condition for dim ', i, ' (Failure Point 2)' + ! Check if magnitude is effectively zero (all points identical or antipodal) + if (sqrt(sum_s**2 + sum_c**2) < 1.d-15) then + ! Mean is undefined, use first point's value + circle_mu(i) = x(i,1) + write(*,'(A,I3,A)') 'PolyChord WARNING (calc_covmat): atan2(0,0) avoided for dim ', i, & + ' - using first point value (Fix 2)' + else + ! Normal case: compute angle + circle_mu(i) = atan2(sum_s, sum_c) / TwoPi endif endif end do endif - where(wraparound) circle_mu = atan2(sum(sin(x*TwoPi),dim=2),sum(cos(x*TwoPi),dim=2))/TwoPi if (any(ieee_is_nan(circle_mu))) then write(*,'(A)') 'DEBUG (calc_covmat): ERROR! NaN in circle_mu after atan2 (Failure Point 2)' From 9ea974ffaf9e1582c872be4a95b88ed0af2805cb Mon Sep 17 00:00:00 2001 From: dprelogo Date: Fri, 14 Nov 2025 16:34:57 +0000 Subject: [PATCH 35/44] Replace minimal-variance with unit covariance for single-point clusters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a cluster has only one point (n < 2), use unit identity matrix instead of minimal-variance identity matrix (1.0 instead of 1.0e-20). This provides more reasonable variance for local exploration. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/polychord/utils.F90 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/polychord/utils.F90 b/src/polychord/utils.F90 index e35a38dd..18fc1077 100644 --- a/src/polychord/utils.F90 +++ b/src/polychord/utils.F90 @@ -717,10 +717,10 @@ function calc_covmat(x,wraparound) result(covmat) ! === FIX 1: Handle insufficient points (n < 2) === write(*,'(A,I5)') 'DEBUG (calc_covmat): ENTER with n=', n if (n < 2) then - write(*,'(A)') 'PolyChord WARNING (calc_covmat): n < 2, returning minimal-variance identity matrix (Fix 1)' - ! Return minimal-variance identity matrix to allow local exploration - ! This is mathematically sound: represents zero correlation and minimal variance - covmat = identity_matrix(nDims) * min_variance + write(*,'(A)') 'PolyChord WARNING (calc_covmat): n < 2, returning unit covariance identity matrix (Fix 1)' + ! Return unit identity matrix to allow local exploration + ! This is mathematically sound: represents zero correlation and unit variance + covmat = identity_matrix(nDims) return endif From ae5daf1941ce70e59bdbbd4dbfb92351909c3fea Mon Sep 17 00:00:00 2001 From: dprelogo Date: Thu, 27 Nov 2025 11:56:31 +0000 Subject: [PATCH 36/44] Add conditional LAPACK support to build system MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add LAPACK=1 option (enabled by default) for eigendecomposition-based covariance regularization. Includes support for: - GNU: -llapack -lblas - Intel: -mkl - Intel LLVM: -qmkl - Cray: LibSci (auto-linked) Set LAPACK=0 to use pure-Fortran Jacobi fallback. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- Makefile | 6 +++++- Makefile_cray | 8 ++++++++ Makefile_gnu | 7 +++++++ Makefile_intel | 8 ++++++++ Makefile_intel_llvm | 8 ++++++++ 5 files changed, 36 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c6bc6410..df6a24f1 100644 --- a/Makefile +++ b/Makefile @@ -20,7 +20,11 @@ MPI=1 # Whether to compile in debugging mode (default: false) DEBUG=0 -export MPI DEBUG +# Whether to use LAPACK for eigendecomposition (default: true) +# Set LAPACK=0 to use pure-Fortran Jacobi fallback +LAPACK=1 + +export MPI DEBUG LAPACK # We can autodetect the compiler type on unix systems via the shell. # if you want to override this then just run make with diff --git a/Makefile_cray b/Makefile_cray index 3456fe6c..f315809a 100644 --- a/Makefile_cray +++ b/Makefile_cray @@ -19,6 +19,14 @@ FFLAGS += -ffree-line-length-none -fallow-argument-mismatch CXXFLAGS += -ffree-line-length-none -fallow-argument-mismatch endif +# LAPACK support for eigendecomposition-based covariance regularization +# Set LAPACK=0 to disable (will use pure-Fortran Jacobi fallback) +# Cray systems typically have LAPACK via LibSci +ifeq ($(LAPACK),1) +FFLAGS += -DHAVE_LAPACK +# LibSci is usually linked automatically on Cray systems +endif + ifdef IPO # Archive tool for compiling with ipo. AR = xiar rv diff --git a/Makefile_gnu b/Makefile_gnu index 1c2f7035..b101336d 100644 --- a/Makefile_gnu +++ b/Makefile_gnu @@ -43,6 +43,13 @@ else RPATH= endif +# LAPACK support for eigendecomposition-based covariance regularization +# Set LAPACK=0 to disable (will use pure-Fortran Jacobi fallback) +ifeq ($(LAPACK),1) +FFLAGS += -DHAVE_LAPACK +LDLIBS += -llapack -lblas +endif + ifeq ($(DEBUG),1) # Debugging mode # -------------- diff --git a/Makefile_intel b/Makefile_intel index c59228dd..ec83e179 100644 --- a/Makefile_intel +++ b/Makefile_intel @@ -28,6 +28,14 @@ CXXFLAGS += -std=c++11 -fpic LDFLAGS += -nofor-main LDLIBS += -cxxlib +# LAPACK support for eigendecomposition-based covariance regularization +# Set LAPACK=0 to disable (will use pure-Fortran Jacobi fallback) +# Intel MKL provides LAPACK +ifeq ($(LAPACK),1) +FFLAGS += -DHAVE_LAPACK +LDLIBS += -mkl +endif + ifeq ($(DEBUG),1) # Debugging mode # -------------- diff --git a/Makefile_intel_llvm b/Makefile_intel_llvm index d912f499..0e23e5d0 100644 --- a/Makefile_intel_llvm +++ b/Makefile_intel_llvm @@ -34,6 +34,14 @@ LDFLAGS += # Link specific Intel runtime libraries LDLIBS += -lifcore -lifport -limf -lsvml -liomp5 +# LAPACK support for eigendecomposition-based covariance regularization +# Set LAPACK=0 to disable (will use pure-Fortran Jacobi fallback) +# Intel MKL provides LAPACK (use -qmkl for LLVM-based compilers) +ifeq ($(LAPACK),1) +FFLAGS += -DHAVE_LAPACK -qmkl +LDLIBS += -qmkl +endif + ifeq ($(DEBUG),1) # Debugging mode # -------------- From c5c6775aea90426c9618ec4c283f3814a1afbaf7 Mon Sep 17 00:00:00 2001 From: dprelogo Date: Thu, 27 Nov 2025 12:04:32 +0000 Subject: [PATCH 37/44] Add eigendecomposition-based regularization for rank-deficient covariance matrices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When clusters have insufficient points (n < D+1), the covariance matrix is rank-deficient and cannot have a valid Cholesky decomposition. This commit implements a hybrid regularization approach: Algorithm: - Compute eigendecomposition of the covariance matrix - Replace eigenvalues below threshold max(λ_max * 1e-10, 1e-20) with 1.0 - This inserts unit variance along null-space directions (valid in unit hypercube) - Reconstruct the regularized matrix: V * Λ_reg * V^T Implementation: - jacobi_eigen: Pure-Fortran cyclic Jacobi eigendecomposition fallback - sym_eigen: Wrapper that uses LAPACK DSYEV when available, Jacobi otherwise - regularize_covmat: Main regularization function implementing the algorithm - calc_cholesky: Now accepts optional n_points parameter to trigger regularization The Cholesky fallback behavior is restored to original PolyChord: use sqrt(trace(a)) scaled identity matrix when decomposition fails. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/polychord/utils.F90 | 427 ++++++++++++++++++++++++++++++++++------ 1 file changed, 362 insertions(+), 65 deletions(-) diff --git a/src/polychord/utils.F90 b/src/polychord/utils.F90 index 18fc1077..eef95d29 100644 --- a/src/polychord/utils.F90 +++ b/src/polychord/utils.F90 @@ -91,6 +91,25 @@ module utils_module !! Used when clusters have insufficient points or near-zero variance real(dp), parameter :: min_variance = 1.d-20 + !> Relative threshold for detecting zero eigenvalues in regularization + real(dp), parameter :: eigenvalue_rel_threshold = 1.d-10 + !> Absolute floor threshold for eigenvalues (prevents issues when max eigenvalue is tiny) + real(dp), parameter :: eigenvalue_abs_threshold = 1.d-20 + +#ifdef HAVE_LAPACK + !> LAPACK interface for symmetric eigenvalue decomposition + interface + subroutine dsyev(jobz, uplo, n, a, lda, w, work, lwork, info) + import :: dp + character, intent(in) :: jobz, uplo + integer, intent(in) :: n, lda, lwork + integer, intent(out) :: info + real(dp), intent(inout) :: a(lda,*) + real(dp), intent(out) :: w(*), work(*) + end subroutine dsyev + end interface +#endif + integer,parameter :: flag_blank = -2 integer,parameter :: flag_gestating = -1 integer,parameter :: flag_waiting = 0 @@ -633,17 +652,45 @@ function abovetol (change,current_sum) end function abovetol - function calc_cholesky(a) result(L) + !> Compute Cholesky decomposition of a covariance matrix + !! + !! If n_points is provided and n_points < nDims + 1, the covariance matrix + !! is rank-deficient. In this case, we first apply eigendecomposition-based + !! regularization to make it positive-definite before computing Cholesky. + !! + !! @param[in] a Input covariance matrix + !! @param[in] n_points Optional: number of points used to compute covariance + !! @return L Lower-triangular Cholesky factor + function calc_cholesky(a, n_points) result(L) implicit none real(dp), intent(in),dimension(:,:) :: a + integer, intent(in), optional :: n_points real(dp), dimension(size(a,1),size(a,2)) :: a_reg real(dp), dimension(size(a,1),size(a,2)) :: L - integer :: i,j + integer :: i,j, nDims + logical :: needs_regularization - ! === FIX 3: Pre-regularize the matrix to avoid numerical issues === - ! Add min_variance to diagonal before attempting decomposition - ! This prevents failure for near-singular matrices from tight clusters - a_reg = a + identity_matrix(size(a,1)) * min_variance + nDims = size(a, 1) + + ! Determine if eigendecomposition-based regularization is needed + ! This is required when the covariance matrix is rank-deficient (n < D+1) + needs_regularization = .false. + if (present(n_points)) then + if (n_points < nDims + 1) then + needs_regularization = .true. + end if + end if + + if (needs_regularization) then + ! Use eigendecomposition-based regularization for rank-deficient matrices + ! This replaces zero eigenvalues with 1.0 (unit variance in null-space) + a_reg = regularize_covmat(a) + else + ! === FIX 3: Pre-regularize the matrix to avoid numerical issues === + ! Add min_variance to diagonal before attempting decomposition + ! This prevents failure for near-singular matrices from tight clusters + a_reg = a + identity_matrix(nDims) * min_variance + end if ! Set it all to zero to begin with L = 0 @@ -653,25 +700,9 @@ function calc_cholesky(a) result(L) L(i,i)= a_reg(i,i) - sum(L(i,:i-1)**2) if (L(i,i).le.0d0) then - ! If the cholesky decomposition still does not exist after regularization, - ! use a stronger fallback with guaranteed non-zero value - write(*,'(A)') 'PolyChord WARNING: Cholesky decomposition failed even after regularization.' - write(*,'(A)') ' Using minimal-variance identity matrix fallback.' - L = identity_matrix(size(a,1)) * sqrt(min_variance) - - ! --- NEW, CRITICAL TRACE --- - if (any(ieee_is_nan(L))) then - write(*,'(A)') 'PolyChord TRACE (calc_cholesky): NaN detected in Cholesky matrix L immediately after creation from a failed decomposition.' - write(*,'(A)') 'Input matrix a:' - do j = 1, size(a,1) - write(*,'(*(E15.8,1X))') a(j,:) - end do - write(*,'(A)') 'Output matrix L:' - do j = 1, size(L,1) - write(*,'(*(E15.8,1X))') L(j,:) - end do - end if - + ! If the cholesky decomposition does not exist, then set it to + ! be a re-scaled identity matrix (original PolyChord behavior) + L = identity_matrix(nDims) * sqrt(trace(a_reg)) return else L(i,i)=sqrt(L(i,i)) @@ -683,20 +714,314 @@ function calc_cholesky(a) result(L) end do - ! --- NEW, FINAL SANITY CHECK --- - if (any(ieee_is_nan(L))) then - write(*,'(A)') 'PolyChord TRACE (calc_cholesky): NaN detected in Cholesky matrix L at function exit.' - write(*,'(A)') 'Input matrix a:' - do i = 1, size(a,1) - write(*,'(*(E15.8,1X))') a(i,:) + end function calc_cholesky + + !> Compute eigenvalues and eigenvectors of a symmetric matrix using Jacobi method + !! + !! This is a pure-Fortran fallback for systems without LAPACK. + !! Uses the cyclic Jacobi algorithm which iteratively applies Givens rotations + !! to diagonalize the matrix. + !! + !! @param[inout] a On entry: symmetric matrix. On exit: destroyed. + !! @param[out] w Eigenvalues in ascending order + !! @param[out] v Eigenvectors as columns (v(:,i) is eigenvector for w(i)) + !! @param[out] info 0 on success, >0 if failed to converge + subroutine jacobi_eigen(a, w, v, info) + implicit none + real(dp), intent(inout), dimension(:,:) :: a + real(dp), intent(out), dimension(:) :: w + real(dp), intent(out), dimension(:,:) :: v + integer, intent(out) :: info + + integer :: n, i, j, k, p, q, sweep + real(dp) :: threshold, off_diag, c, s, t, tau, h, g, temp + real(dp) :: tol, sum_off + integer, parameter :: max_sweeps = 50 + + n = size(a, 1) + info = 0 + tol = 1.0d-15 + + ! Initialize eigenvector matrix to identity + v = 0.0d0 + do i = 1, n + v(i,i) = 1.0d0 + end do + + ! Main Jacobi iteration + do sweep = 1, max_sweeps + ! Compute sum of off-diagonal elements + sum_off = 0.0d0 + do i = 1, n-1 + do j = i+1, n + sum_off = sum_off + abs(a(i,j)) + end do end do - write(*,'(A)') 'Output matrix L:' - do i = 1, size(L,1) - write(*,'(*(E15.8,1X))') L(i,:) + + ! Check for convergence + if (sum_off < tol * n * n) exit + + ! Threshold for this sweep + if (sweep < 4) then + threshold = 0.2d0 * sum_off / (n * n) + else + threshold = 0.0d0 + end if + + ! Sweep through all off-diagonal elements + do p = 1, n-1 + do q = p+1, n + off_diag = abs(a(p,q)) + + ! Skip if element is small enough + if (sweep > 4 .and. off_diag < tol * abs(a(p,p)) .and. & + off_diag < tol * abs(a(q,q))) then + a(p,q) = 0.0d0 + a(q,p) = 0.0d0 + cycle + end if + + if (off_diag > threshold) then + h = a(q,q) - a(p,p) + + if (abs(h) < tol * off_diag) then + t = 1.0d0 + if (a(p,q) < 0.0d0) t = -1.0d0 + else + tau = h / (2.0d0 * a(p,q)) + if (tau >= 0.0d0) then + t = 1.0d0 / (tau + sqrt(1.0d0 + tau*tau)) + else + t = -1.0d0 / (-tau + sqrt(1.0d0 + tau*tau)) + end if + end if + + c = 1.0d0 / sqrt(1.0d0 + t*t) + s = t * c + tau = s / (1.0d0 + c) + h = t * a(p,q) + + ! Update diagonal elements + a(p,p) = a(p,p) - h + a(q,q) = a(q,q) + h + a(p,q) = 0.0d0 + a(q,p) = 0.0d0 + + ! Update rest of row/column p and q + do k = 1, p-1 + g = a(k,p) + h = a(k,q) + a(k,p) = g - s * (h + g * tau) + a(k,q) = h + s * (g - h * tau) + end do + do k = p+1, q-1 + g = a(p,k) + h = a(k,q) + a(p,k) = g - s * (h + g * tau) + a(k,q) = h + s * (g - h * tau) + end do + do k = q+1, n + g = a(p,k) + h = a(q,k) + a(p,k) = g - s * (h + g * tau) + a(q,k) = h + s * (g - h * tau) + end do + + ! Update eigenvector matrix + do k = 1, n + g = v(k,p) + h = v(k,q) + v(k,p) = g - s * (h + g * tau) + v(k,q) = h + s * (g - h * tau) + end do + end if + end do end do - endif + end do - end function calc_cholesky + ! Check convergence + if (sweep > max_sweeps) then + info = 1 + write(*,'(A)') 'PolyChord WARNING (jacobi_eigen): Failed to converge' + end if + + ! Extract eigenvalues from diagonal + do i = 1, n + w(i) = a(i,i) + end do + + ! Sort eigenvalues in ascending order (simple bubble sort - n is small) + do i = 1, n-1 + do j = i+1, n + if (w(j) < w(i)) then + ! Swap eigenvalues + temp = w(i) + w(i) = w(j) + w(j) = temp + ! Swap eigenvector columns + do k = 1, n + temp = v(k,i) + v(k,i) = v(k,j) + v(k,j) = temp + end do + end if + end do + end do + + end subroutine jacobi_eigen + + + !> Compute eigenvalues and eigenvectors of a symmetric matrix + !! + !! Uses LAPACK DSYEV if available, otherwise falls back to Jacobi method. + !! + !! @param[in] a_in Symmetric matrix (not modified) + !! @param[out] w Eigenvalues in ascending order + !! @param[out] v Eigenvectors as columns + !! @param[out] info 0 on success, >0 on failure + subroutine sym_eigen(a_in, w, v, info) + implicit none + real(dp), intent(in), dimension(:,:) :: a_in + real(dp), intent(out), dimension(:) :: w + real(dp), intent(out), dimension(:,:) :: v + integer, intent(out) :: info + + integer :: n, lwork + real(dp), allocatable :: work(:) +#ifdef HAVE_LAPACK + real(dp), allocatable :: a_copy(:,:) +#endif + + n = size(a_in, 1) + +#ifdef HAVE_LAPACK + ! Use LAPACK DSYEV + allocate(a_copy(n,n)) + a_copy = a_in + + ! Query optimal workspace size + allocate(work(1)) + lwork = -1 + call dsyev('V', 'U', n, a_copy, n, w, work, lwork, info) + lwork = int(work(1)) + deallocate(work) + allocate(work(lwork)) + + ! Compute eigenvalues and eigenvectors + call dsyev('V', 'U', n, a_copy, n, w, work, lwork, info) + + ! DSYEV stores eigenvectors in a_copy + v = a_copy + + deallocate(work, a_copy) +#else + ! Use pure-Fortran Jacobi fallback + ! jacobi_eigen destroys the input matrix, so we work on a copy + ! and store eigenvectors separately + allocate(work(n*n)) + work = reshape(a_in, [n*n]) + v = reshape(work, [n,n]) ! v is used as working copy for matrix + deallocate(work) + + ! jacobi_eigen expects separate output for eigenvectors + block + real(dp), dimension(n,n) :: a_work, v_work + a_work = a_in + call jacobi_eigen(a_work, w, v_work, info) + v = v_work + end block +#endif + + end subroutine sym_eigen + + + !> Regularize a covariance matrix to ensure positive-definiteness + !! + !! For rank-deficient covariance matrices (from clusters with n < D+1 points), + !! this function replaces zero eigenvalues with 1.0 (unit variance in null-space). + !! + !! Algorithm: + !! 1. Eigendecomposition: Σ = Q Λ Q^T + !! 2. Replace eigenvalues below threshold with 1.0 + !! 3. Reconstruct: Σ_reg = Q Λ_reg Q^T + !! + !! @param[in] covmat Input covariance matrix (may be singular) + !! @return covmat_reg Regularized positive-definite covariance matrix + function regularize_covmat(covmat) result(covmat_reg) + implicit none + real(dp), intent(in), dimension(:,:) :: covmat + real(dp), dimension(size(covmat,1), size(covmat,2)) :: covmat_reg + + integer :: n, i, j, info + real(dp), allocatable :: w(:), v(:,:), w_reg(:) + real(dp) :: max_eigenvalue, threshold + + n = size(covmat, 1) + allocate(w(n), v(n,n), w_reg(n)) + + ! Step 1: Eigendecomposition + call sym_eigen(covmat, w, v, info) + + if (info /= 0) then + write(*,'(A,I3)') 'PolyChord WARNING (regularize_covmat): Eigendecomposition failed with info=', info + write(*,'(A)') ' Returning identity matrix' + covmat_reg = identity_matrix(n) + deallocate(w, v, w_reg) + return + end if + + ! Check for NaN in eigenvalues + if (any(ieee_is_nan(w))) then + write(*,'(A)') 'PolyChord WARNING (regularize_covmat): NaN in eigenvalues, returning identity matrix' + covmat_reg = identity_matrix(n) + deallocate(w, v, w_reg) + return + end if + + ! Step 2: Handle negative eigenvalues (numerical error) and compute threshold + w_reg = max(0.0d0, w) + max_eigenvalue = maxval(w_reg) + + ! Hybrid threshold: relative + absolute floor + threshold = max(max_eigenvalue * eigenvalue_rel_threshold, eigenvalue_abs_threshold) + + ! Step 3: Replace small/zero eigenvalues with 1.0 (unit variance in null-space) + ! Also handle rank-0 case (all eigenvalues near zero) + if (max_eigenvalue < eigenvalue_abs_threshold) then + ! All eigenvalues effectively zero - return identity + write(*,'(A)') 'PolyChord INFO (regularize_covmat): All eigenvalues near zero, using identity matrix' + covmat_reg = identity_matrix(n) + deallocate(w, v, w_reg) + return + end if + + do i = 1, n + if (w_reg(i) < threshold) then + w_reg(i) = 1.0d0 + end if + end do + + ! Step 4: Reconstruct covariance matrix: Σ_reg = V * diag(w_reg) * V^T + ! Use efficient matmul instead of nested loops for BLAS optimization + block + real(dp), dimension(n,n) :: temp_mat + ! Calculate V * Lambda (scale columns of V by eigenvalues) + do j = 1, n + temp_mat(:,j) = v(:,j) * w_reg(j) + end do + ! Calculate (V * Lambda) * V^T + covmat_reg = matmul(temp_mat, transpose(v)) + end block + + ! Sanity check + if (any(ieee_is_nan(covmat_reg))) then + write(*,'(A)') 'PolyChord WARNING (regularize_covmat): NaN in reconstructed matrix, returning identity' + covmat_reg = identity_matrix(n) + end if + + deallocate(w, v, w_reg) + + end function regularize_covmat function calc_covmat(x,wraparound) result(covmat) implicit none @@ -714,10 +1039,8 @@ function calc_covmat(x,wraparound) result(covmat) nDims = size(x,1) n = size(x,2) - ! === FIX 1: Handle insufficient points (n < 2) === - write(*,'(A,I5)') 'DEBUG (calc_covmat): ENTER with n=', n + ! Handle insufficient points (n < 2) if (n < 2) then - write(*,'(A)') 'PolyChord WARNING (calc_covmat): n < 2, returning unit covariance identity matrix (Fix 1)' ! Return unit identity matrix to allow local exploration ! This is mathematically sound: represents zero correlation and unit variance covmat = identity_matrix(nDims) @@ -736,8 +1059,6 @@ function calc_covmat(x,wraparound) result(covmat) if (sqrt(sum_s**2 + sum_c**2) < 1.d-15) then ! Mean is undefined, use first point's value circle_mu(i) = x(i,1) - write(*,'(A,I3,A)') 'PolyChord WARNING (calc_covmat): atan2(0,0) avoided for dim ', i, & - ' - using first point value (Fix 2)' else ! Normal case: compute angle circle_mu(i) = atan2(sum_s, sum_c) / TwoPi @@ -746,41 +1067,17 @@ function calc_covmat(x,wraparound) result(covmat) end do endif - if (any(ieee_is_nan(circle_mu))) then - write(*,'(A)') 'DEBUG (calc_covmat): ERROR! NaN in circle_mu after atan2 (Failure Point 2)' - endif - ! Compute the mean dx = x - spread(circle_mu,dim=2,ncopies=n) where(spread(wraparound,dim=2,ncopies=n)) dx = dx - nint(dx) mu = modulo(sum(dx,dim=2)/n + circle_mu, 1d0) - if (any(ieee_is_nan(mu))) then - write(*,'(A)') 'DEBUG (calc_covmat): ERROR! NaN in mu after mean calculation' - endif - - ! === DEBUG PRIORITY 2 CHECK 3: Catastrophic cancellation === ! Compute the covariance matrix dx = x - spread(mu,dim=2,ncopies=n) where(spread(wraparound,dim=2,ncopies=n)) dx = dx - nint(dx) - ! Check if dx is effectively zero (tight cluster) - if (maxval(abs(dx)) < 1.d-25) then - write(*,'(A,E12.3)') 'DEBUG (calc_covmat): WARNING! Cluster extremely tight, max|dx|=', maxval(abs(dx)), & - ' (Failure Point 3)' - endif - covmat = matmul(dx,transpose(dx))/(n-1) - if (any(ieee_is_nan(covmat))) then - write(*,'(A)') 'DEBUG (calc_covmat): ERROR! NaN in final covmat (likely Failure Point 3)' - endif - - ! Check trace for near-zero variance - if (trace(covmat) < 1.d-30) then - write(*,'(A,E12.3)') 'DEBUG (calc_covmat): WARNING! Trace near-zero: ', trace(covmat), ' (Failure Point 3)' - endif - end function calc_covmat !> This function computes the similarity matrix of an array of data. From d249c4c45d89a472fe51513bd0593562f5b81a8b Mon Sep 17 00:00:00 2001 From: dprelogo Date: Thu, 27 Nov 2025 12:04:43 +0000 Subject: [PATCH 38/44] Integrate eigendecomposition regularization in calculate_covmats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Pass num_points to calc_cholesky to enable rank-deficient regularization - Remove debug output that was added during development - Clean up variable declarations The num_points parameter allows calc_cholesky to determine when eigendecomposition-based regularization is needed (n < D+1 case). 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/polychord/run_time_info.f90 | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/src/polychord/run_time_info.f90 b/src/polychord/run_time_info.f90 index 128eb521..05755257 100644 --- a/src/polychord/run_time_info.f90 +++ b/src/polychord/run_time_info.f90 @@ -609,13 +609,12 @@ subroutine calculate_covmats(settings,RTI) type(run_time_info),intent(inout) :: RTI !> Run time information integer :: i_cluster ! cluster iterator - ! === DEBUG PRIORITY 1: Variables for debugging === integer :: num_points real(dp), allocatable, dimension(:,:) :: points_to_check ! For each cluster: do i_cluster = 1,RTI%ncluster - ! === DEBUG PRIORITY 1: Check inputs to calc_covmat === + ! Concatenate live and phantom points for this cluster if (RTI%nlive(i_cluster) + RTI%nphantom(i_cluster) > 0) then points_to_check = concat(RTI%live(settings%h0:settings%h1,:RTI%nlive(i_cluster),i_cluster),& RTI%phantom(settings%h0:settings%h1,:RTI%nphantom(i_cluster),i_cluster)) @@ -624,31 +623,15 @@ subroutine calculate_covmats(settings,RTI) end if num_points = size(points_to_check,2) - write(*,'(A,I3,A,I5,A,I5,A,I5)') 'DEBUG (calculate_covmats): ENTER for cluster ', i_cluster, & - ', nlive=', RTI%nlive(i_cluster), ', nphantom=', RTI%nphantom(i_cluster), ', total_points=', num_points - - if (any(ieee_is_nan(points_to_check))) then - write(*,'(A,I3,A)') 'DEBUG (calculate_covmats): ERROR! NaN detected in input points for cluster ', & - i_cluster, ' (Failure Point 4)' - endif - - if (num_points < 2) then - write(*,'(A,I3,A)') 'DEBUG (calculate_covmats): WARNING! Insufficient points (n < 2) for cluster ', & - i_cluster, ' (Failure Point 1)' - endif - - ! Original line - now using points_to_check + ! Calculate the covariance matrix RTI%covmat(:,:,i_cluster) = calc_covmat(points_to_check, settings%wraparound) - ! Add after - if (any(ieee_is_nan(RTI%covmat(:,:,i_cluster)))) then - write(*,'(A,I3,A)') 'DEBUG (calculate_covmats): ERROR! NaN in covmat AFTER calc_covmat for cluster ', i_cluster - endif - if (allocated(points_to_check)) deallocate(points_to_check) ! Calculate the cholesky decomposition - RTI%cholesky(:,:,i_cluster) = calc_cholesky(RTI%covmat(:,:,i_cluster)) + ! Pass num_points to enable eigendecomposition-based regularization + ! for rank-deficient covariance matrices (when n < D+1) + RTI%cholesky(:,:,i_cluster) = calc_cholesky(RTI%covmat(:,:,i_cluster), num_points) end do From 26d730163a4cd41a2f7c6f9489de8ac1ed294210 Mon Sep 17 00:00:00 2001 From: dprelogo Date: Thu, 27 Nov 2025 12:15:35 +0000 Subject: [PATCH 39/44] add lapack variable to enable it by default --- setup.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d075db15..bb2aa37a 100644 --- a/setup.py +++ b/setup.py @@ -99,7 +99,10 @@ def run(self): if self.distribution.debug_flags is not None: self.distribution.ext_modules[0].extra_compile_args += ["-g", "-O0"] env["DEBUG"] = "1" - + + # Enable LAPACK by default for eigendecomposition-based covariance regularization + env["LAPACK"] = os.environ.get("LAPACK", "1") + BASE_PATH = os.path.dirname(os.path.abspath(__file__)) env["CURDIR"] = BASE_PATH env.update({k : os.environ[k] for k in ["CC", "CXX", "FC"] if k in os.environ}) From 0e0ec028d2a694f5e015d74112a4072607d0f305 Mon Sep 17 00:00:00 2001 From: dprelogo Date: Thu, 26 Feb 2026 16:31:11 +0000 Subject: [PATCH 40/44] fix: pop cube_samples before kwargs validation in run() cube_samples was handled on line 587 but was never included in default_kwargs, so the validation check on line 565 rejected it as an unknown keyword argument before it could be used. Follow the same pattern as paramnames: pop it before validation, use the local variable after. Co-Authored-By: Claude Opus 4.6 --- pypolychord/polychord.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pypolychord/polychord.py b/pypolychord/polychord.py index f32efe67..8257ee74 100644 --- a/pypolychord/polychord.py +++ b/pypolychord/polychord.py @@ -521,6 +521,7 @@ def run(loglikelihood, nDims, **kwargs): default_comm = None paramnames = kwargs.pop('paramnames', None) + cube_samples = kwargs.pop('cube_samples', None) default_kwargs = { 'nDerived': 0, @@ -583,8 +584,8 @@ def run(loglikelihood, nDims, **kwargs): (kwargs['file_root'] + ".paramnames")) - if 'cube_samples' in kwargs: - _make_resume_file(loglikelihood, kwargs['prior'], **kwargs) + if cube_samples is not None: + _make_resume_file(loglikelihood, cube_samples=cube_samples, **kwargs) read_resume = kwargs['read_resume'] kwargs['read_resume'] = True @@ -644,7 +645,7 @@ def wrap_prior(cube, theta): kwargs['seed'], kwargs['comm']) - if 'cube_samples' in kwargs: + if cube_samples is not None: kwargs['read_resume'] = read_resume try: From cdb65033f4695abaf66f5e7fefaf9c10e9cc000f Mon Sep 17 00:00:00 2001 From: dprelogo Date: Thu, 26 Feb 2026 17:07:40 +0000 Subject: [PATCH 41/44] Add full pyproject.toml metadata for uv sync support Add [project] table with dependencies, optional-dependencies, and dev dependency group so that `uv sync` can create a local environment with MPI support. Disable build isolation for pypolychord via [tool.uv] so MPI compilers have access to the full environment. Fix setup.py to pass HOME and other essential env vars to the make subprocess, which OpenMPI's mpifort requires for opal_init. Co-Authored-By: Claude Opus 4.6 --- .gitignore | 3 ++- pyproject.toml | 35 +++++++++++++++++++++++++++++++++++ setup.py | 5 +++-- 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 23cee3f6..27117c7c 100644 --- a/.gitignore +++ b/.gitignore @@ -34,4 +34,5 @@ pypolychord/.ld_preload.sh chains/* .ipynb_checkpoints/ CLAUDE.md -.mcp* \ No newline at end of file +.mcp* +uv.lock \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 4973c843..e7ea9941 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,38 @@ [build-system] requires = ["setuptools", "wheel", "numpy"] build-backend = "setuptools.build_meta" + +[project] +name = "pypolychord" +version = "1.22.1" +description = "Python interface to PolyChord" +readme = "README.rst" +license = "LicenseRef-PolyChord" +license-files = ["LICENCE"] +authors = [ + { name = "Will Handley", email = "wh260@cam.ac.uk" }, +] +requires-python = ">=3.9" +dependencies = [ + "numpy", + "scipy", +] + +[project.optional-dependencies] +plotting = ["getdist"] +mpi = ["mpi4py"] + +[dependency-groups] +dev = ["setuptools", "wheel", "mpi4py"] + +[project.urls] +Homepage = "https://github.com/PolyChord/PolyChordLite" + +[tool.uv] +no-build-isolation-package = ["pypolychord"] + +[tool.setuptools.packages.find] +include = ["pypolychord*"] + +[tool.setuptools.package-data] +pypolychord = ["lib/libchord.so"] diff --git a/setup.py b/setup.py index bb2aa37a..26c2ec2b 100644 --- a/setup.py +++ b/setup.py @@ -83,8 +83,9 @@ def __init__(self, *args, **kwargs): class CustomBuildPy(_build_py, object): def run(self): - env = {} - env["PATH"] = os.environ["PATH"] + env = {k: v for k, v in os.environ.items() + if k in ("PATH", "HOME", "USER", "TMPDIR", "LANG", + "TERM", "SHELL", "LOGNAME")} if self.distribution.no_mpi is None: env["MPI"] = "1" # These need to be set so that build_ext uses the right compilers From eaff7b5543e5c511aeb66df32a060a6f40c981ee Mon Sep 17 00:00:00 2001 From: dprelogo Date: Thu, 26 Feb 2026 17:27:23 +0000 Subject: [PATCH 42/44] fix: add fortranformat to package dependencies Required by _make_resume_file for writing Fortran-readable resume files when cube_samples is provided. Was always imported but never declared as a dependency. Co-Authored-By: Claude Opus 4.6 --- pyproject.toml | 1 + setup.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e7ea9941..5e05bd29 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ requires-python = ">=3.9" dependencies = [ "numpy", "scipy", + "fortranformat", ] [project.optional-dependencies] diff --git a/setup.py b/setup.py index 26c2ec2b..0d533b55 100644 --- a/setup.py +++ b/setup.py @@ -151,7 +151,7 @@ def run(self): author_email='wh260@cam.ac.uk', license='PolyChord', packages=find_packages(), - install_requires=['numpy','scipy'], + install_requires=['numpy','scipy','fortranformat'], extras_require={'plotting': 'getdist'}, distclass=DistributionWithOption, ext_modules=[pypolychord_module], From b26716550b5c200a303a123ef76c8423b807c6da Mon Sep 17 00:00:00 2001 From: dprelogo Date: Thu, 26 Feb 2026 18:13:57 +0000 Subject: [PATCH 43/44] clean: remove excessive NaN tracing, keep only defensive guards Remove debug NaN checks that were added during NaN investigation: - Remove all commented-out NaN injection test code (generate.F90) - Remove redundant pipeline tracing (calculate, nested_sampling, mpi_utils) - Remove C++ NaN check in loglikelihood callback - Remove "hello world" worker message - Move useful diagnostic checks behind #ifdef DEBUG preprocessor guard - Add -DDEBUG to debug FFLAGS in gnu, intel, intel_llvm Makefiles - Keep defensive recovery guards in regularize_covmat (utils.F90) - Keep zero-magnitude direction vector warning (chordal_sampling.f90) Co-Authored-By: Claude Opus 4.6 --- Makefile_gnu | 2 +- Makefile_intel | 2 +- Makefile_intel_llvm | 2 +- pypolychord/_pypolychord.cpp | 11 ------ src/polychord/calculate.f90 | 35 ++++------------- src/polychord/chordal_sampling.f90 | 63 +++++++++++++++--------------- src/polychord/generate.F90 | 43 -------------------- src/polychord/mpi_utils.F90 | 9 +---- src/polychord/nested_sampling.F90 | 33 ---------------- 9 files changed, 43 insertions(+), 157 deletions(-) diff --git a/Makefile_gnu b/Makefile_gnu index b101336d..89f12ff7 100644 --- a/Makefile_gnu +++ b/Makefile_gnu @@ -62,7 +62,7 @@ ifeq ($(DEBUG),1) # backtrace : produce backtrace of error # fpe-trap : search for floating point exceptions (dividing by zero etc) # fbounds-check : check array indices -FFLAGS += -g -O0 -Wall -Wextra -pedantic -fcheck=all -fimplicit-none -fbacktrace -ffpe-trap=zero,overflow +FFLAGS += -g -O0 -Wall -Wextra -pedantic -fcheck=all -fimplicit-none -fbacktrace -ffpe-trap=zero,overflow -DDEBUG # CXXFLAGS += -g -O0 -Wall -Wextra -Wshadow -Weffc++ CFLAGS += -g -O0 -Wall -Wextra -Wshadow -Weffc++ diff --git a/Makefile_intel b/Makefile_intel index ec83e179..04791a66 100644 --- a/Makefile_intel +++ b/Makefile_intel @@ -49,7 +49,7 @@ ifeq ($(DEBUG),1) # gen-interfaces : generate an interface block for each routine # warn-interfaces: warn on these interface blocks # fpe0 : halts program if dividing by zero, etc -FFLAGS += -g -O0 -traceback -check all,noarg_temp_created -warn all -ftrapuv -debug all -gen-interfaces -warn-interfaces +FFLAGS += -g -O0 -traceback -check all,noarg_temp_created -warn all -ftrapuv -debug all -gen-interfaces -warn-interfaces -DDEBUG CXXFLAGS += -g -O0 -traceback -Wcheck -Wremarks -Wall -Weffc++ -ftrapuv -debug all LDFLAGS += -g else diff --git a/Makefile_intel_llvm b/Makefile_intel_llvm index 0e23e5d0..9583cf1a 100644 --- a/Makefile_intel_llvm +++ b/Makefile_intel_llvm @@ -55,7 +55,7 @@ ifeq ($(DEBUG),1) # gen-interfaces : generate an interface block for each routine # warn-interfaces: warn on these interface blocks # fpe0 : halts program if dividing by zero, etc -FFLAGS += -g -O0 -traceback -check all,noarg_temp_created -warn all -ftrapuv -debug all -gen-interfaces -warn-interfaces +FFLAGS += -g -O0 -traceback -check all,noarg_temp_created -warn all -ftrapuv -debug all -gen-interfaces -warn-interfaces -DDEBUG CXXFLAGS += -g -O0 -traceback -Wcheck -Wremarks -Wall -Weffc++ -ftrapuv -debug all LDFLAGS += -g else diff --git a/pypolychord/_pypolychord.cpp b/pypolychord/_pypolychord.cpp index 5cc5b413..fb727365 100644 --- a/pypolychord/_pypolychord.cpp +++ b/pypolychord/_pypolychord.cpp @@ -4,7 +4,6 @@ #include "interfaces.hpp" #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #include -#include #ifdef USE_MPI #include @@ -39,16 +38,6 @@ static PyObject *python_loglikelihood = NULL; double loglikelihood(double* theta, int nDims, double* phi, int nDerived) { - // --- START OF ADDED WARNING --- - for (int i = 0; i < nDims; ++i) { - if (std::isnan(theta[i])) { - PySys_WriteStderr("PolyChord TRACE (C++ loglikelihood): NaN detected in parameter vector received from Fortran.\n"); - // We can break after the first one is found. - break; - } - } - // --- END OF ADDED WARNING --- - /* Create a python version of theta */ npy_intp theta_shape[] = {nDims}; PyObject *array_theta = PyArray_SimpleNewFromData(1, theta_shape, NPY_DOUBLE, theta); diff --git a/src/polychord/calculate.f90 b/src/polychord/calculate.f90 index 5cbdc9d0..273c6a9c 100644 --- a/src/polychord/calculate.f90 +++ b/src/polychord/calculate.f90 @@ -1,13 +1,13 @@ module calculate_module use utils_module, only: dp - use, intrinsic :: ieee_arithmetic implicit none contains subroutine calculate_point(loglikelihood,prior,point,settings,nlike) use settings_module, only: program_settings - ! added to check for NaN in hypercube vector - use utils_module, only: stdout_unit +#ifdef DEBUG + use, intrinsic :: ieee_arithmetic +#endif implicit none interface @@ -35,23 +35,17 @@ function prior(cube) result(theta) real(dp),dimension(settings%nDims) :: mn, mx ! Min and max bounds real(dp),dimension(settings%nDerived) :: phi ! derived parameters real(dp) :: logL - + mn = merge(-1d0,0d0,settings%wraparound) mx = merge(2d0,1d0,settings%wraparound) cube = point(settings%h0:settings%h1) - ! ! --- NEW AGGRESSIVE CHECK 0 --- - ! if (any(ieee_is_nan(point))) then - ! write(*, '(A)') 'TRACE 0 (calculate_point): NaN detected in the full input POINT vector.' - ! write(*, '(A, *(F24.15))') ' point = ', point - ! end if - - ! --- START OF ADDED WARNING --- +#ifdef DEBUG if (any(ieee_is_nan(cube))) then - write(*,'(A)') 'PolyChord TRACE (calculate_point): NaN detected in hypercube vector before prior transformation.' + write(*,'(A)') 'PolyChord DEBUG (calculate_point): NaN detected in hypercube vector before prior transformation.' end if - ! --- END OF ADDED WARNING --- +#endif if ( any(cubemx) ) then theta = 0 @@ -59,20 +53,8 @@ function prior(cube) result(theta) else where(settings%wraparound) cube = modulo(cube,1d0) - if (any(ieee_is_nan(cube))) then - ! --- NEW AGGRESSIVE CHECK 1 --- - write(*,'(A)') 'TRACE 1 (calculate_point): NaN detected in hypercube vector AFTER MODULO.' - write(*, '(A, *(F24.15))') ' cube = ', cube - end if - theta = prior(cube) - ! --- NEW AGGRESSIVE CHECK 2 (MOST IMPORTANT) --- - if (any(ieee_is_nan(theta))) then - write(*,'(A)') 'TRACE 2 (calculate_point): NaN detected in physical vector THETA before likelihood call.' - write(*, '(A, *(F24.15))') ' theta = ', theta - end if - logL = loglikelihood(theta,phi) end if @@ -102,7 +84,7 @@ function calculate_posterior_point(settings,point,logweight,evidence,volume) res posterior_point(settings%pos_X) = volume ! Likelihood posterior_point(settings%pos_l) = point(settings%l0) - ! Un-normalised weighting + ! Un-normalised weighting posterior_point(settings%pos_w) = logweight ! un-normalise cumulative weighting posterior_point(settings%pos_Z) = evidence @@ -151,5 +133,4 @@ end function calculate_similarity_matrix - end module calculate_module diff --git a/src/polychord/chordal_sampling.f90 b/src/polychord/chordal_sampling.f90 index e481a35c..1057f109 100644 --- a/src/polychord/chordal_sampling.f90 +++ b/src/polychord/chordal_sampling.f90 @@ -1,6 +1,5 @@ module chordal_module use utils_module, only: dp - use, intrinsic :: ieee_arithmetic implicit none contains @@ -8,8 +7,9 @@ module chordal_module function SliceSampling(loglikelihood,prior,settings,logL,seed_point,cholesky,nlike,num_repeats) result(baby_points) use settings_module, only: program_settings use random_module, only: random_orthonormal_basis,random_real - ! added to check for NaN in hypercube vector, to support stdout - use utils_module, only: stdout_unit +#ifdef DEBUG + use, intrinsic :: ieee_arithmetic +#endif implicit none interface @@ -66,14 +66,14 @@ function prior(cube) result(theta) ! Start the baby point at the seed point previous_point = seed_point - ! --- NEW AGGRESSIVE CHECK 5 --- +#ifdef DEBUG if (any(ieee_is_nan(seed_point))) then - write(*, '(A)') 'TRACE 5 (SliceSampling): NaN detected in the input seed_point.' - write(*, '(A, *(F24.15))') ' seed_point = ', seed_point + write(*, '(A)') 'PolyChord DEBUG (SliceSampling): NaN detected in the input seed_point.' end if if (any(ieee_is_nan(cholesky))) then - write(*, '(A)') 'TRACE 5 (SliceSampling): NaN detected in the input cholesky matrix.' + write(*, '(A)') 'PolyChord DEBUG (SliceSampling): NaN detected in the input cholesky matrix.' end if +#endif ! Initialise the likelihood counter at 0 nlike = 0 @@ -84,16 +84,16 @@ function prior(cube) result(theta) ! Transform to the unit hypercube nhats = matmul(cholesky,nhats) - ! --- NEW, CRITICAL TRACE --- +#ifdef DEBUG if (any(ieee_is_nan(nhats))) then - write(*,'(A)') 'PolyChord TRACE (SliceSampling): NaN detected in nhats vector immediately after matmul(cholesky,nhats).' + write(*,'(A)') 'PolyChord DEBUG (SliceSampling): NaN detected in nhats vector immediately after matmul(cholesky,nhats).' if (any(ieee_is_nan(cholesky))) then write(*,'(A)') ' The source is a NaN cholesky matrix.' else write(*,'(A)') ' The source is NOT the cholesky matrix. Problem is in generate_nhats or matmul.' end if end if - ! --- END OF TRACE --- +#endif do i_babies=1,size(nhats,2) ! Get a new random direction @@ -102,12 +102,11 @@ function prior(cube) result(theta) ! Normalise it w = sqrt(dot_product(nhat,nhat)) - ! --- START OF ADDED WARNING --- + ! Warn if direction vector has zero magnitude (root cause of NaN propagation) if (w < 1.d-20) then write(*,'(A)') 'PolyChord WARNING: Slice sampling direction vector has zero magnitude (w=0).' write(*,'(A)') ' Division by zero is imminent, resulting in NaN parameters.' end if - ! --- END OF ADDED WARNING --- nhat = nhat/w w = w * 3d0 !* exp( lgamma(0.5d0 * settings%nDims) - lgamma(1.5d0 + 0.5d0 * settings%nDims) ) * settings%nDims @@ -193,10 +192,12 @@ end function generate_nhats !! function slice_sample(loglikelihood,prior,logL,nhat,x0,w,S,n) result(baby_point) use settings_module, only: program_settings - ! added stdout_unit to support printing warnings about NaN - use utils_module, only: distance, stdout_unit + use utils_module, only: distance use random_module, only: random_real use calculate_module, only: calculate_point +#ifdef DEBUG + use, intrinsic :: ieee_arithmetic +#endif implicit none interface function loglikelihood(theta,phi) @@ -240,21 +241,19 @@ function prior(cube) result(theta) integer :: i_step, i real(dp) :: x0Rd, x0Ld - ! --- NEW AGGRESSIVE CHECK 3 --- +#ifdef DEBUG if (any(ieee_is_nan(x0))) then - write(*, '(A)') 'TRACE 3 (slice_sample): NaN detected in the input seed point x0.' - write(*, '(A, *(F24.15))') ' x0 = ', x0 + write(*, '(A)') 'PolyChord DEBUG (slice_sample): NaN detected in the input seed point x0.' end if if (any(ieee_is_nan(nhat))) then - write(*, '(A)') 'TRACE 3 (slice_sample): NaN detected in the input direction vector nhat.' - write(*, '(A, *(F24.15))') ' nhat = ', nhat + write(*, '(A)') 'PolyChord DEBUG (slice_sample): NaN detected in the input direction vector nhat.' end if +#endif - ! Select initial start and end points temp_random = random_real() - L(S%h0:S%h1) = x0(S%h0:S%h1) - temp_random * w * nhat - R(S%h0:S%h1) = x0(S%h0:S%h1) + (1-temp_random) * w * nhat + L(S%h0:S%h1) = x0(S%h0:S%h1) - temp_random * w * nhat + R(S%h0:S%h1) = x0(S%h0:S%h1) + (1-temp_random) * w * nhat ! Calculate initial likelihoods call calculate_point(loglikelihood,prior,R,S,n) @@ -280,28 +279,29 @@ function prior(cube) result(theta) ! Sample within this bound do i_step=0,100 - ! Find the distance between x0 and L + ! Find the distance between x0 and L x0Ld= distance(x0(S%h0:S%h1),L(S%h0:S%h1), [(.false., i=1,S%nDims)]) - ! Find the distance between x0 and R + ! Find the distance between x0 and R x0Rd= distance(x0(S%h0:S%h1),R(S%h0:S%h1), [(.false., i=1,S%nDims)]) - ! --- NEW AGGRESSIVE CHECK 4 --- +#ifdef DEBUG if (ieee_is_nan(x0Ld) .or. ieee_is_nan(x0Rd)) then - write(*, '(A)') 'TRACE 4 (slice_sample): NaN detected in boundary distances.' + write(*, '(A)') 'PolyChord DEBUG (slice_sample): NaN detected in boundary distances.' write(*, '(A, F24.15)') ' x0Ld = ', x0Ld write(*, '(A, F24.15)') ' x0Rd = ', x0Rd end if +#endif ! Draw a random point within L and R - baby_point(S%h0:S%h1) = x0(S%h0:S%h1)+ (random_real() * (x0Rd+x0Ld) - x0Ld) * nhat + baby_point(S%h0:S%h1) = x0(S%h0:S%h1)+ (random_real() * (x0Rd+x0Ld) - x0Ld) * nhat - ! --- START OF ADDED WARNING --- +#ifdef DEBUG if (any(ieee_is_nan(baby_point(S%h0:S%h1)))) then - write(*,'(A)') 'PolyChord TRACE (slice_sample): NaN detected in new parameter vector immediately after creation.' + write(*,'(A)') 'PolyChord DEBUG (slice_sample): NaN detected in new parameter vector immediately after creation.' end if - ! --- END OF ADDED WARNING --- +#endif - ! calculate the likelihood + ! calculate the likelihood call calculate_point(loglikelihood,prior,baby_point,S,n) ! If we're not within the likelihood bound then we need to sample further @@ -330,4 +330,3 @@ end function slice_sample end module chordal_module - diff --git a/src/polychord/generate.F90 b/src/polychord/generate.F90 index 2be6cec8..d270e552 100644 --- a/src/polychord/generate.F90 +++ b/src/polychord/generate.F90 @@ -4,8 +4,6 @@ !! * GenerateLivePoints module generate_module use utils_module, only: dp - use, intrinsic :: ieee_arithmetic - implicit none @@ -114,10 +112,6 @@ function prior(cube) result(theta) integer :: nlike ! number of likelihood calls integer :: nprior, ndiscarded integer :: ngenerated ! use to track order points are generated in - ! ! --- NEW COUNTER FOR NaN INJECTION TEST --- - ! integer :: calculation_counter - ! ! --- NEW VARIABLE FOR NaN INJECTION TEST --- - ! real(dp) :: nan_generator real(dp) :: time0,time1,total_time real(dp),dimension(size(settings%grade_dims)) :: speed @@ -126,7 +120,6 @@ function prior(cube) result(theta) ! Initialise number of likelihood calls to zero here nlike = 0 ngenerated = 1 - ! calculation_counter = 0 ! Initialize the new counter if(is_root(mpi_information)) then @@ -161,20 +154,6 @@ function prior(cube) result(theta) ! Generate a random coordinate live_point(settings%h0:settings%h1) = random_reals(settings%nDims) - ! ! --- START OF INTENTIONAL NaN INJECTION (FOR TESTING) --- - ! calculation_counter = calculation_counter + 1 - ! if (calculation_counter == 5) then - ! write(*,*) 'TEST (Linear Mode): Intentionally injecting a NaN via sqrt(-1.0).' - ! nan_generator = -1.0_dp - ! live_point(settings%h0) = sqrt(nan_generator) - ! if (any(ieee_is_nan(live_point))) then - ! write(*,*) 'TEST (Linear Mode): NaN detected in live_point, ieee_is_nan works.' - ! else - ! write(*,*) 'TEST (Linear Mode): NaN not detected in live_point, ieee_is_nan failed.' - ! end if - ! end if - ! ! --- END OF INTENTIONAL NaN INJECTION --- - ! Compute physical coordinates, likelihoods and derived parameters time0 = time() call calculate_point( loglikelihood, prior, live_point, settings, nlike) @@ -269,28 +248,6 @@ function prior(cube) result(theta) ! The workers simply generate and send points until they're told to stop by the administrator do while(live_point_needed(live_point,mpi_information)) - ! ! --- START OF INTENTIONAL NaN INJECTION (FOR TESTING) --- - ! calculation_counter = calculation_counter + 1 - ! if (mpi_information%rank == 1 .and. calculation_counter == 5) then - ! write(*,*) 'TEST (Parallel Mode, Rank 1): Intentionally injecting a NaN via sqrt(-1.0).' - ! nan_generator = -1.0_dp - ! live_point(settings%h0) = sqrt(nan_generator) - - ! write(*,*) 'DEBUG: The injected value is: ', live_point(settings%h0) - - ! if (ieee_is_nan(live_point(settings%h0))) then - ! write(*,*) 'DEBUG: Scalar check PASSED. The value is a NaN.' - ! else - ! write(*,*) 'DEBUG: Scalar check FAILED. The value is NOT a NaN.' - ! end if - - ! if (any(ieee_is_nan(live_point))) then - ! write(*,*) 'TEST (Parallel Mode, Rank 1): NaN detected in live_point, ieee_is_nan works.' - ! else - ! write(*,*) 'TEST (Parallel Mode, Rank 1): NaN not detected in live_point, ieee_is_nan failed.' - ! end if - ! end if - ! ! --- END OF INTENTIONAL NaN INJECTION --- time0 = time() call calculate_point( loglikelihood, prior, live_point, settings,nlike) ! Compute physical coordinates, likelihoods and derived parameters diff --git a/src/polychord/mpi_utils.F90 b/src/polychord/mpi_utils.F90 index 8fdb0833..fe66c2ca 100644 --- a/src/polychord/mpi_utils.F90 +++ b/src/polychord/mpi_utils.F90 @@ -1,6 +1,5 @@ module mpi_module use utils_module, only: dp, normal_fb - use, intrinsic :: ieee_arithmetic implicit none #ifdef MPI @@ -436,13 +435,7 @@ subroutine throw_babies(baby_points,nlike,epoch,mpi_information) integer, intent(in) :: epoch !> The epoch the babies were generated in type(mpi_bundle), intent(in) :: mpi_information - ! --- START OF ADDED TRACE --- - if (any(ieee_is_nan(baby_points))) then - write(*,'(A, I0, A)') 'PolyChord TRACE (throw_babies): Worker rank ', mpi_information%rank, ' is about to send a NaN baby_points array to the root.' - endif - ! --- END OF ADDED TRACE --- - - call MPI_SEND( &! + call MPI_SEND( &! baby_points, &! size(baby_points,1)*size(baby_points,2),&! MPI_DOUBLE_PRECISION, &! diff --git a/src/polychord/nested_sampling.F90 b/src/polychord/nested_sampling.F90 index accc24c2..b7d5b3ba 100644 --- a/src/polychord/nested_sampling.F90 +++ b/src/polychord/nested_sampling.F90 @@ -1,6 +1,5 @@ module nested_sampling_module use utils_module, only: dp - use, intrinsic :: ieee_arithmetic #ifdef MPI use mpi_module, only: get_mpi_information,mpi_bundle,is_root,linear_mode,catch_babies,throw_babies,throw_seed,catch_seed,broadcast_integers,mpi_synchronise @@ -248,16 +247,6 @@ end subroutine dumper ! Choose the cholesky decomposition for the cluster cholesky = RTI%cholesky(:,:,cluster_id) - ! --- NEW AGGRESSIVE CHECK 6 --- - if (any(ieee_is_nan(seed_point))) then - write(*, '(A, I0)') 'TRACE 6 (NestedSampling): NaN seed_point chosen from cluster ', cluster_id - write(*, '(A, *(F24.15))') ' seed_point = ', seed_point - end if - if (any(ieee_is_nan(cholesky))) then - write(*, '(A, I0)') 'TRACE 6 (NestedSampling): NaN cholesky matrix chosen from cluster ', cluster_id - ! Not printing the matrix as it's large, its presence is enough. - end if - ! Get the loglikelihood contour we're generating from logL = RTI%logLp(cluster_id) @@ -269,11 +258,6 @@ end subroutine dumper ! Generate a new set of points within the likelihood bound of the late point baby_points = SliceSampling(loglikelihood,prior,settings,logL,seed_point,cholesky,nlike,num_repeats) - ! === DEBUG PRIORITY 3: Check for NaN from SliceSampling === - if (any(ieee_is_nan(baby_points))) then - write(*,'(A)') 'DEBUG (NestedSampling): ERROR! NaN in baby_points from SliceSampling (linear mode) (Failure Point 4 upstream)' - endif - baby_points(settings%b0,:) = logL ! Note the moment it is born at #ifdef MPI else if(settings%synchronous) then @@ -309,12 +293,6 @@ end subroutine dumper ! Recieve any new baby points from any worker currently sending worker_id = catch_babies(baby_points,nlike,worker_epoch,mpi_information) - ! === DEBUG PRIORITY 3: Check for NaN from worker === - if (any(ieee_is_nan(baby_points))) then - write(*,'(A,I3,A)') 'DEBUG (NestedSampling): ERROR! NaN in baby_points from worker ', worker_id, & - ' (Failure Point 4 upstream)' - endif - ! and throw seeding information back to worker (true => keep going) call throw_seed(seed_point,cholesky,logL,mpi_information,worker_id,administrator_epoch,.true.) @@ -467,10 +445,6 @@ end subroutine dumper else !(myrank/=root) - ! --- START OF HELLO WORLD TEST --- - write(*, '(A, I0, A)') 'Worker rank ', mpi_information%rank, ' reporting for duty.' - ! --- END OF HELLO WORLD TEST --- - ! These are the worker tasks ! -------------------------- ! @@ -512,13 +486,6 @@ end subroutine dumper time1 = time() slice_time = slice_time + time1-time0 - ! --- NEW TRACE --- - if (any(ieee_is_nan(baby_points))) then - write(*,'(A, I0, A)') 'PolyChord TRACE (NestedSampling): Worker rank ', mpi_information%rank, ' received NaN array from SliceSampling.' - endif - ! --- END TRACE --- - - ! 3) Send the baby points back call throw_babies(baby_points,nlike,worker_epoch,mpi_information) From 4229c227ee3b304902c19070c7c38962d893eb75 Mon Sep 17 00:00:00 2001 From: dprelogo Date: Fri, 27 Feb 2026 14:27:17 +0000 Subject: [PATCH 44/44] fix: replace fortranformat with custom Fortran formatter in _make_resume_file Remove the fortranformat dependency which crashes on numpy NaN values (its NaN guard uses `type(val) is float` strict identity check, missing np.float64). Replace with custom _format_fortran_double (E24.15E3) and _format_fortran_int (I12) functions that correctly handle NaN, Inf, and all normal values with proper Fortran 0-based mantissa normalization. Co-Authored-By: Claude Opus 4.6 --- .github/workflows/CI.yml | 2 +- pypolychord/polychord.py | 112 +++++++++++++++++++++++++++-------- pyproject.toml | 1 - setup.py | 2 +- tests/test_fortran_format.py | 109 ++++++++++++++++++++++++++++++++++ 5 files changed, 197 insertions(+), 29 deletions(-) create mode 100644 tests/test_fortran_format.py diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index ed6a52bb..3b5831eb 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -72,7 +72,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install mpi4py pytest fortranformat anesthetic + python -m pip install mpi4py pytest anesthetic - name: Install pypolychord run: pip install -v '.[test]' diff --git a/pypolychord/polychord.py b/pypolychord/polychord.py index 8257ee74..1ce17e99 100644 --- a/pypolychord/polychord.py +++ b/pypolychord/polychord.py @@ -659,14 +659,52 @@ def wrap_prior(cube, theta): +def _format_fortran_double(val, w=24, d=15, e=3): + """Format a float in Fortran E format (E24.15E3). + + Produces output like: ' 0.123456789012345E+001' + """ + import math + if math.isnan(val): + return f"{'NaN':>{w}}" + if math.isinf(val): + s = '-Infinity' if val < 0 else 'Infinity' + return f"{s:>{w}}" + if val == 0.0: + sign = '-' if math.copysign(1.0, val) < 0 else ' ' + return f"{sign}0.{'0' * d}E+{'0' * e}".rjust(w) + sign = '-' if val < 0 else ' ' + aval = abs(val) + # Get Python scientific notation with d digits of precision + # Python's :.{d}E gives d digits after the decimal with 1 digit before + py_str = f"{aval:.{d}E}" + # Parse mantissa and exponent from Python format: "d.dddE±ee" + mantissa_str, exp_str = py_str.split('E') + exp_val = int(exp_str) + # Python: d.ddd × 10^exp → Fortran: 0.dddd × 10^(exp+1) + # Shift decimal: remove '.', prepend '0.' + digits = mantissa_str.replace('.', '') # e.g. "1234567890123456" + # digits has d+1 characters; we need d digits after '0.' + fortran_mantissa = '0.' + digits[:d] + fortran_exp = exp_val + 1 + exp_sign = '+' if fortran_exp >= 0 else '-' + exp_digits = f"{abs(fortran_exp):0{e}d}" + result = f"{sign}{fortran_mantissa}E{exp_sign}{exp_digits}" + return result.rjust(w) + + +def _format_fortran_int(val, w=12): + """Format an integer right-justified in width w.""" + return f"{int(val):>{w}d}" + + def _make_resume_file(loglikelihood, **kwargs): - import fortranformat as ff resume_filename = Path(kwargs['base_dir']) / (kwargs['file_root'] + ".resume") try: from mpi4py import MPI - comm = MPI.COMM_WORLD + comm = kwargs.get('comm', None) or MPI.COMM_WORLD rank = comm.Get_rank() size = comm.Get_size() except ImportError: @@ -676,20 +714,28 @@ def _make_resume_file(loglikelihood, **kwargs): lives = [] logL_birth = kwargs['logzero'] - for i in np.array_split(np.arange(len(kwargs['cube_samples'])), size)[rank]: - cube = kwargs['cube_samples'][i] - theta = kwargs['prior'](cube) - logL = loglikelihood(theta) - try: - logL, derived = logL - except TypeError: - derived = [] - nDims = len(theta) - nDerived = len(derived) - lives.append(np.concatenate([cube,theta,derived,[logL_birth, logL]])) - - if MPI: - sendbuf = np.array(lives).flatten() + n_samples = len(kwargs['cube_samples']) + + # In MPI mode with multiple ranks, only workers (rank > 0) evaluate + # likelihoods, matching PolyChord's controller/worker split where rank 0 + # is the administrator. In serial mode, rank 0 does everything. + if MPI and size > 1: + n_workers = size - 1 + if rank > 0: + worker_index = rank - 1 + for i in np.array_split(np.arange(n_samples), n_workers)[worker_index]: + cube = kwargs['cube_samples'][i] + theta = kwargs['prior'](cube) + logL = loglikelihood(theta) + try: + logL, derived = logL + except TypeError: + derived = [] + nDims = len(theta) + nDerived = len(derived) + lives.append(np.concatenate([cube,theta,derived,[logL_birth, logL]])) + + sendbuf = np.array(lives).flatten() if lives else np.array([], dtype=np.float64) sendcounts = np.array(comm.gather(len(sendbuf))) if rank == 0: recvbuf = np.empty(sum(sendcounts)) @@ -697,22 +743,36 @@ def _make_resume_file(loglikelihood, **kwargs): recvbuf = None comm.Gatherv(sendbuf=sendbuf, recvbuf=(recvbuf, sendcounts), root=0) + if rank == 0: + # Infer row width: nDims (cube) + nDims (theta) + nDerived + 2 (logL_birth, logL) + nDims = len(kwargs['cube_samples'][0]) + nDerived = kwargs.get('nDerived', 0) + row_width = 2 * nDims + nDerived + 2 + lives = np.reshape(recvbuf, (n_samples, row_width)) + else: + for i in range(n_samples): + cube = kwargs['cube_samples'][i] + theta = kwargs['prior'](cube) + logL = loglikelihood(theta) + try: + logL, derived = logL + except TypeError: + derived = [] + nDims = len(theta) + nDerived = len(derived) + lives.append(np.concatenate([cube,theta,derived,[logL_birth, logL]])) + lives = np.array(lives) + if rank == 0: - if MPI: - lives = np.reshape(recvbuf, (len(kwargs['cube_samples']), len(lives[0]))) - else: - lives = np.array(lives) with open(resume_filename,"w") as f: def write(var): var = np.atleast_1d(var) if isinstance(var[0], np.integer): - fmt = '(%iI12)' % var.size - elif isinstance(var[0], np.double): - fmt = '(%iE24.15E3)' % var.size + f.write(''.join(_format_fortran_int(v) for v in var) + '\n') + elif isinstance(var[0], (np.floating, float)): + f.write(''.join(_format_fortran_double(v) for v in var) + '\n') else: - fmt = '(A)' - writer = ff.FortranRecordWriter(fmt) - f.write(writer.write(var) + '\n') + f.write(str(var[0]) + '\n') write('=== Number of dimensions ===') write(nDims) diff --git a/pyproject.toml b/pyproject.toml index 5e05bd29..e7ea9941 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,6 @@ requires-python = ">=3.9" dependencies = [ "numpy", "scipy", - "fortranformat", ] [project.optional-dependencies] diff --git a/setup.py b/setup.py index 0d533b55..26c2ec2b 100644 --- a/setup.py +++ b/setup.py @@ -151,7 +151,7 @@ def run(self): author_email='wh260@cam.ac.uk', license='PolyChord', packages=find_packages(), - install_requires=['numpy','scipy','fortranformat'], + install_requires=['numpy','scipy'], extras_require={'plotting': 'getdist'}, distclass=DistributionWithOption, ext_modules=[pypolychord_module], diff --git a/tests/test_fortran_format.py b/tests/test_fortran_format.py new file mode 100644 index 00000000..a4d8bc16 --- /dev/null +++ b/tests/test_fortran_format.py @@ -0,0 +1,109 @@ +"""Tests for the custom Fortran E24.15E3 and I12 formatters.""" + +import math +import numpy as np +import pytest +from pypolychord.polychord import _format_fortran_double, _format_fortran_int + + +class TestFormatFortranDouble: + """Test _format_fortran_double against Fortran E24.15E3 output.""" + + def test_field_width(self): + """Every formatted value should be exactly 24 characters.""" + values = [1.0, -1.0, 0.0, 1e-100, 1e+300, float('nan'), float('inf')] + for v in values: + result = _format_fortran_double(v) + assert len(result) == 24, f"Width {len(result)} for {v}: '{result}'" + + def test_positive_one(self): + result = _format_fortran_double(1.0) + assert result.strip() == '0.100000000000000E+001' + + def test_negative_pi(self): + result = _format_fortran_double(-3.14159265358979) + assert result.strip().startswith('-0.314159265358979') + assert 'E+' in result + + def test_zero(self): + result = _format_fortran_double(0.0) + assert result.strip() == '0.000000000000000E+000' + + def test_negative_zero(self): + result = _format_fortran_double(-0.0) + assert result.strip() == '-0.000000000000000E+000' + + def test_small_value(self): + result = _format_fortran_double(1e-100) + # Fortran 0-based: 1e-100 = 0.1e-99 + assert 'E-099' in result + assert result.strip().startswith('0.1000000000000') + + def test_large_value(self): + result = _format_fortran_double(1e+300) + assert 'E+301' in result + assert result.strip().startswith('0.1000000000000') + + def test_nan(self): + for nan_val in [float('nan'), np.float64('nan')]: + result = _format_fortran_double(nan_val) + assert len(result) == 24 + assert result.strip() == 'NaN' + + def test_inf(self): + result = _format_fortran_double(float('inf')) + assert len(result) == 24 + assert result.strip() == 'Infinity' + + def test_neg_inf(self): + result = _format_fortran_double(float('-inf')) + assert len(result) == 24 + assert result.strip() == '-Infinity' + + def test_logzero(self): + """Test the typical logzero value used in PolyChord.""" + result = _format_fortran_double(-1e30) + assert len(result) == 24 + assert result.strip().startswith('-0.1000000000000') + assert 'E+031' in result + + def test_numpy_float64(self): + """Ensure numpy float64 values work correctly.""" + result = _format_fortran_double(np.float64(2.5)) + assert len(result) == 24 + assert result.strip() == '0.250000000000000E+001' + + def test_normalization(self): + """Mantissa should always start with 0. (Fortran convention).""" + values = [1.0, 0.5, 123.456, 1e-10, 9.99] + for v in values: + result = _format_fortran_double(v).strip() + # After optional sign, should start with '0.' + if result.startswith('-'): + assert result[1:3] == '0.', f"Bad normalization for {v}: {result}" + else: + assert result[0:2] == '0.', f"Bad normalization for {v}: {result}" + + +class TestFormatFortranInt: + """Test _format_fortran_int against Fortran I12 output.""" + + def test_field_width(self): + result = _format_fortran_int(42) + assert len(result) == 12 + + def test_right_justified(self): + result = _format_fortran_int(5) + assert result == ' 5' + + def test_negative(self): + result = _format_fortran_int(-10) + assert result == ' -10' + + def test_zero(self): + result = _format_fortran_int(0) + assert result == ' 0' + + def test_numpy_int(self): + result = _format_fortran_int(np.int64(100)) + assert result == ' 100'