From 1e2a2cb394fc4bb09aaf82a720226b5398cf3d92 Mon Sep 17 00:00:00 2001 From: Mathieu Taillefumier Date: Tue, 14 Oct 2025 15:32:53 +0200 Subject: [PATCH 01/19] CMake support --- .gitignore | 1 - CMakeLists.txt | 390 +++ cmake/FindCLime.cmake | 27 + cmake/FindLemon.cmake | 25 + cmake/git_hash.h.in | 6 + .../tmlqcd_config_internal.h.in | 130 +- cmake_includes.txt | 425 ++++ io/Makefile.in | 135 - src/bin/CMakeLists.txt | 19 + LapH_ev.c => src/bin/LapH_ev.c | 0 benchmark.c => src/bin/benchmark.c | 0 .../bin/check_locallity.c | 4 +- deriv_mg_tune.c => src/bin/deriv_mg_tune.c | 0 gen_sources.c => src/bin/gen_sources.c | 0 hmc_tm.c => src/bin/hmc_tm.c | 0 hopping_test.c => src/bin/hopping_test.c | 0 invert.c => src/bin/invert.c | 0 {util => src/bin}/main_ildg2uk.c | 0 .../bin/offline_measurement.c | 0 .../bin/qphix_test_Dslash.c | 0 {test => src/bin}/scalar_prod_r_test.c | 0 {test => src/bin}/test_eigenvalues.c | 0 test_lemon.c => src/bin/test_lemon.c | 0 src/lib/CMakeLists.txt | 457 ++++ .../lib/DDalphaAMG_interface.c | 0 .../lib/DDalphaAMG_interface.h | 0 Ptilde_nd.c => src/lib/Ptilde_nd.c | 0 Ptilde_nd.h => src/lib/Ptilde_nd.h | 0 aligned_malloc.c => src/lib/aligned_malloc.c | 0 aligned_malloc.h => src/lib/aligned_malloc.h | 0 block.c => src/lib/block.c | 0 block.h => src/lib/block.h | 0 boundary.c => src/lib/boundary.c | 0 boundary.h => src/lib/boundary.h | 0 {buffers => src/lib/buffers}/Makefile.in | 0 {buffers => src/lib/buffers}/gauge.c | 0 {buffers => src/lib/buffers}/gauge.h | 0 {buffers => src/lib/buffers}/gauge.ih | 0 .../buffers}/gauge_allocate_gauge_buffers.c | 0 .../buffers}/gauge_finalize_gauge_buffers.c | 0 .../gauge_free_unused_gauge_buffers.c | 0 .../lib/buffers}/gauge_get_gauge_field.c | 0 .../buffers}/gauge_get_gauge_field_array.c | 0 .../buffers}/gauge_initialize_gauge_buffers.c | 0 .../lib/buffers}/gauge_return_gauge_field.c | 0 .../buffers}/gauge_return_gauge_field_array.c | 0 {buffers => src/lib/buffers}/utils.h | 0 {buffers => 
src/lib/buffers}/utils.ih | 0 .../utils_generic_exchange.blocking.inc | 0 .../lib/buffers}/utils_generic_exchange.c | 0 .../utils_generic_exchange.nonblocking.inc | 0 .../lib/chebyshev_polynomial.c | 4 +- .../lib/chebyshev_polynomial.h | 0 .../lib/chebyshev_polynomial_nd.c | 0 .../lib/chebyshev_polynomial_nd.h | 0 clenshaw_coef.c => src/lib/clenshaw_coef.c | 0 clenshaw_coef.h => src/lib/clenshaw_coef.h | 0 .../lib/compare_derivative.c | 0 .../lib/compare_derivative.h | 0 {cu => src/lib/cu}/COPYING | 0 {cu => src/lib/cu}/COPYING.LESSER | 0 {cu => src/lib/cu}/Makefile.in | 0 {cu => src/lib/cu}/check-regressions | 0 {cu => src/lib/cu}/cu.c | 0 {cu => src/lib/cu}/cu.h | 0 .../lib/default_input_values.h | 0 deriv_Sb.c => src/lib/deriv_Sb.c | 0 deriv_Sb.h => src/lib/deriv_Sb.h | 0 deriv_Sb_D_psi.c => src/lib/deriv_Sb_D_psi.c | 0 deriv_Sb_D_psi.h => src/lib/deriv_Sb_D_psi.h | 0 expo.c => src/lib/expo.c | 0 expo.h => src/lib/expo.h | 0 fatal_error.c => src/lib/fatal_error.c | 0 fatal_error.h => src/lib/fatal_error.h | 0 gamma.c => src/lib/gamma.c | 0 gamma.h => src/lib/gamma.h | 0 geometry_eo.c => src/lib/geometry_eo.c | 0 geometry_eo.h => src/lib/geometry_eo.h | 0 .../lib/get_rectangle_staples.c | 0 .../lib/get_rectangle_staples.h | 0 get_staples.c => src/lib/get_staples.c | 0 get_staples.h => src/lib/get_staples.h | 0 getopt.c => src/lib/getopt.c | 0 getopt.h => src/lib/getopt.h | 0 gettime.c => src/lib/gettime.c | 0 gettime.h => src/lib/gettime.h | 0 global.h => src/lib/global.h | 0 .../lib/hamiltonian_field.h | 0 {include => src/lib/include}/tmLQCD.h | 0 {include => src/lib/include}/tmlqcd_config.h | 0 {init => src/lib/init}/Makefile.in | 0 {init => src/lib/init}/init.h | 0 {init => src/lib/init}/init_bispinor_field.c | 0 {init => src/lib/init}/init_bispinor_field.h | 0 .../lib/init}/init_chi_spinor_field.c | 0 .../lib/init}/init_chi_spinor_field.h | 0 .../lib/init}/init_critical_globals.c | 0 .../lib/init}/init_critical_globals.h | 0 
.../lib/init}/init_dirac_halfspinor.c | 0 .../lib/init}/init_dirac_halfspinor.h | 0 {init => src/lib/init}/init_gauge_fg.c | 0 {init => src/lib/init}/init_gauge_fg.h | 0 {init => src/lib/init}/init_gauge_field.c | 0 {init => src/lib/init}/init_gauge_field.h | 0 {init => src/lib/init}/init_gauge_tmp.c | 0 {init => src/lib/init}/init_gauge_tmp.h | 0 .../lib/init}/init_geometry_indices.c | 0 .../lib/init}/init_geometry_indices.h | 0 {init => src/lib/init}/init_global_states.c | 0 {init => src/lib/init}/init_global_states.h | 0 {init => src/lib/init}/init_moment_field.c | 0 {init => src/lib/init}/init_moment_field.h | 0 .../lib/init}/init_omp_accumulators.c | 0 .../lib/init}/init_omp_accumulators.h | 0 {init => src/lib/init}/init_openmp.c | 0 {init => src/lib/init}/init_openmp.h | 0 {init => src/lib/init}/init_parallel.c | 0 {init => src/lib/init}/init_parallel.h | 0 {init => src/lib/init}/init_spinor_field.c | 0 {init => src/lib/init}/init_spinor_field.h | 0 .../lib/init}/init_stout_smear_vars.c | 0 .../lib/init}/init_stout_smear_vars.h | 0 integrator.c => src/lib/integrator.c | 0 integrator.h => src/lib/integrator.h | 0 .../lib/invert_clover_eo.c | 0 .../lib/invert_clover_eo.h | 0 .../lib/invert_doublet_eo.c | 0 .../lib/invert_doublet_eo.h | 0 invert_eo.c => src/lib/invert_eo.c | 1 + invert_eo.h => src/lib/invert_eo.h | 0 invert_overlap.c => src/lib/invert_overlap.c | 0 invert_overlap.h => src/lib/invert_overlap.h | 0 {io => src/lib/io}/DML_crc32.c | 0 {io => src/lib/io}/deri_write_stdout.c | 0 {io => src/lib/io}/deri_write_stdout.h | 0 {io => src/lib/io}/dml.c | 0 {io => src/lib/io}/dml.h | 0 {io => src/lib/io}/eospinor.h | 0 {io => src/lib/io}/eospinor.ih | 0 {io => src/lib/io}/eospinor_read.c | 0 {io => src/lib/io}/eospinor_write.c | 0 {io => src/lib/io}/gauge.h | 0 {io => src/lib/io}/gauge.ih | 0 {io => src/lib/io}/gauge_read.c | 0 {io => src/lib/io}/gauge_read_binary.c | 0 {io => src/lib/io}/gauge_write.c | 0 {io => src/lib/io}/gauge_write_binary.c | 0 
.../lib/io}/gauge_write_luscher_binary.c | 0 .../lib/io}/gauge_write_luscher_binary.h | 0 {io => src/lib/io}/io_cm.c | 0 {io => src/lib/io}/io_cm.h | 0 {io => src/lib/io}/params.h | 0 {io => src/lib/io}/params.ih | 0 .../lib/io}/params_construct_InverterInfo.c | 0 .../lib/io}/params_construct_ildgFormat.c | 0 .../io}/params_construct_propagatorFormat.c | 0 .../lib/io}/params_construct_sourceFormat.c | 0 {io => src/lib/io}/params_construct_xlfInfo.c | 0 {io => src/lib/io}/selector.h | 0 {io => src/lib/io}/spinor.h | 0 {io => src/lib/io}/spinor.ih | 0 {io => src/lib/io}/spinor_read.c | 0 {io => src/lib/io}/spinor_read_binary.c | 0 {io => src/lib/io}/spinor_write.c | 0 {io => src/lib/io}/spinor_write_binary.c | 0 {io => src/lib/io}/spinor_write_info.c | 0 .../lib/io}/spinor_write_propagator_format.c | 0 .../lib/io}/spinor_write_propagator_type.c | 0 .../lib/io}/spinor_write_source_format.c | 0 {io => src/lib/io}/spinor_write_stdout.c | 0 {io => src/lib/io}/spinor_write_stdout.h | 0 {io => src/lib/io}/sw_write_stdout.c | 0 {io => src/lib/io}/sw_write_stdout.h | 0 {io => src/lib/io}/utils.c | 0 {io => src/lib/io}/utils.h | 2 - {io => src/lib/io}/utils.ih | 4 +- .../lib/io}/utils_close_reader_record.c | 0 .../lib/io}/utils_close_writer_record.c | 0 {io => src/lib/io}/utils_construct_reader.c | 2 + {io => src/lib/io}/utils_construct_writer.c | 0 {io => src/lib/io}/utils_destruct_reader.c | 0 {io => src/lib/io}/utils_destruct_writer.c | 0 {io => src/lib/io}/utils_engineering.c | 0 {io => src/lib/io}/utils_kill_with_error.c | 0 {io => src/lib/io}/utils_parse_checksum_xml.c | 0 .../lib/io}/utils_parse_ildgformat_xml.c | 0 .../lib/io}/utils_parse_propagator_type.c | 0 {io => src/lib/io}/utils_read_message.c | 0 {io => src/lib/io}/utils_write_checksum.c | 0 .../lib/io}/utils_write_first_message.c | 0 {io => src/lib/io}/utils_write_header.c | 0 {io => src/lib/io}/utils_write_ildg_format.c | 0 .../lib/io}/utils_write_inverter_info.c | 0 {io => src/lib/io}/utils_write_message.c | 
0 {io => src/lib/io}/utils_write_xlf.c | 0 {io => src/lib/io}/utils_write_xlf_xml.c | 0 .../lib/kahan_summation.h | 0 {linalg => src/lib/linalg}/Makefile.in | 0 {linalg => src/lib/linalg}/add.c | 0 {linalg => src/lib/linalg}/add.h | 0 {linalg => src/lib/linalg}/addto_32.c | 0 {linalg => src/lib/linalg}/addto_32.h | 0 {linalg => src/lib/linalg}/assign.c | 0 {linalg => src/lib/linalg}/assign.h | 0 {linalg => src/lib/linalg}/assign_add_mul.c | 0 {linalg => src/lib/linalg}/assign_add_mul.h | 0 .../lib/linalg}/assign_add_mul_add_mul.c | 0 .../lib/linalg}/assign_add_mul_add_mul.h | 0 .../lib/linalg}/assign_add_mul_add_mul_r.c | 0 .../lib/linalg}/assign_add_mul_add_mul_r.h | 0 .../lib/linalg}/assign_add_mul_body.c | 0 {linalg => src/lib/linalg}/assign_add_mul_r.c | 0 {linalg => src/lib/linalg}/assign_add_mul_r.h | 0 .../lib/linalg}/assign_add_mul_r_32.c | 5 +- .../lib/linalg}/assign_add_mul_r_32.h | 0 .../lib/linalg}/assign_add_mul_r_add_mul.c | 0 .../lib/linalg}/assign_add_mul_r_add_mul.h | 0 {linalg => src/lib/linalg}/assign_diff_mul.c | 0 {linalg => src/lib/linalg}/assign_diff_mul.h | 0 {linalg => src/lib/linalg}/assign_mul_add.c | 0 {linalg => src/lib/linalg}/assign_mul_add.h | 0 .../lib/linalg}/assign_mul_add_mul.c | 0 .../lib/linalg}/assign_mul_add_mul.h | 0 .../assign_mul_add_mul_add_mul_add_mul_r.c | 0 .../assign_mul_add_mul_add_mul_add_mul_r.h | 0 .../linalg}/assign_mul_add_mul_add_mul_r.c | 0 .../linalg}/assign_mul_add_mul_add_mul_r.h | 0 .../lib/linalg}/assign_mul_add_mul_r.c | 0 .../lib/linalg}/assign_mul_add_mul_r.h | 0 .../lib/linalg}/assign_mul_add_mul_r_32.c | 0 .../lib/linalg}/assign_mul_add_mul_r_32.h | 0 {linalg => src/lib/linalg}/assign_mul_add_r.c | 0 {linalg => src/lib/linalg}/assign_mul_add_r.h | 0 .../lib/linalg}/assign_mul_add_r_32.c | 0 .../lib/linalg}/assign_mul_add_r_32.h | 0 .../lib/linalg}/assign_mul_add_r_and_square.c | 0 .../lib/linalg}/assign_mul_add_r_and_square.h | 0 .../linalg}/assign_mul_bra_add_mul_ket_add.c | 0 
.../linalg}/assign_mul_bra_add_mul_ket_add.h | 0 .../assign_mul_bra_add_mul_ket_add_r.c | 0 .../assign_mul_bra_add_mul_ket_add_r.h | 0 .../lib/linalg}/assign_mul_bra_add_mul_r.c | 0 .../lib/linalg}/assign_mul_bra_add_mul_r.h | 0 {linalg => src/lib/linalg}/assign_to_32.c | 0 {linalg => src/lib/linalg}/assign_to_32.h | 0 {linalg => src/lib/linalg}/blas.h | 0 {linalg => src/lib/linalg}/comp_decomp.c | 0 {linalg => src/lib/linalg}/comp_decomp.h | 0 .../lib/linalg}/convert_eo_to_lexic.c | 0 .../lib/linalg}/convert_eo_to_lexic.h | 0 .../lib/linalg}/convert_even_to_lexic.c | 0 .../lib/linalg}/convert_even_to_lexic.h | 0 .../lib/linalg}/convert_odd_to_lexic.c | 0 .../lib/linalg}/convert_odd_to_lexic.h | 0 {linalg => src/lib/linalg}/diff.c | 0 {linalg => src/lib/linalg}/diff.h | 0 {linalg => src/lib/linalg}/diff_32.c | 0 {linalg => src/lib/linalg}/diff_32.h | 0 .../lib/linalg}/diff_and_square_norm.c | 0 .../lib/linalg}/diff_and_square_norm.h | 0 {linalg => src/lib/linalg}/fortran.h | 0 {linalg => src/lib/linalg}/lapack.h | 0 {linalg => src/lib/linalg}/map_to_blas.h | 0 {linalg => src/lib/linalg}/mattimesvec.c | 0 {linalg => src/lib/linalg}/mattimesvec.h | 0 {linalg => src/lib/linalg}/mul.c | 0 {linalg => src/lib/linalg}/mul.h | 0 {linalg => src/lib/linalg}/mul_add_mul.c | 0 {linalg => src/lib/linalg}/mul_add_mul.h | 0 {linalg => src/lib/linalg}/mul_add_mul_r.c | 0 {linalg => src/lib/linalg}/mul_add_mul_r.h | 0 {linalg => src/lib/linalg}/mul_diff_mul.c | 0 {linalg => src/lib/linalg}/mul_diff_mul.h | 0 {linalg => src/lib/linalg}/mul_diff_mul_r.c | 0 {linalg => src/lib/linalg}/mul_diff_mul_r.h | 0 {linalg => src/lib/linalg}/mul_diff_r.c | 0 {linalg => src/lib/linalg}/mul_diff_r.h | 0 {linalg => src/lib/linalg}/mul_gamma5.c | 0 {linalg => src/lib/linalg}/mul_gamma5.h | 0 {linalg => src/lib/linalg}/mul_r.c | 0 {linalg => src/lib/linalg}/mul_r.h | 0 {linalg => src/lib/linalg}/mul_r_32.c | 0 {linalg => src/lib/linalg}/mul_r_32.h | 0 {linalg => src/lib/linalg}/mul_r_gamma5.c | 0 
{linalg => src/lib/linalg}/mul_r_gamma5.h | 0 {linalg => src/lib/linalg}/print_spinor.c | 0 {linalg => src/lib/linalg}/print_spinor.h | 0 .../linalg}/print_spinor_similar_components.c | 0 .../linalg}/print_spinor_similar_components.h | 0 {linalg => src/lib/linalg}/ratio.c | 0 {linalg => src/lib/linalg}/ratio.h | 0 {linalg => src/lib/linalg}/scalar_prod.c | 0 {linalg => src/lib/linalg}/scalar_prod.h | 0 {linalg => src/lib/linalg}/scalar_prod_body.c | 0 {linalg => src/lib/linalg}/scalar_prod_i.c | 0 {linalg => src/lib/linalg}/scalar_prod_i.h | 0 {linalg => src/lib/linalg}/scalar_prod_r.c | 0 {linalg => src/lib/linalg}/scalar_prod_r.h | 0 {linalg => src/lib/linalg}/scalar_prod_r_32.c | 0 {linalg => src/lib/linalg}/scalar_prod_r_32.h | 0 {linalg => src/lib/linalg}/set_even_to_zero.c | 0 {linalg => src/lib/linalg}/set_even_to_zero.h | 0 .../lib/linalg}/square_and_minmax.c | 0 .../lib/linalg}/square_and_minmax.h | 0 .../lib/linalg}/square_and_prod_r.c | 0 .../lib/linalg}/square_and_prod_r.h | 0 {linalg => src/lib/linalg}/square_norm.c | 0 {linalg => src/lib/linalg}/square_norm.h | 0 {linalg => src/lib/linalg}/square_norm_32.c | 0 {linalg => src/lib/linalg}/square_norm_32.h | 0 linalg_eo.h => src/lib/linalg_eo.h | 0 little_D.c => src/lib/little_D.c | 0 little_D.h => src/lib/little_D.h | 0 little_D_body.c => src/lib/little_D_body.c | 0 matrix_utils.c => src/lib/matrix_utils.c | 0 matrix_utils.h => src/lib/matrix_utils.h | 0 {meas => src/lib/meas}/Makefile.in | 0 {meas => src/lib/meas}/correlators.c | 0 {meas => src/lib/meas}/correlators.h | 0 {meas => src/lib/meas}/field_strength_types.h | 0 {meas => src/lib/meas}/gradient_flow.c | 0 {meas => src/lib/meas}/gradient_flow.h | 0 ...easure_clover_field_strength_observables.c | 0 ...easure_clover_field_strength_observables.h | 0 {meas => src/lib/meas}/measurements.c | 0 {meas => src/lib/meas}/measurements.h | 0 {meas => src/lib/meas}/oriented_plaquettes.c | 0 {meas => src/lib/meas}/oriented_plaquettes.h | 0 {meas => 
src/lib/meas}/pion_norm.c | 0 {meas => src/lib/meas}/pion_norm.h | 0 {meas => src/lib/meas}/polyakov_loop.c | 0 {meas => src/lib/meas}/polyakov_loop.h | 0 .../lib/measure_gauge_action.c | 0 .../lib/measure_gauge_action.h | 0 .../lib/measure_rectangles.c | 0 .../lib/measure_rectangles.h | 0 misc_types.h => src/lib/misc_types.h | 2 +- {monomial => src/lib/monomial}/Makefile.in | 0 .../lib/monomial}/clover_trlog_monomial.c | 0 .../lib/monomial}/clover_trlog_monomial.h | 0 .../lib/monomial}/cloverdet_monomial.c | 0 .../lib/monomial}/cloverdet_monomial.h | 0 .../lib/monomial}/cloverdetratio_monomial.c | 0 .../lib/monomial}/cloverdetratio_monomial.h | 0 .../lib/monomial}/cloverdetratio_rwmonomial.c | 0 .../lib/monomial}/cloverdetratio_rwmonomial.h | 0 .../lib/monomial}/clovernd_trlog_monomial.c | 0 .../lib/monomial}/clovernd_trlog_monomial.h | 0 .../lib/monomial}/cloverndpoly_monomial.c | 0 .../lib/monomial}/cloverndpoly_monomial.h | 0 {monomial => src/lib/monomial}/det_monomial.c | 0 {monomial => src/lib/monomial}/det_monomial.h | 0 .../lib/monomial}/detratio_monomial.c | 0 .../lib/monomial}/detratio_monomial.h | 0 .../lib/monomial}/gauge_monomial.c | 0 .../lib/monomial}/gauge_monomial.h | 0 .../lib/monomial}/moment_energy.c | 0 .../lib/monomial}/moment_energy.h | 0 .../lib/monomial}/monitor_forces.c | 0 .../lib/monomial}/monitor_forces.h | 0 {monomial => src/lib/monomial}/monomial.c | 0 {monomial => src/lib/monomial}/monomial.h | 0 .../lib/monomial}/nddetratio_monomial.c | 0 .../lib/monomial}/nddetratio_monomial.h | 0 .../lib/monomial}/ndpoly_monomial.c | 0 .../lib/monomial}/ndpoly_monomial.h | 0 .../lib/monomial}/ndrat_monomial.c | 0 .../lib/monomial}/ndrat_monomial.h | 0 .../lib/monomial}/ndratcor_monomial.c | 0 .../lib/monomial}/ndratcor_monomial.h | 0 .../lib/monomial}/poly_monomial.c | 0 .../lib/monomial}/poly_monomial.h | 0 {monomial => src/lib/monomial}/rat_monomial.c | 0 {monomial => src/lib/monomial}/rat_monomial.h | 0 .../lib/monomial}/ratcor_monomial.c | 0 
.../lib/monomial}/ratcor_monomial.h | 0 mpi_init.c => src/lib/mpi_init.c | 0 mpi_init.h => src/lib/mpi_init.h | 0 .../lib/omp_accumulator.h | 0 operator.c => src/lib/operator.c | 0 operator.h => src/lib/operator.h | 0 .../lib/operator}/Block_D_psi_body.c | 0 {operator => src/lib/operator}/D_psi.c | 0 {operator => src/lib/operator}/D_psi.h | 0 {operator => src/lib/operator}/D_psi_body.c | 0 {operator => src/lib/operator}/Dov_proj.c | 0 {operator => src/lib/operator}/Dov_proj.h | 0 {operator => src/lib/operator}/Dov_psi.c | 0 {operator => src/lib/operator}/Dov_psi.h | 0 .../lib/operator}/Hopping_Matrix.c | 0 .../lib/operator}/Hopping_Matrix.h | 0 .../lib/operator}/Hopping_Matrix_32.c | 0 .../lib/operator}/Hopping_Matrix_32.h | 0 .../lib/operator}/Hopping_Matrix_32_nocom.c | 0 .../lib/operator}/Hopping_Matrix_nocom.c | 0 .../lib/operator}/Hopping_Matrix_nocom.h | 0 {operator => src/lib/operator}/Makefile.in | 0 .../assign_mul_one_sw_pm_imu_inv_block_body.c | 0 ...assign_mul_one_sw_pm_imu_site_lexic_body.c | 0 .../lib/operator}/clover_accumulate_deriv.c | 0 {operator => src/lib/operator}/clover_deriv.c | 0 {operator => src/lib/operator}/clover_det.c | 0 .../lib/operator}/clover_inline.h | 0 .../lib/operator}/clover_invert.c | 0 {operator => src/lib/operator}/clover_leaf.c | 0 {operator => src/lib/operator}/clover_leaf.h | 0 {operator => src/lib/operator}/clover_term.c | 0 .../lib/operator}/clovertm_operators.c | 0 .../lib/operator}/clovertm_operators.h | 0 .../lib/operator}/clovertm_operators_32.c | 0 .../lib/operator}/clovertm_operators_32.h | 0 .../lib/operator}/halfspinor_body.c | 0 .../lib/operator}/halfspinor_body_32.c | 0 .../lib/operator}/halfspinor_hopping.h | 0 .../lib/operator}/halfspinor_hopping_32.h | 0 .../lib/operator}/hopping_bg_dbl.c | 0 .../lib/operator}/hopping_body_dbl.c | 0 {operator => src/lib/operator}/hopping_sgl.c | 0 .../lib/operator}/mul_one_pm_imu_inv_body.c | 0 .../operator}/mul_one_pm_imu_sub_mul_body.c | 0 {operator => 
src/lib/operator}/tm_operators.c | 0 {operator => src/lib/operator}/tm_operators.h | 0 .../lib/operator}/tm_operators_32.c | 0 .../lib/operator}/tm_operators_32.h | 0 .../lib/operator}/tm_operators_nd.c | 0 .../lib/operator}/tm_operators_nd.h | 0 .../lib/operator}/tm_operators_nd_32.c | 0 .../lib/operator}/tm_operators_nd_32.h | 0 .../lib/operator}/tm_sub_Hopping_Matrix.c | 0 .../lib/operator}/tm_sub_Hopping_Matrix.h | 0 .../lib/operator}/tm_times_Hopping_Matrix.c | 0 .../lib/operator}/tm_times_Hopping_Matrix.h | 0 operator_types.h => src/lib/operator_types.h | 0 overrelaxation.c => src/lib/overrelaxation.c | 2 +- overrelaxation.h => src/lib/overrelaxation.h | 0 parallel_io.h => src/lib/parallel_io.h | 0 phmc.c => src/lib/phmc.c | 0 phmc.h => src/lib/phmc.h | 0 prepare_source.c => src/lib/prepare_source.c | 0 prepare_source.h => src/lib/prepare_source.h | 0 .../lib/profiling}/hmc/Readme.md | 0 .../lib/profiling}/hmc/example_profile.pdf | Bin .../lib/profiling}/hmc/profile.Rmd | 0 {profiling => src/lib/profiling}/hmc/timing.R | 0 .../lib/profiling}/hmc_mk2/.gitignore | 0 .../lib/profiling}/hmc_mk2/README.md | 0 .../profiling}/hmc_mk2/logs/example_log.out | 0 .../lib/profiling}/hmc_mk2/make_profile.R | 0 .../lib/profiling}/hmc_mk2/profile.Rmd | 0 src/lib/qphix/qphix_base_classes.hpp | 771 ++++++ src/lib/qphix/qphix_interface.cpp | 2192 +++++++++++++++++ src/lib/qphix/qphix_interface.hpp | 51 + src/lib/qphix/qphix_interface_utils.hpp | 33 + .../lib/qphix_interface.h | 0 qphix_types.h => src/lib/qphix_types.h | 0 qphix_veclen.h => src/lib/qphix_veclen.h | 0 .../lib/quda_dummy_types.h | 0 src/lib/quda_gauge_paths.inc | 158 ++ quda_interface.c => src/lib/quda_interface.c | 0 quda_interface.h => src/lib/quda_interface.h | 0 quda_types.h => src/lib/quda_types.h | 0 ranlxd.c => src/lib/ranlxd.c | 0 ranlxd.h => src/lib/ranlxd.h | 0 ranlxs.c => src/lib/ranlxs.c | 0 ranlxs.h => src/lib/ranlxs.h | 0 {rational => src/lib/rational}/Makefile.in | 0 {rational => 
src/lib/rational}/elliptic.c | 0 {rational => src/lib/rational}/elliptic.h | 0 {rational => src/lib/rational}/rational.c | 0 {rational => src/lib/rational}/rational.h | 0 {rational => src/lib/rational}/zolotarev.c | 0 {rational => src/lib/rational}/zolotarev.h | 0 read_input.h => src/lib/read_input.h | 0 read_input.l => src/lib/read_input.l | 0 .../lib/reweighting_factor.c | 0 .../lib/reweighting_factor.h | 0 .../lib/reweighting_factor_nd.c | 0 .../lib/reweighting_factor_nd.h | 0 .../lib/rnd_gauge_trafo.c | 0 .../lib/rnd_gauge_trafo.h | 0 sighandler.c => src/lib/sighandler.c | 0 sighandler.h => src/lib/sighandler.h | 0 {smearing => src/lib/smearing}/Makefile.in | 0 {smearing => src/lib/smearing}/ape.h | 0 {smearing => src/lib/smearing}/ape.ih | 0 .../lib/smearing}/ape_ape_smear.c | 0 {smearing => src/lib/smearing}/hex.h | 0 {smearing => src/lib/smearing}/hex.ih | 0 .../lib/smearing}/hex_hex_smear.c | 0 .../lib/smearing}/hex_stout_exclude_none.c | 0 .../lib/smearing}/hex_stout_exclude_one.c | 0 .../lib/smearing}/hex_stout_exclude_two.c | 0 {smearing => src/lib/smearing}/hyp.h | 0 {smearing => src/lib/smearing}/hyp.ih | 0 .../smearing}/hyp_APE_project_exclude_none.c | 0 .../smearing}/hyp_APE_project_exclude_one.c | 0 .../smearing}/hyp_APE_project_exclude_two.c | 0 .../lib/smearing}/hyp_hyp_smear.c | 0 .../smearing}/hyp_hyp_staples_exclude_none.c | 0 .../smearing}/hyp_hyp_staples_exclude_one.c | 0 .../smearing}/hyp_hyp_staples_exclude_two.c | 0 {smearing => src/lib/smearing}/stout.h | 0 {smearing => src/lib/smearing}/stout.ih | 0 .../lib/smearing}/stout_stout_smear.c | 0 .../smearing}/uils_print_config_to_screen.c | 0 {smearing => src/lib/smearing}/utils.h | 0 {smearing => src/lib/smearing}/utils.ih | 0 .../lib/smearing}/utils_generic_staples.c | 0 .../smearing}/utils_print_config_to_screen.c | 0 .../lib/smearing}/utils_print_su3.c | 0 .../lib/smearing}/utils_project_antiherm.c | 0 .../lib/smearing}/utils_project_herm.c | 0 .../lib/smearing}/utils_reunitarize.c | 0 
.../lib/smearing}/utils_reunitarize_MILC.c | 8 +- .../lib/solver}/M_plus_block_psi_body.c | 0 {solver => src/lib/solver}/Makefile.in | 0 {solver => src/lib/solver}/Msap.c | 0 {solver => src/lib/solver}/Msap.h | 0 {solver => src/lib/solver}/bicg_complex.c | 0 {solver => src/lib/solver}/bicg_complex.h | 0 {solver => src/lib/solver}/bicgstab2.c | 0 {solver => src/lib/solver}/bicgstab2.h | 0 {solver => src/lib/solver}/bicgstab_complex.c | 0 {solver => src/lib/solver}/bicgstab_complex.h | 0 .../lib/solver}/bicgstab_complex_bi.c | 0 .../lib/solver}/bicgstab_complex_bi.h | 0 {solver => src/lib/solver}/bicgstabell.c | 0 {solver => src/lib/solver}/bicgstabell.h | 0 {solver => src/lib/solver}/cg_her.c | 0 {solver => src/lib/solver}/cg_her.h | 0 {solver => src/lib/solver}/cg_her_bi.c | 0 {solver => src/lib/solver}/cg_her_bi.h | 0 {solver => src/lib/solver}/cg_her_nd.c | 0 {solver => src/lib/solver}/cg_her_nd.h | 0 {solver => src/lib/solver}/cg_mms_tm.c | 0 {solver => src/lib/solver}/cg_mms_tm.h | 0 {solver => src/lib/solver}/cg_mms_tm_nd.c | 0 {solver => src/lib/solver}/cg_mms_tm_nd.h | 0 {solver => src/lib/solver}/cgne4complex.c | 0 {solver => src/lib/solver}/cgne4complex.h | 0 {solver => src/lib/solver}/cgs_real.c | 0 {solver => src/lib/solver}/cgs_real.h | 0 {solver => src/lib/solver}/chrono_guess.c | 0 {solver => src/lib/solver}/chrono_guess.h | 0 {solver => src/lib/solver}/cr.c | 0 {solver => src/lib/solver}/cr.h | 0 {solver => src/lib/solver}/dfl_projector.c | 0 {solver => src/lib/solver}/dfl_projector.h | 0 .../lib/solver}/diagonalise_general_matrix.c | 0 .../lib/solver}/diagonalise_general_matrix.h | 0 .../lib/solver}/dirac_operator_eigenvectors.c | 0 .../lib/solver}/dirac_operator_eigenvectors.h | 0 {solver => src/lib/solver}/eigcg.c | 0 {solver => src/lib/solver}/eigcg.h | 0 {solver => src/lib/solver}/eigenvalues.c | 0 {solver => src/lib/solver}/eigenvalues.h | 0 {solver => src/lib/solver}/eigenvalues_bi.c | 0 {solver => src/lib/solver}/eigenvalues_bi.h | 0 {solver 
=> src/lib/solver}/fgmres.c | 0 {solver => src/lib/solver}/fgmres.h | 0 {solver => src/lib/solver}/fgmres4complex.c | 0 {solver => src/lib/solver}/fgmres4complex.h | 0 .../lib/solver}/fgmres4complex_body.c | 0 {solver => src/lib/solver}/gcr.c | 0 {solver => src/lib/solver}/gcr.h | 0 {solver => src/lib/solver}/gcr4complex.c | 0 {solver => src/lib/solver}/gcr4complex.h | 0 {solver => src/lib/solver}/gcr4complex_body.c | 0 {solver => src/lib/solver}/gcr4complex_body.h | 0 .../lib/solver}/generate_dfl_subspace.c | 0 .../lib/solver}/generate_dfl_subspace.h | 0 {solver => src/lib/solver}/gmres.c | 0 {solver => src/lib/solver}/gmres.h | 0 {solver => src/lib/solver}/gmres_dr.c | 0 {solver => src/lib/solver}/gmres_dr.h | 0 {solver => src/lib/solver}/gmres_precon.c | 0 {solver => src/lib/solver}/gmres_precon.h | 0 {solver => src/lib/solver}/gram-schmidt.c | 0 {solver => src/lib/solver}/gram-schmidt.h | 0 {solver => src/lib/solver}/incr_eigcg.c | 0 {solver => src/lib/solver}/incr_eigcg.h | 0 {solver => src/lib/solver}/index_jd.c | 0 {solver => src/lib/solver}/index_jd.h | 0 {solver => src/lib/solver}/init_guess.c | 0 {solver => src/lib/solver}/init_guess.h | 0 {solver => src/lib/solver}/jdher.c | 0 {solver => src/lib/solver}/jdher.h | 0 {solver => src/lib/solver}/jdher_bi.c | 0 {solver => src/lib/solver}/jdher_bi.h | 0 .../lib/solver}/little_mg_precon_body.c | 0 .../lib/solver}/little_project_eo_body.c | 0 {solver => src/lib/solver}/lu_solve.c | 0 {solver => src/lib/solver}/lu_solve.h | 0 .../lib/solver}/matrix_mult_typedef.h | 0 .../lib/solver}/matrix_mult_typedef_bi.h | 0 .../lib/solver}/matrix_mult_typedef_nd.h | 0 {solver => src/lib/solver}/mcr.c | 0 {solver => src/lib/solver}/mcr.h | 0 {solver => src/lib/solver}/mcr4complex.c | 0 {solver => src/lib/solver}/mcr4complex.h | 0 {solver => src/lib/solver}/mixed_cg_her.c | 0 {solver => src/lib/solver}/mixed_cg_her.h | 0 .../lib/solver}/mixed_cg_mms_tm_nd.c | 0 .../lib/solver}/mixed_cg_mms_tm_nd.h | 0 {solver => 
src/lib/solver}/monomial_solve.c | 0 {solver => src/lib/solver}/monomial_solve.h | 0 {solver => src/lib/solver}/mr.c | 0 {solver => src/lib/solver}/mr.h | 0 {solver => src/lib/solver}/mr4complex.c | 0 {solver => src/lib/solver}/mr4complex.h | 0 {solver => src/lib/solver}/mrblk_body.c | 0 {solver => src/lib/solver}/ortho.c | 0 {solver => src/lib/solver}/ortho.h | 0 {solver => src/lib/solver}/pcg_her.c | 0 {solver => src/lib/solver}/pcg_her.h | 0 {solver => src/lib/solver}/poly_precon.c | 0 {solver => src/lib/solver}/poly_precon.h | 0 {solver => src/lib/solver}/quicksort.c | 0 {solver => src/lib/solver}/quicksort.h | 0 {solver => src/lib/solver}/restart_X.c | 0 {solver => src/lib/solver}/restart_X.h | 0 {solver => src/lib/solver}/rg_mixed_cg_her.c | 0 {solver => src/lib/solver}/rg_mixed_cg_her.h | 0 .../lib/solver}/rg_mixed_cg_her_nd.c | 0 .../lib/solver}/rg_mixed_cg_her_nd.h | 0 .../lib/solver}/rg_mixed_cg_typedef.h | 0 {solver => src/lib/solver}/solver.h | 0 {solver => src/lib/solver}/solver_field.c | 0 {solver => src/lib/solver}/solver_field.h | 0 {solver => src/lib/solver}/solver_params.h | 0 {solver => src/lib/solver}/solver_types.c | 0 {solver => src/lib/solver}/solver_types.h | 0 {solver => src/lib/solver}/sub_low_ev.c | 0 {solver => src/lib/solver}/sub_low_ev.h | 0 {solver => src/lib/solver}/sumr.c | 0 {solver => src/lib/solver}/sumr.h | 0 .../lib/source_generation.c | 0 .../lib/source_generation.h | 0 spinor_fft.c => src/lib/spinor_fft.c | 0 spinor_fft.h => src/lib/spinor_fft.h | 0 start.c => src/lib/start.c | 0 start.h => src/lib/start.h | 0 .../lib/struct_accessors.h | 0 su3.h => src/lib/su3.h | 0 su3adj.h => src/lib/su3adj.h | 0 su3spinor.h => src/lib/su3spinor.h | 0 tensors.h => src/lib/tensors.h | 0 {test => src/lib/test}/Makefile | 0 {test => src/lib/test}/check_geometry.c | 0 {test => src/lib/test}/check_geometry.h | 0 {test => src/lib/test}/check_nan.c | 0 {test => src/lib/test}/check_nan.h | 0 {test => src/lib/test}/check_overlap.c | 0 {test => 
src/lib/test}/check_xchange.c | 0 {test => src/lib/test}/hopping_test.README | 0 .../lib/test}/hopping_test.input.compare | 0 {test => src/lib/test}/hopping_test.input.new | 0 .../lib/test}/hopping_test.input.start | 0 .../lib/test}/hopping_test_generate_script | 0 {test => src/lib/test}/hopping_test_qscript | 0 .../lib/test}/measure_rectangles.debug.c | 0 {test => src/lib/test}/overlaptests.c | 0 {test => src/lib/test}/overlaptests.h | 0 {test => src/lib/test}/qdran64.h | 0 .../lib/tm_debug_printf.c | 0 .../lib/tm_debug_printf.h | 0 .../lib/update_backward_gauge.c | 0 .../lib/update_backward_gauge.h | 0 update_gauge.c => src/lib/update_gauge.c | 0 update_gauge.h => src/lib/update_gauge.h | 0 update_momenta.c => src/lib/update_momenta.c | 0 update_momenta.h => src/lib/update_momenta.h | 0 .../lib/update_momenta_fg.c | 0 .../lib/update_momenta_fg.h | 0 update_tm.c => src/lib/update_tm.c | 0 update_tm.h => src/lib/update_tm.h | 0 {util => src/lib/util}/io.c | 0 {util => src/lib/util}/io.h | 0 {util => src/lib/util}/laguer/Makefile | 0 {util => src/lib/util}/laguer/chebyRoot.C | 0 {util => src/lib/util}/laguer/chebyRoot.H | 0 {util => src/lib/util}/laguer/laguer.c | 0 {util => src/lib/util}/laguer/quadroptRoot.C | 0 {util => src/lib/util}/oox/Makefile | 0 {util => src/lib/util}/oox/oox.c | 0 {util => src/lib/util}/oox/oox_gawrapper.cxx | 0 {util => src/lib/util}/oox/oox_gawrapper.h | 0 {util => src/lib/util}/swapendian.c | 0 {util => src/lib/util}/tmlqcd-indent | 0 {wrapper => src/lib/wrapper}/Makefile.in | 0 {wrapper => src/lib/wrapper}/lib_wrapper.c | 0 {xchange => src/lib/xchange}/Makefile.in | 0 .../lib/xchange}/little_field_gather.c | 0 .../lib/xchange}/little_field_gather.h | 0 .../lib/xchange}/little_field_gather_body.c | 0 {xchange => src/lib/xchange}/xchange.h | 0 .../lib/xchange}/xchange_2fields.c | 0 .../lib/xchange}/xchange_2fields.h | 0 {xchange => src/lib/xchange}/xchange_deri.c | 0 {xchange => src/lib/xchange}/xchange_deri.h | 0 {xchange => 
src/lib/xchange}/xchange_field.c | 0 {xchange => src/lib/xchange}/xchange_field.h | 0 {xchange => src/lib/xchange}/xchange_gauge.c | 0 {xchange => src/lib/xchange}/xchange_gauge.h | 0 .../lib/xchange}/xchange_halffield.c | 0 .../lib/xchange}/xchange_halffield.h | 0 .../lib/xchange}/xchange_lexicfield.c | 0 .../lib/xchange}/xchange_lexicfield.h | 0 708 files changed, 4614 insertions(+), 240 deletions(-) create mode 100644 CMakeLists.txt create mode 100644 cmake/FindCLime.cmake create mode 100644 cmake/FindLemon.cmake create mode 100644 cmake/git_hash.h.in rename {include => cmake}/tmlqcd_config_internal.h.in (56%) create mode 100644 cmake_includes.txt delete mode 100644 io/Makefile.in create mode 100644 src/bin/CMakeLists.txt rename LapH_ev.c => src/bin/LapH_ev.c (100%) rename benchmark.c => src/bin/benchmark.c (100%) rename check_locallity.c => src/bin/check_locallity.c (99%) rename deriv_mg_tune.c => src/bin/deriv_mg_tune.c (100%) rename gen_sources.c => src/bin/gen_sources.c (100%) rename hmc_tm.c => src/bin/hmc_tm.c (100%) rename hopping_test.c => src/bin/hopping_test.c (100%) rename invert.c => src/bin/invert.c (100%) rename {util => src/bin}/main_ildg2uk.c (100%) rename offline_measurement.c => src/bin/offline_measurement.c (100%) rename qphix_test_Dslash.c => src/bin/qphix_test_Dslash.c (100%) rename {test => src/bin}/scalar_prod_r_test.c (100%) rename {test => src/bin}/test_eigenvalues.c (100%) rename test_lemon.c => src/bin/test_lemon.c (100%) create mode 100644 src/lib/CMakeLists.txt rename DDalphaAMG_interface.c => src/lib/DDalphaAMG_interface.c (100%) rename DDalphaAMG_interface.h => src/lib/DDalphaAMG_interface.h (100%) rename Ptilde_nd.c => src/lib/Ptilde_nd.c (100%) rename Ptilde_nd.h => src/lib/Ptilde_nd.h (100%) rename aligned_malloc.c => src/lib/aligned_malloc.c (100%) rename aligned_malloc.h => src/lib/aligned_malloc.h (100%) rename block.c => src/lib/block.c (100%) rename block.h => src/lib/block.h (100%) rename boundary.c => src/lib/boundary.c 
(100%) rename boundary.h => src/lib/boundary.h (100%) rename {buffers => src/lib/buffers}/Makefile.in (100%) rename {buffers => src/lib/buffers}/gauge.c (100%) rename {buffers => src/lib/buffers}/gauge.h (100%) rename {buffers => src/lib/buffers}/gauge.ih (100%) rename {buffers => src/lib/buffers}/gauge_allocate_gauge_buffers.c (100%) rename {buffers => src/lib/buffers}/gauge_finalize_gauge_buffers.c (100%) rename {buffers => src/lib/buffers}/gauge_free_unused_gauge_buffers.c (100%) rename {buffers => src/lib/buffers}/gauge_get_gauge_field.c (100%) rename {buffers => src/lib/buffers}/gauge_get_gauge_field_array.c (100%) rename {buffers => src/lib/buffers}/gauge_initialize_gauge_buffers.c (100%) rename {buffers => src/lib/buffers}/gauge_return_gauge_field.c (100%) rename {buffers => src/lib/buffers}/gauge_return_gauge_field_array.c (100%) rename {buffers => src/lib/buffers}/utils.h (100%) rename {buffers => src/lib/buffers}/utils.ih (100%) rename {buffers => src/lib/buffers}/utils_generic_exchange.blocking.inc (100%) rename {buffers => src/lib/buffers}/utils_generic_exchange.c (100%) rename {buffers => src/lib/buffers}/utils_generic_exchange.nonblocking.inc (100%) rename chebyshev_polynomial.c => src/lib/chebyshev_polynomial.c (98%) rename chebyshev_polynomial.h => src/lib/chebyshev_polynomial.h (100%) rename chebyshev_polynomial_nd.c => src/lib/chebyshev_polynomial_nd.c (100%) rename chebyshev_polynomial_nd.h => src/lib/chebyshev_polynomial_nd.h (100%) rename clenshaw_coef.c => src/lib/clenshaw_coef.c (100%) rename clenshaw_coef.h => src/lib/clenshaw_coef.h (100%) rename compare_derivative.c => src/lib/compare_derivative.c (100%) rename compare_derivative.h => src/lib/compare_derivative.h (100%) rename {cu => src/lib/cu}/COPYING (100%) rename {cu => src/lib/cu}/COPYING.LESSER (100%) rename {cu => src/lib/cu}/Makefile.in (100%) rename {cu => src/lib/cu}/check-regressions (100%) rename {cu => src/lib/cu}/cu.c (100%) rename {cu => src/lib/cu}/cu.h (100%) rename 
default_input_values.h => src/lib/default_input_values.h (100%) rename deriv_Sb.c => src/lib/deriv_Sb.c (100%) rename deriv_Sb.h => src/lib/deriv_Sb.h (100%) rename deriv_Sb_D_psi.c => src/lib/deriv_Sb_D_psi.c (100%) rename deriv_Sb_D_psi.h => src/lib/deriv_Sb_D_psi.h (100%) rename expo.c => src/lib/expo.c (100%) rename expo.h => src/lib/expo.h (100%) rename fatal_error.c => src/lib/fatal_error.c (100%) rename fatal_error.h => src/lib/fatal_error.h (100%) rename gamma.c => src/lib/gamma.c (100%) rename gamma.h => src/lib/gamma.h (100%) rename geometry_eo.c => src/lib/geometry_eo.c (100%) rename geometry_eo.h => src/lib/geometry_eo.h (100%) rename get_rectangle_staples.c => src/lib/get_rectangle_staples.c (100%) rename get_rectangle_staples.h => src/lib/get_rectangle_staples.h (100%) rename get_staples.c => src/lib/get_staples.c (100%) rename get_staples.h => src/lib/get_staples.h (100%) rename getopt.c => src/lib/getopt.c (100%) rename getopt.h => src/lib/getopt.h (100%) rename gettime.c => src/lib/gettime.c (100%) rename gettime.h => src/lib/gettime.h (100%) rename global.h => src/lib/global.h (100%) rename hamiltonian_field.h => src/lib/hamiltonian_field.h (100%) rename {include => src/lib/include}/tmLQCD.h (100%) rename {include => src/lib/include}/tmlqcd_config.h (100%) rename {init => src/lib/init}/Makefile.in (100%) rename {init => src/lib/init}/init.h (100%) rename {init => src/lib/init}/init_bispinor_field.c (100%) rename {init => src/lib/init}/init_bispinor_field.h (100%) rename {init => src/lib/init}/init_chi_spinor_field.c (100%) rename {init => src/lib/init}/init_chi_spinor_field.h (100%) rename {init => src/lib/init}/init_critical_globals.c (100%) rename {init => src/lib/init}/init_critical_globals.h (100%) rename {init => src/lib/init}/init_dirac_halfspinor.c (100%) rename {init => src/lib/init}/init_dirac_halfspinor.h (100%) rename {init => src/lib/init}/init_gauge_fg.c (100%) rename {init => src/lib/init}/init_gauge_fg.h (100%) rename {init => 
src/lib/init}/init_gauge_field.c (100%) rename {init => src/lib/init}/init_gauge_field.h (100%) rename {init => src/lib/init}/init_gauge_tmp.c (100%) rename {init => src/lib/init}/init_gauge_tmp.h (100%) rename {init => src/lib/init}/init_geometry_indices.c (100%) rename {init => src/lib/init}/init_geometry_indices.h (100%) rename {init => src/lib/init}/init_global_states.c (100%) rename {init => src/lib/init}/init_global_states.h (100%) rename {init => src/lib/init}/init_moment_field.c (100%) rename {init => src/lib/init}/init_moment_field.h (100%) rename {init => src/lib/init}/init_omp_accumulators.c (100%) rename {init => src/lib/init}/init_omp_accumulators.h (100%) rename {init => src/lib/init}/init_openmp.c (100%) rename {init => src/lib/init}/init_openmp.h (100%) rename {init => src/lib/init}/init_parallel.c (100%) rename {init => src/lib/init}/init_parallel.h (100%) rename {init => src/lib/init}/init_spinor_field.c (100%) rename {init => src/lib/init}/init_spinor_field.h (100%) rename {init => src/lib/init}/init_stout_smear_vars.c (100%) rename {init => src/lib/init}/init_stout_smear_vars.h (100%) rename integrator.c => src/lib/integrator.c (100%) rename integrator.h => src/lib/integrator.h (100%) rename invert_clover_eo.c => src/lib/invert_clover_eo.c (100%) rename invert_clover_eo.h => src/lib/invert_clover_eo.h (100%) rename invert_doublet_eo.c => src/lib/invert_doublet_eo.c (100%) rename invert_doublet_eo.h => src/lib/invert_doublet_eo.h (100%) rename invert_eo.c => src/lib/invert_eo.c (99%) rename invert_eo.h => src/lib/invert_eo.h (100%) rename invert_overlap.c => src/lib/invert_overlap.c (100%) rename invert_overlap.h => src/lib/invert_overlap.h (100%) rename {io => src/lib/io}/DML_crc32.c (100%) rename {io => src/lib/io}/deri_write_stdout.c (100%) rename {io => src/lib/io}/deri_write_stdout.h (100%) rename {io => src/lib/io}/dml.c (100%) rename {io => src/lib/io}/dml.h (100%) rename {io => src/lib/io}/eospinor.h (100%) rename {io => 
src/lib/io}/eospinor.ih (100%) rename {io => src/lib/io}/eospinor_read.c (100%) rename {io => src/lib/io}/eospinor_write.c (100%) rename {io => src/lib/io}/gauge.h (100%) rename {io => src/lib/io}/gauge.ih (100%) rename {io => src/lib/io}/gauge_read.c (100%) rename {io => src/lib/io}/gauge_read_binary.c (100%) rename {io => src/lib/io}/gauge_write.c (100%) rename {io => src/lib/io}/gauge_write_binary.c (100%) rename {io => src/lib/io}/gauge_write_luscher_binary.c (100%) rename {io => src/lib/io}/gauge_write_luscher_binary.h (100%) rename {io => src/lib/io}/io_cm.c (100%) rename {io => src/lib/io}/io_cm.h (100%) rename {io => src/lib/io}/params.h (100%) rename {io => src/lib/io}/params.ih (100%) rename {io => src/lib/io}/params_construct_InverterInfo.c (100%) rename {io => src/lib/io}/params_construct_ildgFormat.c (100%) rename {io => src/lib/io}/params_construct_propagatorFormat.c (100%) rename {io => src/lib/io}/params_construct_sourceFormat.c (100%) rename {io => src/lib/io}/params_construct_xlfInfo.c (100%) rename {io => src/lib/io}/selector.h (100%) rename {io => src/lib/io}/spinor.h (100%) rename {io => src/lib/io}/spinor.ih (100%) rename {io => src/lib/io}/spinor_read.c (100%) rename {io => src/lib/io}/spinor_read_binary.c (100%) rename {io => src/lib/io}/spinor_write.c (100%) rename {io => src/lib/io}/spinor_write_binary.c (100%) rename {io => src/lib/io}/spinor_write_info.c (100%) rename {io => src/lib/io}/spinor_write_propagator_format.c (100%) rename {io => src/lib/io}/spinor_write_propagator_type.c (100%) rename {io => src/lib/io}/spinor_write_source_format.c (100%) rename {io => src/lib/io}/spinor_write_stdout.c (100%) rename {io => src/lib/io}/spinor_write_stdout.h (100%) rename {io => src/lib/io}/sw_write_stdout.c (100%) rename {io => src/lib/io}/sw_write_stdout.h (100%) rename {io => src/lib/io}/utils.c (100%) rename {io => src/lib/io}/utils.h (99%) rename {io => src/lib/io}/utils.ih (96%) rename {io => src/lib/io}/utils_close_reader_record.c (100%) 
rename {io => src/lib/io}/utils_close_writer_record.c (100%) rename {io => src/lib/io}/utils_construct_reader.c (97%) rename {io => src/lib/io}/utils_construct_writer.c (100%) rename {io => src/lib/io}/utils_destruct_reader.c (100%) rename {io => src/lib/io}/utils_destruct_writer.c (100%) rename {io => src/lib/io}/utils_engineering.c (100%) rename {io => src/lib/io}/utils_kill_with_error.c (100%) rename {io => src/lib/io}/utils_parse_checksum_xml.c (100%) rename {io => src/lib/io}/utils_parse_ildgformat_xml.c (100%) rename {io => src/lib/io}/utils_parse_propagator_type.c (100%) rename {io => src/lib/io}/utils_read_message.c (100%) rename {io => src/lib/io}/utils_write_checksum.c (100%) rename {io => src/lib/io}/utils_write_first_message.c (100%) rename {io => src/lib/io}/utils_write_header.c (100%) rename {io => src/lib/io}/utils_write_ildg_format.c (100%) rename {io => src/lib/io}/utils_write_inverter_info.c (100%) rename {io => src/lib/io}/utils_write_message.c (100%) rename {io => src/lib/io}/utils_write_xlf.c (100%) rename {io => src/lib/io}/utils_write_xlf_xml.c (100%) rename kahan_summation.h => src/lib/kahan_summation.h (100%) rename {linalg => src/lib/linalg}/Makefile.in (100%) rename {linalg => src/lib/linalg}/add.c (100%) rename {linalg => src/lib/linalg}/add.h (100%) rename {linalg => src/lib/linalg}/addto_32.c (100%) rename {linalg => src/lib/linalg}/addto_32.h (100%) rename {linalg => src/lib/linalg}/assign.c (100%) rename {linalg => src/lib/linalg}/assign.h (100%) rename {linalg => src/lib/linalg}/assign_add_mul.c (100%) rename {linalg => src/lib/linalg}/assign_add_mul.h (100%) rename {linalg => src/lib/linalg}/assign_add_mul_add_mul.c (100%) rename {linalg => src/lib/linalg}/assign_add_mul_add_mul.h (100%) rename {linalg => src/lib/linalg}/assign_add_mul_add_mul_r.c (100%) rename {linalg => src/lib/linalg}/assign_add_mul_add_mul_r.h (100%) rename {linalg => src/lib/linalg}/assign_add_mul_body.c (100%) rename {linalg => 
src/lib/linalg}/assign_add_mul_r.c (100%) rename {linalg => src/lib/linalg}/assign_add_mul_r.h (100%) rename {linalg => src/lib/linalg}/assign_add_mul_r_32.c (93%) rename {linalg => src/lib/linalg}/assign_add_mul_r_32.h (100%) rename {linalg => src/lib/linalg}/assign_add_mul_r_add_mul.c (100%) rename {linalg => src/lib/linalg}/assign_add_mul_r_add_mul.h (100%) rename {linalg => src/lib/linalg}/assign_diff_mul.c (100%) rename {linalg => src/lib/linalg}/assign_diff_mul.h (100%) rename {linalg => src/lib/linalg}/assign_mul_add.c (100%) rename {linalg => src/lib/linalg}/assign_mul_add.h (100%) rename {linalg => src/lib/linalg}/assign_mul_add_mul.c (100%) rename {linalg => src/lib/linalg}/assign_mul_add_mul.h (100%) rename {linalg => src/lib/linalg}/assign_mul_add_mul_add_mul_add_mul_r.c (100%) rename {linalg => src/lib/linalg}/assign_mul_add_mul_add_mul_add_mul_r.h (100%) rename {linalg => src/lib/linalg}/assign_mul_add_mul_add_mul_r.c (100%) rename {linalg => src/lib/linalg}/assign_mul_add_mul_add_mul_r.h (100%) rename {linalg => src/lib/linalg}/assign_mul_add_mul_r.c (100%) rename {linalg => src/lib/linalg}/assign_mul_add_mul_r.h (100%) rename {linalg => src/lib/linalg}/assign_mul_add_mul_r_32.c (100%) rename {linalg => src/lib/linalg}/assign_mul_add_mul_r_32.h (100%) rename {linalg => src/lib/linalg}/assign_mul_add_r.c (100%) rename {linalg => src/lib/linalg}/assign_mul_add_r.h (100%) rename {linalg => src/lib/linalg}/assign_mul_add_r_32.c (100%) rename {linalg => src/lib/linalg}/assign_mul_add_r_32.h (100%) rename {linalg => src/lib/linalg}/assign_mul_add_r_and_square.c (100%) rename {linalg => src/lib/linalg}/assign_mul_add_r_and_square.h (100%) rename {linalg => src/lib/linalg}/assign_mul_bra_add_mul_ket_add.c (100%) rename {linalg => src/lib/linalg}/assign_mul_bra_add_mul_ket_add.h (100%) rename {linalg => src/lib/linalg}/assign_mul_bra_add_mul_ket_add_r.c (100%) rename {linalg => src/lib/linalg}/assign_mul_bra_add_mul_ket_add_r.h (100%) rename {linalg => 
src/lib/linalg}/assign_mul_bra_add_mul_r.c (100%) rename {linalg => src/lib/linalg}/assign_mul_bra_add_mul_r.h (100%) rename {linalg => src/lib/linalg}/assign_to_32.c (100%) rename {linalg => src/lib/linalg}/assign_to_32.h (100%) rename {linalg => src/lib/linalg}/blas.h (100%) rename {linalg => src/lib/linalg}/comp_decomp.c (100%) rename {linalg => src/lib/linalg}/comp_decomp.h (100%) rename {linalg => src/lib/linalg}/convert_eo_to_lexic.c (100%) rename {linalg => src/lib/linalg}/convert_eo_to_lexic.h (100%) rename {linalg => src/lib/linalg}/convert_even_to_lexic.c (100%) rename {linalg => src/lib/linalg}/convert_even_to_lexic.h (100%) rename {linalg => src/lib/linalg}/convert_odd_to_lexic.c (100%) rename {linalg => src/lib/linalg}/convert_odd_to_lexic.h (100%) rename {linalg => src/lib/linalg}/diff.c (100%) rename {linalg => src/lib/linalg}/diff.h (100%) rename {linalg => src/lib/linalg}/diff_32.c (100%) rename {linalg => src/lib/linalg}/diff_32.h (100%) rename {linalg => src/lib/linalg}/diff_and_square_norm.c (100%) rename {linalg => src/lib/linalg}/diff_and_square_norm.h (100%) rename {linalg => src/lib/linalg}/fortran.h (100%) rename {linalg => src/lib/linalg}/lapack.h (100%) rename {linalg => src/lib/linalg}/map_to_blas.h (100%) rename {linalg => src/lib/linalg}/mattimesvec.c (100%) rename {linalg => src/lib/linalg}/mattimesvec.h (100%) rename {linalg => src/lib/linalg}/mul.c (100%) rename {linalg => src/lib/linalg}/mul.h (100%) rename {linalg => src/lib/linalg}/mul_add_mul.c (100%) rename {linalg => src/lib/linalg}/mul_add_mul.h (100%) rename {linalg => src/lib/linalg}/mul_add_mul_r.c (100%) rename {linalg => src/lib/linalg}/mul_add_mul_r.h (100%) rename {linalg => src/lib/linalg}/mul_diff_mul.c (100%) rename {linalg => src/lib/linalg}/mul_diff_mul.h (100%) rename {linalg => src/lib/linalg}/mul_diff_mul_r.c (100%) rename {linalg => src/lib/linalg}/mul_diff_mul_r.h (100%) rename {linalg => src/lib/linalg}/mul_diff_r.c (100%) rename {linalg => 
src/lib/linalg}/mul_diff_r.h (100%) rename {linalg => src/lib/linalg}/mul_gamma5.c (100%) rename {linalg => src/lib/linalg}/mul_gamma5.h (100%) rename {linalg => src/lib/linalg}/mul_r.c (100%) rename {linalg => src/lib/linalg}/mul_r.h (100%) rename {linalg => src/lib/linalg}/mul_r_32.c (100%) rename {linalg => src/lib/linalg}/mul_r_32.h (100%) rename {linalg => src/lib/linalg}/mul_r_gamma5.c (100%) rename {linalg => src/lib/linalg}/mul_r_gamma5.h (100%) rename {linalg => src/lib/linalg}/print_spinor.c (100%) rename {linalg => src/lib/linalg}/print_spinor.h (100%) rename {linalg => src/lib/linalg}/print_spinor_similar_components.c (100%) rename {linalg => src/lib/linalg}/print_spinor_similar_components.h (100%) rename {linalg => src/lib/linalg}/ratio.c (100%) rename {linalg => src/lib/linalg}/ratio.h (100%) rename {linalg => src/lib/linalg}/scalar_prod.c (100%) rename {linalg => src/lib/linalg}/scalar_prod.h (100%) rename {linalg => src/lib/linalg}/scalar_prod_body.c (100%) rename {linalg => src/lib/linalg}/scalar_prod_i.c (100%) rename {linalg => src/lib/linalg}/scalar_prod_i.h (100%) rename {linalg => src/lib/linalg}/scalar_prod_r.c (100%) rename {linalg => src/lib/linalg}/scalar_prod_r.h (100%) rename {linalg => src/lib/linalg}/scalar_prod_r_32.c (100%) rename {linalg => src/lib/linalg}/scalar_prod_r_32.h (100%) rename {linalg => src/lib/linalg}/set_even_to_zero.c (100%) rename {linalg => src/lib/linalg}/set_even_to_zero.h (100%) rename {linalg => src/lib/linalg}/square_and_minmax.c (100%) rename {linalg => src/lib/linalg}/square_and_minmax.h (100%) rename {linalg => src/lib/linalg}/square_and_prod_r.c (100%) rename {linalg => src/lib/linalg}/square_and_prod_r.h (100%) rename {linalg => src/lib/linalg}/square_norm.c (100%) rename {linalg => src/lib/linalg}/square_norm.h (100%) rename {linalg => src/lib/linalg}/square_norm_32.c (100%) rename {linalg => src/lib/linalg}/square_norm_32.h (100%) rename linalg_eo.h => src/lib/linalg_eo.h (100%) rename little_D.c => 
src/lib/little_D.c (100%) rename little_D.h => src/lib/little_D.h (100%) rename little_D_body.c => src/lib/little_D_body.c (100%) rename matrix_utils.c => src/lib/matrix_utils.c (100%) rename matrix_utils.h => src/lib/matrix_utils.h (100%) rename {meas => src/lib/meas}/Makefile.in (100%) rename {meas => src/lib/meas}/correlators.c (100%) rename {meas => src/lib/meas}/correlators.h (100%) rename {meas => src/lib/meas}/field_strength_types.h (100%) rename {meas => src/lib/meas}/gradient_flow.c (100%) rename {meas => src/lib/meas}/gradient_flow.h (100%) rename {meas => src/lib/meas}/measure_clover_field_strength_observables.c (100%) rename {meas => src/lib/meas}/measure_clover_field_strength_observables.h (100%) rename {meas => src/lib/meas}/measurements.c (100%) rename {meas => src/lib/meas}/measurements.h (100%) rename {meas => src/lib/meas}/oriented_plaquettes.c (100%) rename {meas => src/lib/meas}/oriented_plaquettes.h (100%) rename {meas => src/lib/meas}/pion_norm.c (100%) rename {meas => src/lib/meas}/pion_norm.h (100%) rename {meas => src/lib/meas}/polyakov_loop.c (100%) rename {meas => src/lib/meas}/polyakov_loop.h (100%) rename measure_gauge_action.c => src/lib/measure_gauge_action.c (100%) rename measure_gauge_action.h => src/lib/measure_gauge_action.h (100%) rename measure_rectangles.c => src/lib/measure_rectangles.c (100%) rename measure_rectangles.h => src/lib/measure_rectangles.h (100%) rename misc_types.h => src/lib/misc_types.h (99%) rename {monomial => src/lib/monomial}/Makefile.in (100%) rename {monomial => src/lib/monomial}/clover_trlog_monomial.c (100%) rename {monomial => src/lib/monomial}/clover_trlog_monomial.h (100%) rename {monomial => src/lib/monomial}/cloverdet_monomial.c (100%) rename {monomial => src/lib/monomial}/cloverdet_monomial.h (100%) rename {monomial => src/lib/monomial}/cloverdetratio_monomial.c (100%) rename {monomial => src/lib/monomial}/cloverdetratio_monomial.h (100%) rename {monomial => 
src/lib/monomial}/cloverdetratio_rwmonomial.c (100%) rename {monomial => src/lib/monomial}/cloverdetratio_rwmonomial.h (100%) rename {monomial => src/lib/monomial}/clovernd_trlog_monomial.c (100%) rename {monomial => src/lib/monomial}/clovernd_trlog_monomial.h (100%) rename {monomial => src/lib/monomial}/cloverndpoly_monomial.c (100%) rename {monomial => src/lib/monomial}/cloverndpoly_monomial.h (100%) rename {monomial => src/lib/monomial}/det_monomial.c (100%) rename {monomial => src/lib/monomial}/det_monomial.h (100%) rename {monomial => src/lib/monomial}/detratio_monomial.c (100%) rename {monomial => src/lib/monomial}/detratio_monomial.h (100%) rename {monomial => src/lib/monomial}/gauge_monomial.c (100%) rename {monomial => src/lib/monomial}/gauge_monomial.h (100%) rename {monomial => src/lib/monomial}/moment_energy.c (100%) rename {monomial => src/lib/monomial}/moment_energy.h (100%) rename {monomial => src/lib/monomial}/monitor_forces.c (100%) rename {monomial => src/lib/monomial}/monitor_forces.h (100%) rename {monomial => src/lib/monomial}/monomial.c (100%) rename {monomial => src/lib/monomial}/monomial.h (100%) rename {monomial => src/lib/monomial}/nddetratio_monomial.c (100%) rename {monomial => src/lib/monomial}/nddetratio_monomial.h (100%) rename {monomial => src/lib/monomial}/ndpoly_monomial.c (100%) rename {monomial => src/lib/monomial}/ndpoly_monomial.h (100%) rename {monomial => src/lib/monomial}/ndrat_monomial.c (100%) rename {monomial => src/lib/monomial}/ndrat_monomial.h (100%) rename {monomial => src/lib/monomial}/ndratcor_monomial.c (100%) rename {monomial => src/lib/monomial}/ndratcor_monomial.h (100%) rename {monomial => src/lib/monomial}/poly_monomial.c (100%) rename {monomial => src/lib/monomial}/poly_monomial.h (100%) rename {monomial => src/lib/monomial}/rat_monomial.c (100%) rename {monomial => src/lib/monomial}/rat_monomial.h (100%) rename {monomial => src/lib/monomial}/ratcor_monomial.c (100%) rename {monomial => 
src/lib/monomial}/ratcor_monomial.h (100%) rename mpi_init.c => src/lib/mpi_init.c (100%) rename mpi_init.h => src/lib/mpi_init.h (100%) rename omp_accumulator.h => src/lib/omp_accumulator.h (100%) rename operator.c => src/lib/operator.c (100%) rename operator.h => src/lib/operator.h (100%) rename {operator => src/lib/operator}/Block_D_psi_body.c (100%) rename {operator => src/lib/operator}/D_psi.c (100%) rename {operator => src/lib/operator}/D_psi.h (100%) rename {operator => src/lib/operator}/D_psi_body.c (100%) rename {operator => src/lib/operator}/Dov_proj.c (100%) rename {operator => src/lib/operator}/Dov_proj.h (100%) rename {operator => src/lib/operator}/Dov_psi.c (100%) rename {operator => src/lib/operator}/Dov_psi.h (100%) rename {operator => src/lib/operator}/Hopping_Matrix.c (100%) rename {operator => src/lib/operator}/Hopping_Matrix.h (100%) rename {operator => src/lib/operator}/Hopping_Matrix_32.c (100%) rename {operator => src/lib/operator}/Hopping_Matrix_32.h (100%) rename {operator => src/lib/operator}/Hopping_Matrix_32_nocom.c (100%) rename {operator => src/lib/operator}/Hopping_Matrix_nocom.c (100%) rename {operator => src/lib/operator}/Hopping_Matrix_nocom.h (100%) rename {operator => src/lib/operator}/Makefile.in (100%) rename {operator => src/lib/operator}/assign_mul_one_sw_pm_imu_inv_block_body.c (100%) rename {operator => src/lib/operator}/assign_mul_one_sw_pm_imu_site_lexic_body.c (100%) rename {operator => src/lib/operator}/clover_accumulate_deriv.c (100%) rename {operator => src/lib/operator}/clover_deriv.c (100%) rename {operator => src/lib/operator}/clover_det.c (100%) rename {operator => src/lib/operator}/clover_inline.h (100%) rename {operator => src/lib/operator}/clover_invert.c (100%) rename {operator => src/lib/operator}/clover_leaf.c (100%) rename {operator => src/lib/operator}/clover_leaf.h (100%) rename {operator => src/lib/operator}/clover_term.c (100%) rename {operator => src/lib/operator}/clovertm_operators.c (100%) rename 
{operator => src/lib/operator}/clovertm_operators.h (100%) rename {operator => src/lib/operator}/clovertm_operators_32.c (100%) rename {operator => src/lib/operator}/clovertm_operators_32.h (100%) rename {operator => src/lib/operator}/halfspinor_body.c (100%) rename {operator => src/lib/operator}/halfspinor_body_32.c (100%) rename {operator => src/lib/operator}/halfspinor_hopping.h (100%) rename {operator => src/lib/operator}/halfspinor_hopping_32.h (100%) rename {operator => src/lib/operator}/hopping_bg_dbl.c (100%) rename {operator => src/lib/operator}/hopping_body_dbl.c (100%) rename {operator => src/lib/operator}/hopping_sgl.c (100%) rename {operator => src/lib/operator}/mul_one_pm_imu_inv_body.c (100%) rename {operator => src/lib/operator}/mul_one_pm_imu_sub_mul_body.c (100%) rename {operator => src/lib/operator}/tm_operators.c (100%) rename {operator => src/lib/operator}/tm_operators.h (100%) rename {operator => src/lib/operator}/tm_operators_32.c (100%) rename {operator => src/lib/operator}/tm_operators_32.h (100%) rename {operator => src/lib/operator}/tm_operators_nd.c (100%) rename {operator => src/lib/operator}/tm_operators_nd.h (100%) rename {operator => src/lib/operator}/tm_operators_nd_32.c (100%) rename {operator => src/lib/operator}/tm_operators_nd_32.h (100%) rename {operator => src/lib/operator}/tm_sub_Hopping_Matrix.c (100%) rename {operator => src/lib/operator}/tm_sub_Hopping_Matrix.h (100%) rename {operator => src/lib/operator}/tm_times_Hopping_Matrix.c (100%) rename {operator => src/lib/operator}/tm_times_Hopping_Matrix.h (100%) rename operator_types.h => src/lib/operator_types.h (100%) rename overrelaxation.c => src/lib/overrelaxation.c (99%) rename overrelaxation.h => src/lib/overrelaxation.h (100%) rename parallel_io.h => src/lib/parallel_io.h (100%) rename phmc.c => src/lib/phmc.c (100%) rename phmc.h => src/lib/phmc.h (100%) rename prepare_source.c => src/lib/prepare_source.c (100%) rename prepare_source.h => src/lib/prepare_source.h 
(100%) rename {profiling => src/lib/profiling}/hmc/Readme.md (100%) rename {profiling => src/lib/profiling}/hmc/example_profile.pdf (100%) rename {profiling => src/lib/profiling}/hmc/profile.Rmd (100%) rename {profiling => src/lib/profiling}/hmc/timing.R (100%) rename {profiling => src/lib/profiling}/hmc_mk2/.gitignore (100%) rename {profiling => src/lib/profiling}/hmc_mk2/README.md (100%) rename {profiling => src/lib/profiling}/hmc_mk2/logs/example_log.out (100%) rename {profiling => src/lib/profiling}/hmc_mk2/make_profile.R (100%) rename {profiling => src/lib/profiling}/hmc_mk2/profile.Rmd (100%) create mode 100644 src/lib/qphix/qphix_base_classes.hpp create mode 100644 src/lib/qphix/qphix_interface.cpp create mode 100644 src/lib/qphix/qphix_interface.hpp create mode 100644 src/lib/qphix/qphix_interface_utils.hpp rename qphix_interface.h => src/lib/qphix_interface.h (100%) rename qphix_types.h => src/lib/qphix_types.h (100%) rename qphix_veclen.h => src/lib/qphix_veclen.h (100%) rename quda_dummy_types.h => src/lib/quda_dummy_types.h (100%) create mode 100644 src/lib/quda_gauge_paths.inc rename quda_interface.c => src/lib/quda_interface.c (100%) rename quda_interface.h => src/lib/quda_interface.h (100%) rename quda_types.h => src/lib/quda_types.h (100%) rename ranlxd.c => src/lib/ranlxd.c (100%) rename ranlxd.h => src/lib/ranlxd.h (100%) rename ranlxs.c => src/lib/ranlxs.c (100%) rename ranlxs.h => src/lib/ranlxs.h (100%) rename {rational => src/lib/rational}/Makefile.in (100%) rename {rational => src/lib/rational}/elliptic.c (100%) rename {rational => src/lib/rational}/elliptic.h (100%) rename {rational => src/lib/rational}/rational.c (100%) rename {rational => src/lib/rational}/rational.h (100%) rename {rational => src/lib/rational}/zolotarev.c (100%) rename {rational => src/lib/rational}/zolotarev.h (100%) rename read_input.h => src/lib/read_input.h (100%) rename read_input.l => src/lib/read_input.l (100%) rename reweighting_factor.c => 
src/lib/reweighting_factor.c (100%) rename reweighting_factor.h => src/lib/reweighting_factor.h (100%) rename reweighting_factor_nd.c => src/lib/reweighting_factor_nd.c (100%) rename reweighting_factor_nd.h => src/lib/reweighting_factor_nd.h (100%) rename rnd_gauge_trafo.c => src/lib/rnd_gauge_trafo.c (100%) rename rnd_gauge_trafo.h => src/lib/rnd_gauge_trafo.h (100%) rename sighandler.c => src/lib/sighandler.c (100%) rename sighandler.h => src/lib/sighandler.h (100%) rename {smearing => src/lib/smearing}/Makefile.in (100%) rename {smearing => src/lib/smearing}/ape.h (100%) rename {smearing => src/lib/smearing}/ape.ih (100%) rename {smearing => src/lib/smearing}/ape_ape_smear.c (100%) rename {smearing => src/lib/smearing}/hex.h (100%) rename {smearing => src/lib/smearing}/hex.ih (100%) rename {smearing => src/lib/smearing}/hex_hex_smear.c (100%) rename {smearing => src/lib/smearing}/hex_stout_exclude_none.c (100%) rename {smearing => src/lib/smearing}/hex_stout_exclude_one.c (100%) rename {smearing => src/lib/smearing}/hex_stout_exclude_two.c (100%) rename {smearing => src/lib/smearing}/hyp.h (100%) rename {smearing => src/lib/smearing}/hyp.ih (100%) rename {smearing => src/lib/smearing}/hyp_APE_project_exclude_none.c (100%) rename {smearing => src/lib/smearing}/hyp_APE_project_exclude_one.c (100%) rename {smearing => src/lib/smearing}/hyp_APE_project_exclude_two.c (100%) rename {smearing => src/lib/smearing}/hyp_hyp_smear.c (100%) rename {smearing => src/lib/smearing}/hyp_hyp_staples_exclude_none.c (100%) rename {smearing => src/lib/smearing}/hyp_hyp_staples_exclude_one.c (100%) rename {smearing => src/lib/smearing}/hyp_hyp_staples_exclude_two.c (100%) rename {smearing => src/lib/smearing}/stout.h (100%) rename {smearing => src/lib/smearing}/stout.ih (100%) rename {smearing => src/lib/smearing}/stout_stout_smear.c (100%) rename {smearing => src/lib/smearing}/uils_print_config_to_screen.c (100%) rename {smearing => src/lib/smearing}/utils.h (100%) rename {smearing 
=> src/lib/smearing}/utils.ih (100%) rename {smearing => src/lib/smearing}/utils_generic_staples.c (100%) rename {smearing => src/lib/smearing}/utils_print_config_to_screen.c (100%) rename {smearing => src/lib/smearing}/utils_print_su3.c (100%) rename {smearing => src/lib/smearing}/utils_project_antiherm.c (100%) rename {smearing => src/lib/smearing}/utils_project_herm.c (100%) rename {smearing => src/lib/smearing}/utils_reunitarize.c (100%) rename {smearing => src/lib/smearing}/utils_reunitarize_MILC.c (88%) rename {solver => src/lib/solver}/M_plus_block_psi_body.c (100%) rename {solver => src/lib/solver}/Makefile.in (100%) rename {solver => src/lib/solver}/Msap.c (100%) rename {solver => src/lib/solver}/Msap.h (100%) rename {solver => src/lib/solver}/bicg_complex.c (100%) rename {solver => src/lib/solver}/bicg_complex.h (100%) rename {solver => src/lib/solver}/bicgstab2.c (100%) rename {solver => src/lib/solver}/bicgstab2.h (100%) rename {solver => src/lib/solver}/bicgstab_complex.c (100%) rename {solver => src/lib/solver}/bicgstab_complex.h (100%) rename {solver => src/lib/solver}/bicgstab_complex_bi.c (100%) rename {solver => src/lib/solver}/bicgstab_complex_bi.h (100%) rename {solver => src/lib/solver}/bicgstabell.c (100%) rename {solver => src/lib/solver}/bicgstabell.h (100%) rename {solver => src/lib/solver}/cg_her.c (100%) rename {solver => src/lib/solver}/cg_her.h (100%) rename {solver => src/lib/solver}/cg_her_bi.c (100%) rename {solver => src/lib/solver}/cg_her_bi.h (100%) rename {solver => src/lib/solver}/cg_her_nd.c (100%) rename {solver => src/lib/solver}/cg_her_nd.h (100%) rename {solver => src/lib/solver}/cg_mms_tm.c (100%) rename {solver => src/lib/solver}/cg_mms_tm.h (100%) rename {solver => src/lib/solver}/cg_mms_tm_nd.c (100%) rename {solver => src/lib/solver}/cg_mms_tm_nd.h (100%) rename {solver => src/lib/solver}/cgne4complex.c (100%) rename {solver => src/lib/solver}/cgne4complex.h (100%) rename {solver => src/lib/solver}/cgs_real.c (100%) 
rename {solver => src/lib/solver}/cgs_real.h (100%) rename {solver => src/lib/solver}/chrono_guess.c (100%) rename {solver => src/lib/solver}/chrono_guess.h (100%) rename {solver => src/lib/solver}/cr.c (100%) rename {solver => src/lib/solver}/cr.h (100%) rename {solver => src/lib/solver}/dfl_projector.c (100%) rename {solver => src/lib/solver}/dfl_projector.h (100%) rename {solver => src/lib/solver}/diagonalise_general_matrix.c (100%) rename {solver => src/lib/solver}/diagonalise_general_matrix.h (100%) rename {solver => src/lib/solver}/dirac_operator_eigenvectors.c (100%) rename {solver => src/lib/solver}/dirac_operator_eigenvectors.h (100%) rename {solver => src/lib/solver}/eigcg.c (100%) rename {solver => src/lib/solver}/eigcg.h (100%) rename {solver => src/lib/solver}/eigenvalues.c (100%) rename {solver => src/lib/solver}/eigenvalues.h (100%) rename {solver => src/lib/solver}/eigenvalues_bi.c (100%) rename {solver => src/lib/solver}/eigenvalues_bi.h (100%) rename {solver => src/lib/solver}/fgmres.c (100%) rename {solver => src/lib/solver}/fgmres.h (100%) rename {solver => src/lib/solver}/fgmres4complex.c (100%) rename {solver => src/lib/solver}/fgmres4complex.h (100%) rename {solver => src/lib/solver}/fgmres4complex_body.c (100%) rename {solver => src/lib/solver}/gcr.c (100%) rename {solver => src/lib/solver}/gcr.h (100%) rename {solver => src/lib/solver}/gcr4complex.c (100%) rename {solver => src/lib/solver}/gcr4complex.h (100%) rename {solver => src/lib/solver}/gcr4complex_body.c (100%) rename {solver => src/lib/solver}/gcr4complex_body.h (100%) rename {solver => src/lib/solver}/generate_dfl_subspace.c (100%) rename {solver => src/lib/solver}/generate_dfl_subspace.h (100%) rename {solver => src/lib/solver}/gmres.c (100%) rename {solver => src/lib/solver}/gmres.h (100%) rename {solver => src/lib/solver}/gmres_dr.c (100%) rename {solver => src/lib/solver}/gmres_dr.h (100%) rename {solver => src/lib/solver}/gmres_precon.c (100%) rename {solver => 
src/lib/solver}/gmres_precon.h (100%) rename {solver => src/lib/solver}/gram-schmidt.c (100%) rename {solver => src/lib/solver}/gram-schmidt.h (100%) rename {solver => src/lib/solver}/incr_eigcg.c (100%) rename {solver => src/lib/solver}/incr_eigcg.h (100%) rename {solver => src/lib/solver}/index_jd.c (100%) rename {solver => src/lib/solver}/index_jd.h (100%) rename {solver => src/lib/solver}/init_guess.c (100%) rename {solver => src/lib/solver}/init_guess.h (100%) rename {solver => src/lib/solver}/jdher.c (100%) rename {solver => src/lib/solver}/jdher.h (100%) rename {solver => src/lib/solver}/jdher_bi.c (100%) rename {solver => src/lib/solver}/jdher_bi.h (100%) rename {solver => src/lib/solver}/little_mg_precon_body.c (100%) rename {solver => src/lib/solver}/little_project_eo_body.c (100%) rename {solver => src/lib/solver}/lu_solve.c (100%) rename {solver => src/lib/solver}/lu_solve.h (100%) rename {solver => src/lib/solver}/matrix_mult_typedef.h (100%) rename {solver => src/lib/solver}/matrix_mult_typedef_bi.h (100%) rename {solver => src/lib/solver}/matrix_mult_typedef_nd.h (100%) rename {solver => src/lib/solver}/mcr.c (100%) rename {solver => src/lib/solver}/mcr.h (100%) rename {solver => src/lib/solver}/mcr4complex.c (100%) rename {solver => src/lib/solver}/mcr4complex.h (100%) rename {solver => src/lib/solver}/mixed_cg_her.c (100%) rename {solver => src/lib/solver}/mixed_cg_her.h (100%) rename {solver => src/lib/solver}/mixed_cg_mms_tm_nd.c (100%) rename {solver => src/lib/solver}/mixed_cg_mms_tm_nd.h (100%) rename {solver => src/lib/solver}/monomial_solve.c (100%) rename {solver => src/lib/solver}/monomial_solve.h (100%) rename {solver => src/lib/solver}/mr.c (100%) rename {solver => src/lib/solver}/mr.h (100%) rename {solver => src/lib/solver}/mr4complex.c (100%) rename {solver => src/lib/solver}/mr4complex.h (100%) rename {solver => src/lib/solver}/mrblk_body.c (100%) rename {solver => src/lib/solver}/ortho.c (100%) rename {solver => 
src/lib/solver}/ortho.h (100%) rename {solver => src/lib/solver}/pcg_her.c (100%) rename {solver => src/lib/solver}/pcg_her.h (100%) rename {solver => src/lib/solver}/poly_precon.c (100%) rename {solver => src/lib/solver}/poly_precon.h (100%) rename {solver => src/lib/solver}/quicksort.c (100%) rename {solver => src/lib/solver}/quicksort.h (100%) rename {solver => src/lib/solver}/restart_X.c (100%) rename {solver => src/lib/solver}/restart_X.h (100%) rename {solver => src/lib/solver}/rg_mixed_cg_her.c (100%) rename {solver => src/lib/solver}/rg_mixed_cg_her.h (100%) rename {solver => src/lib/solver}/rg_mixed_cg_her_nd.c (100%) rename {solver => src/lib/solver}/rg_mixed_cg_her_nd.h (100%) rename {solver => src/lib/solver}/rg_mixed_cg_typedef.h (100%) rename {solver => src/lib/solver}/solver.h (100%) rename {solver => src/lib/solver}/solver_field.c (100%) rename {solver => src/lib/solver}/solver_field.h (100%) rename {solver => src/lib/solver}/solver_params.h (100%) rename {solver => src/lib/solver}/solver_types.c (100%) rename {solver => src/lib/solver}/solver_types.h (100%) rename {solver => src/lib/solver}/sub_low_ev.c (100%) rename {solver => src/lib/solver}/sub_low_ev.h (100%) rename {solver => src/lib/solver}/sumr.c (100%) rename {solver => src/lib/solver}/sumr.h (100%) rename source_generation.c => src/lib/source_generation.c (100%) rename source_generation.h => src/lib/source_generation.h (100%) rename spinor_fft.c => src/lib/spinor_fft.c (100%) rename spinor_fft.h => src/lib/spinor_fft.h (100%) rename start.c => src/lib/start.c (100%) rename start.h => src/lib/start.h (100%) rename struct_accessors.h => src/lib/struct_accessors.h (100%) rename su3.h => src/lib/su3.h (100%) rename su3adj.h => src/lib/su3adj.h (100%) rename su3spinor.h => src/lib/su3spinor.h (100%) rename tensors.h => src/lib/tensors.h (100%) rename {test => src/lib/test}/Makefile (100%) rename {test => src/lib/test}/check_geometry.c (100%) rename {test => src/lib/test}/check_geometry.h (100%) 
rename {test => src/lib/test}/check_nan.c (100%) rename {test => src/lib/test}/check_nan.h (100%) rename {test => src/lib/test}/check_overlap.c (100%) rename {test => src/lib/test}/check_xchange.c (100%) rename {test => src/lib/test}/hopping_test.README (100%) rename {test => src/lib/test}/hopping_test.input.compare (100%) rename {test => src/lib/test}/hopping_test.input.new (100%) rename {test => src/lib/test}/hopping_test.input.start (100%) rename {test => src/lib/test}/hopping_test_generate_script (100%) rename {test => src/lib/test}/hopping_test_qscript (100%) rename {test => src/lib/test}/measure_rectangles.debug.c (100%) rename {test => src/lib/test}/overlaptests.c (100%) rename {test => src/lib/test}/overlaptests.h (100%) rename {test => src/lib/test}/qdran64.h (100%) rename tm_debug_printf.c => src/lib/tm_debug_printf.c (100%) rename tm_debug_printf.h => src/lib/tm_debug_printf.h (100%) rename update_backward_gauge.c => src/lib/update_backward_gauge.c (100%) rename update_backward_gauge.h => src/lib/update_backward_gauge.h (100%) rename update_gauge.c => src/lib/update_gauge.c (100%) rename update_gauge.h => src/lib/update_gauge.h (100%) rename update_momenta.c => src/lib/update_momenta.c (100%) rename update_momenta.h => src/lib/update_momenta.h (100%) rename update_momenta_fg.c => src/lib/update_momenta_fg.c (100%) rename update_momenta_fg.h => src/lib/update_momenta_fg.h (100%) rename update_tm.c => src/lib/update_tm.c (100%) rename update_tm.h => src/lib/update_tm.h (100%) rename {util => src/lib/util}/io.c (100%) rename {util => src/lib/util}/io.h (100%) rename {util => src/lib/util}/laguer/Makefile (100%) rename {util => src/lib/util}/laguer/chebyRoot.C (100%) rename {util => src/lib/util}/laguer/chebyRoot.H (100%) rename {util => src/lib/util}/laguer/laguer.c (100%) rename {util => src/lib/util}/laguer/quadroptRoot.C (100%) rename {util => src/lib/util}/oox/Makefile (100%) rename {util => src/lib/util}/oox/oox.c (100%) rename {util => 
src/lib/util}/oox/oox_gawrapper.cxx (100%) rename {util => src/lib/util}/oox/oox_gawrapper.h (100%) rename {util => src/lib/util}/swapendian.c (100%) rename {util => src/lib/util}/tmlqcd-indent (100%) rename {wrapper => src/lib/wrapper}/Makefile.in (100%) rename {wrapper => src/lib/wrapper}/lib_wrapper.c (100%) rename {xchange => src/lib/xchange}/Makefile.in (100%) rename {xchange => src/lib/xchange}/little_field_gather.c (100%) rename {xchange => src/lib/xchange}/little_field_gather.h (100%) rename {xchange => src/lib/xchange}/little_field_gather_body.c (100%) rename {xchange => src/lib/xchange}/xchange.h (100%) rename {xchange => src/lib/xchange}/xchange_2fields.c (100%) rename {xchange => src/lib/xchange}/xchange_2fields.h (100%) rename {xchange => src/lib/xchange}/xchange_deri.c (100%) rename {xchange => src/lib/xchange}/xchange_deri.h (100%) rename {xchange => src/lib/xchange}/xchange_field.c (100%) rename {xchange => src/lib/xchange}/xchange_field.h (100%) rename {xchange => src/lib/xchange}/xchange_gauge.c (100%) rename {xchange => src/lib/xchange}/xchange_gauge.h (100%) rename {xchange => src/lib/xchange}/xchange_halffield.c (100%) rename {xchange => src/lib/xchange}/xchange_halffield.h (100%) rename {xchange => src/lib/xchange}/xchange_lexicfield.c (100%) rename {xchange => src/lib/xchange}/xchange_lexicfield.h (100%) diff --git a/.gitignore b/.gitignore index 79e2bc1b2..0a2e35fba 100644 --- a/.gitignore +++ b/.gitignore @@ -18,7 +18,6 @@ tags* hmc_tm invert offline_measurement -lib/ benchmark *.data *.para diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 000000000..9dc9f71f2 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,390 @@ +cmake_minimum_required(VERSION 3.24) + +project( + tmlqcd + DESCRIPTION "tmlQCD" + HOMEPAGE_URL "http://www.itkp.uni-bonn.de/~urbach/software.html" + VERSION "6.0.0" + LANGUAGES C CXX) + +# include our cmake snippets +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake) + +# 
================================================================================================= +# REQUIRE OUT-OF-SOURCE BUILDS +file(TO_CMAKE_PATH "${PROJECT_BINARY_DIR}/CMakeLists.txt" LOC_PATH) +if(EXISTS "${LOC_PATH}") + message( + FATAL_ERROR + "You cannot build in a source directory (or any directory with a CMakeLists.txt file). Please make a build subdirectory." + ) +endif() + +# ================================================================================================= +# PROJECT AND VERSION +include(CMakeDependentOption) +include(CheckSymbolExists) +include(CheckLibraryExists) +include(CheckFunctionExists) +include(GNUInstallDirs) + +cmake_policy(SET CMP0048 NEW) + +if(POLICY CMP0144) + cmake_policy(SET CMP0144 NEW) +endif() + +if(NOT DEFINED CMAKE_CUDA_STANDARD) + set(CMAKE_CUDA_STANDARD 14) + set(CMAKE_CUDA_STANDARD_REQUIRED ON) +endif() + +if(NOT DEFINED CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD 17) + set(CMAKE_CXX_STANDARD_REQUIRED ON) +endif() + +if(NOT DEFINED CMAKE_C_STANDARD) + set(CMAKE_C_STANDARD 11) + set(CMAKE_C_STANDARD_REQUIRED ON) +endif() + +if(NOT DEFINED CMAKE_HIP_STANDARD) + set(CMAKE_HIP_STANDARD 14) + set(CMAKE_HIP_STANDARD_REQUIRED ON) +endif() + +find_package(PkgConfig) + +# ############################################################################## +# Define the output path for static libraries (relative to this project's build tree) +# ############################################################################## +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY + ${PROJECT_BINARY_DIR}/lib + CACHE PATH "Single output directory for building all libraries.") + +# Search for ROCm in common locations: every listed environment variable that is set is added to CMAKE_PREFIX_PATH +foreach(__var ROCM_ROOT CRAY_ROCM_ROOT ORNL_ROCM_ROOT CRAY_ROCM_PREFIX + ROCM_PREFIX CRAY_ROCM_DIR) + if(DEFINED ENV{${__var}}) + list(APPEND CMAKE_PREFIX_PATH "$ENV{${__var}}") + set(ROCM_PATH + "$ENV{${__var}}" + CACHE PATH "Path to ROCm installation") + endif() +endforeach() + +option(CMAKE_POSITION_INDEPENDENT_CODE "Enable position independent code" ON) +option(BUILD_SHARED_LIBS
"Enable shared library" ON) +option(TM_USE_FFTW "Enable fftw support" OFF) +option(TM_USE_MPI "Enable MPI support" OFF) +option(TM_USE_CUDA "Enable CUDA support" OFF) +option(TM_USE_HIP "Enable HIP support" OFF) +option(TM_USE_DDALPHAAMG "Enable DDalphaAMG support" OFF) +option(TM_USE_OPENMP "Enable openMP" ON) +option(TM_FIXED_VOLUME "fix volume at compile time" OFF) +set( + TM_ENABLE_ALIGNMENT + "auto" + CACHE STRING "Automatically or explicitly align arrays to byte number. auto, none, 16, 32, 64") + +set_property( + CACHE TM_ENABLE_ALIGNMENT + PROPERTY STRINGS + "auto" + "none" + "16" + "32" + "64") + +option(TM_BGL_DRAM "use BGL dram window (BGL only!)" ON) +option(TM_USE_OPTIMIZATION "enable optimisation" ON) +option(TM_USE_GAUGE_COPY "Enable use of a copy of the gauge field" ON) +option(TM_USE_HALFSPINOR "Use a Dirac Op. with halfspinor exchange" ON) +option(TM_USE_TSPLITPAR "Enable timeslice-splitted communications" ON) +option(TM_USE_QPHIX "enable QPhiX" OFF) +option(TM_USE_SHMEM "Use shmem API" OFF) +option(TM_USE_QUDA "Enable QUDA support" OFF) +option(TM_USE_GPROF "Enable gprof profiler" OFF) +option(TM_ENABLE_WARNINGS "Enable all warnings" ON) + +# MPI dependent options (forced OFF when TM_USE_MPI is disabled) +cmake_dependent_option( + TM_PERSISTENT_MPI "Use persistent MPI calls for halfspinor [default=no]" + OFF "TM_USE_MPI" OFF) +cmake_dependent_option( + TM_NONBLOCKING_MPI "Use non-blocking MPI calls for spinor and gauge" ON + "TM_USE_MPI" OFF) + +# need to do it properly.
Just a place holder +cmake_dependent_option( + TM_MPI_DIMENSION "Use n dimensional parallelisation [default=4]" 4 + "TM_USE_MPI" OFF) + +# HIP dependent options +cmake_dependent_option(TM_USE_CUDA_HIP "Enable CUDA support in HIP" OFF + "TM_USE_HIP" OFF) + +# lemon depends on MPI: it is forced OFF when TM_USE_MPI is disabled +cmake_dependent_option(TM_USE_LEMON "Use the lemon io library" OFF + "TM_USE_MPI" OFF) + +# GPU dependent options +cmake_dependent_option(TM_USE_QUDA_EXPERIMENTAL "Enable experimental QUDA features" ON + "TM_USE_QUDA" OFF) +cmake_dependent_option( + TM_QUDA_FERMIONIC_FORCES "Enable support for fermionic forces using QUDA" + ON "TM_USE_QUDA" OFF) + +cmake_dependent_option(TM_USE_NVHPC "Enable Nvidia HPC toolkit" OFF + "TM_USE_CUDA" OFF) + +# search for blas and lapack +find_package(BLAS REQUIRED) +# +find_package(LAPACK REQUIRED) +set(HAVE_LAPACK ON) +find_package(FLEX REQUIRED) +# do we need bison ? +find_package(BISON REQUIRED) + +set(PACKAGE_NAME ${PROJECT_DESCRIPTION}) +set(PACKAGE_VERSION ${PROJECT_VERSION}) +set(PACKAGE_TARNAME "tmlqcd") +set(PACKAGE_BUGREPORT "curbach@gmx.de") +set(PACKAGE_STRING "${PROJECT_DESCRIPTION} ${PROJECT_VERSION}") + +unset(TM_USE_MPI) +unset(TM_USE_OMP) +unset(HAVE_LIBLEMON) +unset(HAVE_LIBLIME) +unset(FIXEDVOLUME) +unset(_PERSISTENT) +unset(_NON_BLOCKING) +unset(HAVE_LIBQUDA) +unset(TM_USE_QUDA) +unset(TM_QUDA_EXPERIMENTAL) +unset(TM_QUDA_FERMIONIC_FORCES) +unset(DDalphaAMG) +unset(TM_USE_QPHIX) +unset(QPHIX_SOALEN) +unset(_NEW_GEOMETRY) +set(_GAUGE_COPY ${TM_USE_GAUGE_COPY}) +unset(_USE_SHMEM) +unset(_USE_HALFSPINOR) +set(ALIGN " ") +set(ALIGN_BASE "0") +set(ALIGN_BASE32 "0") +set(ALIGN32 " ") + +message(STATUS "Array alignment: ${TM_ENABLE_ALIGNMENT}") +if (TM_ENABLE_ALIGNMENT MATCHES "^(auto|none)$") + set(ALIGN_BASE "0x00") + set(ALIGN " ") + set(ALIGN_BASE32 "0x00") + set(ALIGN32 " ") +elseif (TM_ENABLE_ALIGNMENT EQUAL 16) + set(ALIGN_BASE "0x0F") + set(ALIGN "__attribute__ ((aligned (16)))") + set(ALIGN_BASE32 "0x0F") + set(ALIGN32 "__attribute__ ((aligned (16)))") +elseif (TM_ENABLE_ALIGNMENT EQUAL
32) + set(ALIGN_BASE "0x1F") + set(ALIGN "__attribute__ ((aligned (32)))") + set(ALIGN_BASE32 "0x1F") + set(ALIGN32 "__attribute__ ((aligned (32)))") +elseif (TM_ENABLE_ALIGNMENT EQUAL 64) + set(ALIGN_BASE "0x3F") + set(ALIGN "__attribute__ ((aligned (64)))") + set(ALIGN_BASE32 "0x3F") + set(ALIGN32 "__attribute__ ((aligned (64)))") +else() + message(FATAL_ERROR "Unusable value for array alignment. Allowed values are: auto, none, 16, 32, 64") +endif() + +if(TM_USE_HALFSPINOR) + set(_USE_HALFSPINOR ON) +endif() + +if(TM_FIXED_VOLUME) + set(FIXEDVOLUME ON) +endif() + +if(TM_PERSISTENT_MPI) + set(_PERSISTENT ON) +endif() + +if(TM_USE_MPI) + find_package(MPI REQUIRED) + set(TM_USE_MPI ON) + if(TM_NONBLOCKING_MPI) + set(_NON_BLOCKING ON) + endif() +endif() + +if(TM_USE_OPENMP) + find_package(OpenMP REQUIRED COMPONENTS C CXX) + set(TM_USE_OMP ON) +endif() + +if(TM_USE_HDF5) + find_package(HDF5 REQUIRED COMPONENTS C) +endif() + +if(TM_USE_LEMON) + find_package(Lemon REQUIRED) + set(HAVE_LIBLEMON ON) +endif() + +find_package(CLime REQUIRED) +set(HAVE_LIBLIME ON) + +if(TM_USE_QUDA) + find_package(QUDA REQUIRED CONFIG) + set(HAVE_LIBQUDA ON) + if(TM_USE_QUDA_EXPERIMENTAL) + set(TM_QUDA_EXPERIMENTAL ON) + endif() + if(TM_QUDA_FERMIONIC_FORCES) + set(TM_QUDA_FERMIONIC_FORCES ON) + endif() + if(TM_USE_CUDA OR TM_USE_HIP) + set(TM_USE_QUDA ON) + endif() +endif() + +if(TM_USE_CUDA AND TM_USE_HIP) + message( + FATAL_ERROR + "HIP and CUDA are mutually exclusive. Please choose one GPU support only") +endif() + +if(TM_USE_CUDA OR QUDA_TARGET_CUDA) + enable_language(CUDA) + if(TM_USE_NVHPC) + find_package(NVHPC REQUIRED COMPONENTS CUDA MATH HOSTUTILS NCCL) + else() + find_package(CUDAToolkit REQUIRED) + endif() +endif() + +message(STATUS "QUDA_TARGET_CUDA: ${QUDA_TARGET_CUDA}") +if(TM_USE_HIP OR QUDA_TARGET_HIP) + enable_language(HIP) + + # we may want to use hip-cuda for development or debugging purposes especially + # if AMD GPU access is not possible.
So allow it + if(TM_USE_CUDA_HIP) + find_package(CUDA) + endif() + + if(CMAKE_HIP_PLATFORM MATCHES "amd") + set(TM_GPU_PLATFORM_DFLAGS "__HIP_PLATFORM_AMD__") + else() + set(TM_GPU_PLATFORM_DFLAGS "__HIP_PLATFORM_NVIDIA__") + endif() +endif() + +if(TM_USE_SHMEM) + set(_USE_SHMEM ON) +endif() + +if(TM_USE_QPHIX) + find_package(QPhiX REQUIRED) + if(NOT TARGET tmlqcd::qphix) + add_library(tmlqcd::qphix INTERFACE IMPORTED) + set_target_properties(tmlqcd::qphix PROPERTIES INTERFACE_LINK_LIBRARIES + "${QPHIX_LIBRARIES}") + set_target_properties(tmlqcd::qphix PROPERTIES INTERFACE_INCLUDE_DIRECTORIES + "${QPHIX_INCLUDE_DIRS}") + endif() + set(TM_USE_QPHIX ON) +endif() + +# check for fftw3 (rely on pkgconfig). +if(TM_USE_FFTW) + pkg_search_module(tmlqcd_fftw3 IMPORTED_TARGET GLOBAL fftw3) + if(tmlqcd_fftw3_FOUND) + add_library(tmlqcd::fftw3 ALIAS PkgConfig::tmlqcd_fftw3) + endif() +endif() + +# gprof: add the profiling flags to every C and C++ compile line + +if (TM_USE_GPROF) + set(PROFILE_FLAGS "-pg;-g") + if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|powerpc64)$") + list(APPEND PROFILE_FLAGS "-qfullpath") + endif() + add_compile_options("$<$<COMPILE_LANGUAGE:C,CXX>:${PROFILE_FLAGS}>") +endif() + +if (TM_ENABLE_WARNINGS) + add_compile_options( + $<$<COMPILE_LANGUAGE:C>:-Wall> + $<$<COMPILE_LANGUAGE:CXX>:-Wall>) +endif() + +# check for the presence of clock_gettime in libc or librt +check_symbol_exists(clock_gettime "time.h" HAVE_CLOCK_GETTIME) +check_library_exists(rt clock_gettime "" HAVE_CLOCK_GETTIME_IN_RT) +check_function_exists(fseeko HAVE_FSEEKO) + +# set the parallelization (string comparison, because the value may be 1-4 or X, XY, XYZ) + +if(TM_USE_MPI) + if(TM_MPI_DIMENSION STREQUAL "1") + # T parallelisation + set(PARALLELT ON) + elseif(TM_MPI_DIMENSION STREQUAL "2") + # XT parallelisation + set(PARALLELXT ON) + elseif(TM_MPI_DIMENSION STREQUAL "3") + # XYT parallelisation + set(PARALLELXYT ON) + elseif(TM_MPI_DIMENSION STREQUAL "4") + # XYZT parallelisation + set(PARALLELXYZT ON) + elseif(TM_MPI_DIMENSION STREQUAL "X") + set(PARALLELX ON) + elseif(TM_MPI_DIMENSION STREQUAL "XY") + set(PARALLELXY ON) + elseif(TM_MPI_DIMENSION STREQUAL "XYZ") +
set(PARALLELXYZ ON) + else() + set(PARALLELXYZT ON) + endif() +endif() + +# keep the autotool config.h header. +configure_file("${PROJECT_SOURCE_DIR}/cmake/tmlqcd_config_internal.h.in" + "${PROJECT_BINARY_DIR}/tmlqcd_config_internal.h" @ONLY) +configure_file("${PROJECT_SOURCE_DIR}/fixed_volume.h.in" + "${PROJECT_BINARY_DIR}/fixed_volume.h" @ONLY) +# check if git command exists (GIT_EXE is set to GIT_EXE-NOTFOUND when it does not) +find_program(GIT_EXE NAMES git) + +# generate version header +string(TIMESTAMP TM_TIMESTAMP "%Y-%m-%d %H:%M:%S") +if(GIT_EXE AND EXISTS "${PROJECT_SOURCE_DIR}/.git") + execute_process( + COMMAND "${GIT_EXE}" rev-parse HEAD + OUTPUT_VARIABLE TM_SHA + WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}" + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + execute_process( + COMMAND "${GIT_EXE}" describe --all + OUTPUT_VARIABLE TM_GIT_BRANCH + WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}" + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + message(STATUS "git hash ${TM_SHA}") +else() + # set(TM_GIT_BRANCH "release v${PROJECT_VERSION}") + set(TM_SHA + "https://github.com/etmc/tmLQCD/releases/tag/rel-${PROJECT_VERSION_MAJOR}-${PROJECT_VERSION_MINOR}" + ) +endif() + +configure_file(cmake/git_hash.h.in git_hash.h @ONLY) +add_subdirectory(src/lib) +add_subdirectory(src/bin) diff --git a/cmake/FindCLime.cmake b/cmake/FindCLime.cmake new file mode 100644 index 000000000..0c3eabe48 --- /dev/null +++ b/cmake/FindCLime.cmake @@ -0,0 +1,27 @@ +include(FindPackageHandleStandardArgs) + +find_library( + TMLQCD_CLIME_LIBRARIES + NAMES lime + PATH_SUFFIXES "lib" "lib64") + +find_path( + TMLQCD_CLIME_INCLUDE_DIRS + NAMES lime.h + PATH_SUFFIXES "include" "include/${_package_name}" "${_package_name}") + +message(STATUS "CLime include directory: ${TMLQCD_CLIME_INCLUDE_DIRS}") +find_package_handle_standard_args(CLime DEFAULT_MSG TMLQCD_CLIME_LIBRARIES + TMLQCD_CLIME_INCLUDE_DIRS) + +if(CLime_FOUND AND NOT TARGET tmlqcd::clime) + add_library(tmlqcd::clime INTERFACE IMPORTED) + set_target_properties(tmlqcd::clime PROPERTIES INTERFACE_LINK_LIBRARIES + "${TMLQCD_CLIME_LIBRARIES}") +
set_target_properties(tmlqcd::clime PROPERTIES INTERFACE_INCLUDE_DIRECTORIES + "${TMLQCD_CLIME_INCLUDE_DIRS}") +endif() + +set(TMLQCD_CLIME_FOUND ${CLime_FOUND}) +mark_as_advanced(TMLQCD_CLIME_FOUND TMLQCD_CLIME_LIBRARIES + TMLQCD_CLIME_INCLUDE_DIRS) diff --git a/cmake/FindLemon.cmake b/cmake/FindLemon.cmake new file mode 100644 index 000000000..cdeca5e42 --- /dev/null +++ b/cmake/FindLemon.cmake @@ -0,0 +1,25 @@ +include(FindPackageHandleStandardArgs) + +find_library( + TMLQCD_LEMON_LIBRARIES + NAMES lemon + PATH_SUFFIXES "lib" "lib64") + +find_path( + TMLQCD_LEMON_INCLUDE_DIRS + NAMES lemon.h + PATH_SUFFIXES "include" "include/${_package_name}" "${_package_name}") + +find_package_handle_standard_args(Lemon DEFAULT_MSG TMLQCD_LEMON_LIBRARIES + TMLQCD_LEMON_INCLUDE_DIRS) + +if(Lemon_FOUND AND NOT TARGET tmlqcd::lemon) + add_library(tmlqcd::lemon INTERFACE IMPORTED) + set_target_properties(tmlqcd::lemon PROPERTIES INTERFACE_LINK_LIBRARIES + "${TMLQCD_LEMON_LIBRARIES}") + set_target_properties(tmlqcd::lemon PROPERTIES INTERFACE_INCLUDE_DIRECTORIES + "${TMLQCD_LEMON_INCLUDE_DIRS}") +endif() + +set(TMLQCD_LEMON_FOUND ${Lemon_FOUND}) +mark_as_advanced(TMLQCD_LEMON_LIBRARIES TMLQCD_LEMON_INCLUDE_DIRS) diff --git a/cmake/git_hash.h.in b/cmake/git_hash.h.in new file mode 100644 index 000000000..23f624742 --- /dev/null +++ b/cmake/git_hash.h.in @@ -0,0 +1,6 @@ +#ifndef _GIT_HASH_H +#define _GIT_HASH_H + +const char git_hash[] = "@TM_SHA@"; + +#endif /* _GIT_HASH_H */ diff --git a/include/tmlqcd_config_internal.h.in b/cmake/tmlqcd_config_internal.h.in similarity index 56% rename from include/tmlqcd_config_internal.h.in rename to cmake/tmlqcd_config_internal.h.in index 037ad84a5..5dd9c7096 100644 --- a/include/tmlqcd_config_internal.h.in +++ b/cmake/tmlqcd_config_internal.h.in @@ -4,177 +4,139 @@ * systems, for example. */ /* We are on a CRAY */ -#undef CRAY +#cmakedefine CRAY /* lapack available */ -#undef HAVE_LAPACK +#cmakedefine HAVE_LAPACK /* Define to 1 if you have the `lime' library (-llime).
*/ -#undef HAVE_LIBLIME +#cmakedefine HAVE_LIBLIME /* Define to 1 if you have the `lemon' library (-llemon). */ -#undef HAVE_LIBLEMON +#cmakedefine HAVE_LIBLEMON /* 1 if clock_gettime is available for use in benchmark */ -#undef HAVE_CLOCK_GETTIME +#cmakedefine HAVE_CLOCK_GETTIME /* Compile with MPI support */ -#undef TM_USE_MPI +#cmakedefine TM_USE_MPI /* Compile with OpenMP support */ -#undef TM_USE_OMP +#cmakedefine TM_USE_OMP /* Compile with FFTW support */ -#undef HAVE_FFTW +#cmakedefine HAVE_FFTW /* Fortran has not extra _ */ -#undef NOF77_ +#cmakedefine NOF77_ /* Define to the address where bug reports for this package should be sent. */ -#undef PACKAGE_BUGREPORT +#define PACKAGE_BUGREPORT "@PACKAGE_BUGREPORT@" /* Define to the full name of this package. */ -#undef PACKAGE_NAME - +#define PACKAGE_NAME "@PROJECT_DESCRIPTION@" /* Define to the full name and version of this package. */ -#undef PACKAGE_STRING +#define PACKAGE_STRING "@PROJECT_DESCRIPTION@ @PROJECT_VERSION@" /* Define to the one symbol short name of this package. */ -#undef PACKAGE_TARNAME +#define PACKAGE_TARNAME "@PACKAGE_TARNAME@" /* Define to the version of this package. */ -#undef PACKAGE_VERSION +#define PACKAGE_VERSION "@PROJECT_VERSION@" /* X parallelisation */ -#undef PARALLELX +#cmakedefine PARALLELX /* XY parallelisation */ -#undef PARALLELXY +#cmakedefine PARALLELXY /* XYZ parallelisation */ -#undef PARALLELXYZ +#cmakedefine PARALLELXYZ /* One dimensional parallelisation */ -#undef PARALLELT +#cmakedefine PARALLELT /* Two dimensional parallelisation */ -#undef PARALLELXT +#cmakedefine PARALLELXT /* Three dimensional parallelisation */ -#undef PARALLELXYT +#cmakedefine PARALLELXYT /* Four dimensional parallelisation */ -#undef PARALLELXYZT +#cmakedefine PARALLELXYZT /* Fixed volume at compiletime */ -#undef FIXEDVOLUME +#cmakedefine FIXEDVOLUME /* Define to 1 if fseeko (and presumably ftello) exists and is declared.
*/ -#undef HAVE_FSEEKO +#cmakedefine HAVE_FSEEKO /* Alignment for arrays -- necessary for SSE and automated vectorization */ -#undef ALIGN_BASE +#define ALIGN_BASE @ALIGN_BASE@ /* Alignment compiler hint macro */ -#undef ALIGN +#define ALIGN @ALIGN@ /* Alignment for 32bit arrays -- necessary for SSE and automated vectorization */ -#undef ALIGN_BASE32 +#define ALIGN_BASE32 @ALIGN_BASE32@ /* Alignment of 32bit fields, compiler hint macro */ -#undef ALIGN32 +#define ALIGN32 @ALIGN32@ /* Define to 1 if `lex' declares `yytext' as a `char *' by default, not a `char[]'. */ -#undef YYTEXT_POINTER +#cmakedefine YYTEXT_POINTER /* Number of bits in a file offset, on hosts where this is settable. */ -#undef _FILE_OFFSET_BITS +#cmakedefine _FILE_OFFSET_BITS @_FILE_OFFSET_BITS@ /* Construct an extra copy of the gauge fields */ -#undef _GAUGE_COPY +#cmakedefine _GAUGE_COPY /* Define to 1 to make fseeko visible on some hosts (e.g. glibc 2.2). */ -#undef _LARGEFILE_SOURCE +#cmakedefine _LARGEFILE_SOURCE /* Define for large files, on AIX-style hosts. */ -#undef _LARGE_FILES +#cmakedefine _LARGE_FILES /* Use even/odd geometry in the gauge fields */ -#undef _NEW_GEOMETRY +#cmakedefine _NEW_GEOMETRY /* x86 64 Bit architecture */ -#undef _x86_64 - -/* Define to empty if `const' does not conform to ANSI C. */ -#undef const - -/* Define to `__inline__' or `__inline' if that's what the C compiler - calls it, or to nothing if 'inline' is not supported under any name. */ -#ifndef __cplusplus -#undef inline -#endif - -/* Define to `long' if <sys/types.h> does not define. */ -#undef off_t - -/* Define to `unsigned' if <sys/types.h> does not define. */ -#undef size_t - -/* Define to 1 if you have the <stdint.h> header file. */ -#undef HAVE_STDINT_H - -/* Define to 1 if you have the <sys/types.h> header file. */ -#undef HAVE_SYS_TYPES_H - -/* Define to 1 if the system has the type `uint16_t'. */ -#undef HAVE_UINT16_T - -/* Define to 1 if the system has the type `uint32_t'.
*/ -#undef HAVE_UINT32_T - -/* Define to 1 if the system has the type `uint64_t'. */ -#undef HAVE_UINT64_T - -/* Define to 1 if you have the <unistd.h> header file. */ -#undef HAVE_UNISTD_H +#cmakedefine _x86_64 /* Define to 1 if Dirac operator with halfspinor should be used */ -#undef _USE_HALFSPINOR +#cmakedefine _USE_HALFSPINOR /* Define to 1 if shmem API should be used */ -#undef _USE_SHMEM +#cmakedefine _USE_SHMEM /* Define to 1 if KOJAK instrumentalisation should be done*/ -#undef _KOJAK_INST - -/* Define to equivalent of C99 restrict keyword, or to nothing if this is not - supported. Do not define if restrict is supported directly. */ -#undef restrict +#cmakedefine _KOJAK_INST /* Define to 1 if persistent MPI calls for halfspinor should be used */ -#undef _PERSISTENT +#cmakedefine _PERSISTENT /* Define to 1 if non-blocking MPI calls for spinor and gauge should be used */ -#undef _NON_BLOCKING +#cmakedefine _NON_BLOCKING /* Define to 1 if you have the `quda' library (-lquda). */ -#undef HAVE_LIBQUDA +#cmakedefine HAVE_LIBQUDA /* Using QUDA GPU */ -#undef TM_USE_QUDA +#cmakedefine TM_USE_QUDA /* Using experimental QUDA version */ -#undef TM_QUDA_EXPERIMENTAL +#cmakedefine TM_QUDA_EXPERIMENTAL /* Using QUDA fermionic forces */ -#undef TM_QUDA_FERMIONIC_FORCES +#cmakedefine TM_QUDA_FERMIONIC_FORCES /* Using DDalphaAMG */ -#undef DDalphaAMG +#cmakedefine DDalphaAMG /* Using QPHIX */ -#undef TM_USE_QPHIX +#cmakedefine TM_USE_QPHIX /* Structure of Array length to use with QPhiX */ -#undef QPHIX_SOALEN +#cmakedefine QPHIX_SOALEN @QPHIX_SOALEN@ diff --git a/cmake_includes.txt b/cmake_includes.txt new file mode 100644 index 000000000..b8e105cc0 --- /dev/null +++ b/cmake_includes.txt @@ -0,0 +1,425 @@ +list(APPEND IO_SRC_C io/utils_write_inverter_info.c +io/gauge_read.c +io/utils_write_xlf.c +io/utils_construct_reader.c +io/params_construct_xlfInfo.c +io/utils_kill_with_error.c +io/DML_crc32.c +io/spinor_write_source_format.c +io/deri_write_stdout.c
+io/spinor_write_propagator_format.c +io/utils_engineering.c +io/utils_parse_propagator_type.c +io/io_cm.c +io/utils_parse_ildgformat_xml.c +io/utils_read_message.c +io/utils_write_ildg_format.c +io/utils_destruct_writer.c +io/gauge_write.c +io/utils_write_message.c +io/params_construct_ildgFormat.c +io/spinor_read.c +io/utils_close_reader_record.c +io/spinor_read_binary.c +io/utils.c +io/spinor_write_stdout.c +io/spinor_write_info.c +io/utils_write_checksum.c +io/utils_write_header.c +io/eospinor_read.c +io/utils_write_first_message.c +io/params_construct_InverterInfo.c +io/utils_parse_checksum_xml.c +io/utils_construct_writer.c +io/sw_write_stdout.c +io/spinor_write_propagator_type.c +io/gauge_write_binary.c +io/spinor_write.c +io/utils_write_xlf_xml.c +io/params_construct_propagatorFormat.c +io/gauge_read_binary.c +io/dml.c +io/spinor_write_binary.c +io/utils_destruct_reader.c +io/utils_close_writer_record.c +io/eospinor_write.c +io/gauge_write_luscher_binary.c +io/params_construct_sourceFormat.c) + +list(APPEND INIT_SRC_C init/init_dirac_halfspinor.c + init/init_geometry_indices.c + init/init_openmp.c + init/init_gauge_field.c + init/init_parallel.c + init/init_chi_spinor_field.c + init/init_gauge_fg.c + init/init_spinor_field.c + init/init_global_states.c + init/init_bispinor_field.c + init/init_gauge_tmp.c + init/init_critical_globals.c + init/init_omp_accumulators.c + init/init_jacobi_field.c + init/init_stout_smear_vars.c + init/init_moment_field.c) + +list(APPEND SOLVER_SRC_C +solver/bicg_complex.c +solver/dfl_projector.c +solver/eigenvalues_Jacobi.c +solver/gcr.c +solver/gmres_precon.c +solver/chrono_guess.c +solver/gcr4complex.c +solver/jdher.c +solver/gcr4complex_body.c +solver/gmres_dr.c +solver/fgmres4complex_body.c +solver/cg_her_bi.c +solver/solver_field.c +solver/quicksort.c +solver/bicgstab2.c +solver/cgs_real.c +solver/M_plus_block_psi_body.c +solver/little_mg_precon_body.c +solver/cg_her_su3vect.c +solver/little_project_eo_body.c 
+solver/monomial_solve.c +solver/cr.c +solver/gram-schmidt.c +solver/solver_types.c +solver/mode_number.c +solver/cg_her.c +solver/jdher_bi.c +solver/mrblk_body.c +solver/eigcg.c +solver/jdher_su3vect.c +solver/poly_precon.c +solver/Msap.c +solver/fgmres.c +solver/dirac_operator_eigenvectors.c +solver/incr_eigcg.c +solver/index_jd.c +solver/sumr.c +solver/cgne4complex.c +solver/eigenvalues_bi.c +solver/gmres.c +solver/lu_solve.c +solver/diagonalise_general_matrix.c +solver/mcr.c +solver/bicgstabell.c +solver/rg_mixed_cg_her.c +solver/mixed_cg_her.c +solver/mixed_cg_mms_tm_nd.c +solver/rg_mixed_cg_her_nd.c +solver/spectral_proj.c +solver/restart_X.c +solver/generate_dfl_subspace.c +solver/eigenvalues.c +solver/mcr4complex.c +solver/mr4complex.c +solver/bicgstab_complex.c +solver/cg_mms_tm_nd.c +solver/mr.c +solver/cg_her_nd.c +solver/bicgstab_complex_bi.c +solver/sub_low_ev.c +solver/ortho.c +solver/pcg_her.c +solver/fgmres4complex.c +solver/cg_mms_tm.c +solver/init_guess.c) + +list(APPEND LINALG_SRC_C linalg/assign_mul_bra_add_mul_r.c + linalg/mul_r_gamma5.c + linalg/convert_eo_to_lexic.c + linalg/print_spinor.c + linalg/assign_add_mul_body.c + linalg/mul_diff_mul_r.c + linalg/square_norm_32.c + linalg/mul.c + linalg/mul_r.c + linalg/mul_gamma5.c + linalg/ratio.c + linalg/square_norm.c + linalg/mul_diff_mul.c + linalg/square_and_minmax.c + linalg/add.c + linalg/assign_add_mul_add_mul_r.c + linalg/comp_decomp.c + linalg/mul_add_mul.c + linalg/diff_32.c + linalg/assign_add_mul.c + linalg/addto_32.c + linalg/assign_mul_add_mul_add_mul_add_mul_r.c + linalg/assign_add_mul_r.c + linalg/diff.c + linalg/assign_mul_add_mul_r.c + linalg/scalar_prod_r.c + linalg/assign_to_32.c + linalg/assign_add_mul_add_mul.c + linalg/mul_diff_r.c + linalg/assign_mul_add_r_and_square.c + linalg/assign_mul_add_mul_r_32.c + linalg/assign_mul_add_mul.c + linalg/assign_mul_add_mul_add_mul_r.c + linalg/scalar_prod_r_32.c + linalg/assign_mul_add_r.c + linalg/assign_mul_add_r_32.c + 
linalg/scalar_prod_su3spinor.c + linalg/convert_even_to_lexic.c + linalg/mul_r_32.c + linalg/assign_add_mul_r_add_mul.c + linalg/convert_odd_to_lexic.c + linalg/diff_and_square_norm.c + linalg/scalar_prod_i.c + linalg/mul_add_mul_r.c + linalg/assign_diff_mul.c + linalg/assign_mul_bra_add_mul_ket_add_r.c + linalg/set_even_to_zero.c + linalg/assign_mul_add.c + linalg/square_and_prod_r.c + linalg/scalar_prod_body.c + linalg/assign_mul_bra_add_mul_ket_add.c + linalg/assign_add_mul_r_32.c + linalg/scalar_prod.c + linalg/mattimesvec.c + linalg/assign.c + linalg/print_spinor_similar_components.c) + +list(APPEND RATIONAL_SRC_C rational/zolotarev.c + rational/rational.c + rational/elliptic.c) + +list(APPEND OPERATOR_SRC_C operator/clover_invert.c + operator/hopping_body_dbl.c + operator/tm_operators_nd_32.c + operator/hopping_sse_dbl.c + operator/halfspinor_body.c + operator/Block_D_psi_body.c + operator/mul_one_pm_imu_sub_mul_body.c + operator/assign_mul_one_sw_pm_imu_site_lexic_body.c + operator/assign_mul_one_sw_pm_imu_inv_block_body.c + operator/clover_accumulate_deriv.c + operator/Hopping_Matrix.c + operator/hopping_bg_dbl.c + operator/tm_operators.c + operator/tm_times_Hopping_Matrix.c + operator/clovertm_operators_32.c + operator/hopping_sgl.c + operator/Dov_proj.c + operator/clover_deriv.c + operator/halfspinor_bg_dbl.c + operator/clover_det.c + operator/clover_leaf.c + operator/D_psi_body.c + operator/clovertm_operators.c + operator/hopping_sse_sgl.c + operator/halfspinor_sse_dbl.c + operator/Dov_psi.c + operator/tm_operators_nd.c + operator/tm_sub_Hopping_Matrix.c + operator/Hopping_Matrix_nocom.c + operator/clover_term.c + operator/halfspinor_bgq_dbl.c + operator/Hopping_Matrix_32_nocom.c + operator/D_psi.c + operator/tm_operators_32.c + operator/Hopping_Matrix_32.c + operator/halfspinor_body_32.c + operator/mul_one_pm_imu_inv_body.c) + +list(APPEND SMEARING_SRC_C smearing/hex_stout_exclude_two.c + smearing/hex_hex_smear.c + smearing/utils_print_su3.c + 
smearing/hyp_APE_project_exclude_none.c + smearing/hyp_hyp_staples_exclude_one.c + smearing/hyp_APE_project_exclude_one.c + smearing/hex_stout_exclude_one.c + smearing/hyp_hyp_staples_exclude_two.c + smearing/hex_stout_exclude_none.c + smearing/stout_stout_smear.c + smearing/hyp_hyp_smear.c + smearing/hyp_APE_project_exclude_two.c + smearing/utils_project_herm.c + smearing/utils_reunitarize.c + smearing/utils_generic_staples.c + smearing/hyp_hyp_staples_exclude_none.c + smearing/ape_ape_smear.c + smearing/uils_print_config_to_screen.c + smearing/utils_project_antiherm.c + smearing/utils_print_config_to_screen.c + smearing/utils_reunitarize_MILC.c) + +list(APPEND BUFFER_SRC_C + buffers/gauge_return_gauge_field.c + buffers/gauge_get_gauge_field.c + buffers/gauge_finalize_gauge_buffers.c + buffers/gauge_initialize_gauge_buffers.c + buffers/gauge.c + buffers/gauge_free_unused_gauge_buffers.c + buffers/gauge_get_gauge_field_array.c + buffers/utils_generic_exchange.c + buffers/gauge_allocate_gauge_buffers.c + buffers/gauge_return_gauge_field_array.c) + +list(APPEND MONOMIAL_SRC_C + monomial/detratio_monomial.c + monomial/sf_gauge_monomial.c + monomial/poly_monomial.c + monomial/cloverdetratio_monomial.c + monomial/ndrat_monomial.c + monomial/cloverdet_monomial.c + monomial/clover_trlog_monomial.c + monomial/cloverndpoly_monomial.c + monomial/monitor_forces.c + monomial/ndpoly_monomial.c + monomial/det_monomial.c + monomial/monomial.c + monomial/cloverdetratio_rwmonomial.c + monomial/gauge_monomial.c + monomial/clovernd_trlog_monomial.c + monomial/ratcor_monomial.c + monomial/nddetratio_monomial.c + monomial/rat_monomial.c + monomial/ndratcor_monomial.c + monomial/moment_energy.c) + +list(APPEND EXCHANGE_SRC_C xchange/xchange_lexicfield.c +xchange/xchange_2fields.c +xchange/xchange_gauge.c +xchange/xchange_halffield.c +xchange/xchange_jacobi.c +xchange/little_field_gather_body.c +xchange/little_field_gather.c +xchange/xchange_deri.c +xchange/xchange_field.c 
+xchange/xchange_field_tslice.c) + +list(APPEND MEAS_SRC_C +meas/pion_norm.c +meas/correlators.c +meas/polyakov_loop.c +meas/measurements.c +meas/oriented_plaquettes.c +meas/gradient_flow.c +meas/measure_clover_field_strength_observables.c) + +list(APPEND SF_SRC_C sf/sf_calc_action.c + sf/sf_get_rectangle_staples.c + sf/sf_get_staples.c + sf/sf_observables.c + sf/sf_utils.c + ) + +list(APPEND MAIN_SRC_C +measure_gauge_action.c +start.c +deriv_Sb.c +reweighting_factor_nd.c +ranlxs.c +source_generation.c +read_input.c +invert_doublet_eo.c +geometry_eo.c +getopt.c +offline_measurement.c +tm_debug_printf.c +chebyshev_polynomial_nd.c +invert_eo.c +little_D.c +get_rectangle_staples.c +gen_sources.c +rnd_gauge_trafo.c +test_lemon.c +LapH_ev.c +benchmark.c +measure_rectangles.c +check_locallity.c +invert.c +deriv_Sb_D_psi.c +deriv_mg_tune.c +mpi_init.c +update_momenta_fg.c +gamma.c +matrix_utils.c +reweighting_factor.c +update_tm.c +jacobi.c +invert_overlap.c +phmc.c +get_staples.c +clenshaw_coef.c +block.c +spinor_fft.c +boundary.c +little_D_body.c +X_psi.c +prepare_source.c +DDalphaAMG_interface.c +update_backward_gauge.c +invert_clover_eo.c +gettime.c +hmc_tm.c +update_momenta.c +sighandler.c +compare_derivative.c +ranlxd.c +DirectPut.c +aligned_malloc.c +fatal_error.c +operator.c +cu/cu.c +chebyshev_polynomial.c +qphix_test_Dslash.c +expo.c +overrelaxation.c +Ptilde_nd.c +update_gauge.c +hopping_test.c +integrator.c +P_M_eta.c) + +if (TMLQCD_USE_QPHIX) +list(APPEND MAIN_SRC_C qphix_interface.cpp) +endif() + +if (TMLQCD_USE_QUDA) +list(APPEND MAIN_SRC_C quda_interface.c) +endif() + +list(APPEND ALL_SRC ${MAIN_SRC_C} ${SF_SRC_C} ${XCHANGE_SRC_C} ${MONOMIAL_SRC_C} ${BUFFER_SRC_C} ${SMEARING_SRC_C} ${OPERATOR_SRC_C} ${RATIONAL_SRC_C} ${LINALG_SRC_C} ${IO_SRC_C} ${INIT_SRC_C} ${SOLVER_SRC_C}) + +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +# cmake 4.0 uses a different syntax for the option +flex_target(tmlqcd_input_read input_read.l input_read.c + $<$:COMPILE_FLAGS 
"-Ca -Ptmlqcd"> + $<$:OPTIONS "-Ca;-Ptmlqcd">) + +# create a target library with namespacing because cmake does not know name space at all +add_library(tmlqcd::hmc ALL_SRC ${FLEX_tmlqcd_input_read_OUTPUTS}) +set_target_properties(tmlqcd::hmc PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION 1) + +# define a library and add the dependencies +target_link_libraries(tmlqcd::hmc + $<$:rt> + $<$:tmlqcd::lime> + $<$:tmlqcd::lemon> + $<$:tmlqcd::qphix> + $<$:tmlqcd::fftw3> + $<$:MPI::MPI_C MPI::MPI_CXX> + $<$:quda::quda> + $<$:CUDA::cufft CUDA::cufftw CUDA::cublas CUDA::cudart CUDA::cuda_driver> + $<$:hip::hipfft roc::hipblas hip::host> + ${LAPACK_LIBRARIES} + ${BLAS_LIBRARIES} + $<$:OpenMP::OpenMP_C OpenMP::OpenMP_CXX> + m) + +target_compile_definitions(tmlqcd::hmc + $<$:${TMLQCD_GPU_PLATFORM_DFLAGS}> + ) + +target_include_directories(tmlqcd::hmc PUBLIC $ + PRIVATE "init io linalg meas monomial operator profiling rational sf smearing solver util xchange wrapper") diff --git a/io/Makefile.in b/io/Makefile.in deleted file mode 100644 index 41b5b78ce..000000000 --- a/io/Makefile.in +++ /dev/null @@ -1,135 +0,0 @@ - -srcdir = @srcdir@ -top_builddir = @top_builddir@ -abs_top_builddir = @abs_top_builddir@ -top_srcdir = @top_srcdir@ -abs_top_srcdir = @abs_top_srcdir@ -subdir = io -builddir = @builddir@ - -CFLAGS = @CFLAGS@ -DEPFLAGS = @DEPFLAGS@ -LDFLAGS = @LDFLAGS@ -DEFS = @DEFS@ -OPTARGS = @OPTARGS@ - -AR = @AR@ -RANLIB = @RANLIB@ -CC = @CC@ -CCDEP = @CCDEP@ -CCLD = $(CC) -LINK = $(CCLD) $(CFLAGS) $(LDFLAGS) ${OPTARGS} -o $@ -LEX = @LEX@ -AUTOCONF = @AUTOCONF@ -DEFS = @DEFS@ - -LEMON_AVAILABLE = @LEMON_AVAILABLE@ - -INCLUDES = @INCLUDES@ -LDADD = -COMPILE = ${CC} ${DEFS} ${INCLUDES} ${CFLAGS} ${OPTARGS} - -LIBRARIES = libio - -libio_TARGETS = utils_engineering \ - utils_parse_checksum_xml \ - utils_write_message \ - utils_read_message \ - gauge_write_binary \ - gauge_read_binary \ - gauge_read \ - gauge_write \ - utils_write_xlf \ - utils_write_xlf_xml \ - 
utils_write_ildg_format \ - utils_write_header \ - utils_write_checksum \ - utils_write_inverter_info \ - utils_kill_with_error \ - utils_construct_reader \ - utils_destruct_reader \ - utils_construct_writer \ - utils_destruct_writer \ - utils_close_writer_record \ - utils_close_reader_record \ - utils_write_first_message \ - utils_parse_propagator_type \ - utils_parse_ildgformat_xml \ - params_construct_ildgFormat \ - params_construct_propagatorFormat \ - params_construct_sourceFormat \ - params_construct_xlfInfo \ - params_construct_InverterInfo \ - spinor_write \ - spinor_read \ - spinor_write_binary \ - spinor_read_binary \ - spinor_write_info \ - spinor_write_source_format \ - spinor_write_propagator_format \ - spinor_write_propagator_type \ - utils DML_crc32 dml \ - eospinor_write \ - eospinor_read \ - io_cm \ - deri_write_stdout spinor_write_stdout sw_write_stdout \ - gauge_write_luscher_binary - -libio_OBJECTS = $(addsuffix .o, ${libio_TARGETS}) - -# default rule - -all: Makefile dep libio.a - -# rules for debugging -debug all-debug: CFLAGS := $(CFLAGS) @DEBUG_FLAG@ -debug all-debug: all - -# rules for profiling information -profile all-profile: CFLAGS := $(filter-out -fomit-frame-pointer,${CFLAGS}) @PROFILE_FLAG@ -profile all-profile: all - - -#include dep rules - - --include $(addsuffix .d,${libio_TARGETS}) - -include ${top_srcdir}/Makefile.global - -# rule to compile objects - -%.o: ${srcdir}/%.c %.d Makefile ${abs_top_builddir}/include/tmlqcd_config_internal.h - $(COMPILE) -c $< - - -# rule to make libio -libio.a: ${libio_OBJECTS} Makefile - @rm -f libio.a - @${AR} cru libio.a $(libio_OBJECTS) - @$(RANLIB) libio.a - @cp libio.a ${top_builddir}/lib/libio.a - -# rule to generate .d files - -$(addsuffix .d,$(libio_TARGETS)): %.d: ${srcdir}/%.c Makefile - @$(CCDEP) ${DEFS} ${DEPFLAGS} ${INCLUDES} $< > $@ - -# rule to make dependencies - -dep: ${addsuffix .d, ${libio_TARGETS}} - -# rules to clean - -compile-clean: Makefile - rm -f ${$(addsuffix _OBJECTS, 
${LIBRARIES})} *.d - -clean: compile-clean - rm -f $(addsuffix .a, ${LIBRARIES}) - rm -f ../lib/libio.a - -distclean: clean - rm -f Makefile - - -.PHONY: all dep clean compile-clean distclean debug all-debug profile all-profile diff --git a/src/bin/CMakeLists.txt b/src/bin/CMakeLists.txt new file mode 100644 index 000000000..29c9c1d8a --- /dev/null +++ b/src/bin/CMakeLists.txt @@ -0,0 +1,19 @@ +list(APPEND tmlqcd_prog "benchmark;deriv_mg_tune;hmc_tm;offline_measurement") + +include_directories( + $ + $ + $ + $<$:${TM_LEMON_INCLUDE_DIRS}> + ${TM_CLIME_INCLUDE_DIRS}) + +foreach(_prog ${tmlqcd_prog}) + add_executable(${_prog} "${_prog}.c") + + target_link_libraries(${_prog} PUBLIC hmc) + set_target_properties( + ${_prog} + PROPERTIES IMPORTED_LINK_INTERFACE_LANGUAGES "CXX" + POSITION_INDEPENDENT_CODE ON + LINKER_LANGUAGE "CXX") +endforeach() diff --git a/LapH_ev.c b/src/bin/LapH_ev.c similarity index 100% rename from LapH_ev.c rename to src/bin/LapH_ev.c diff --git a/benchmark.c b/src/bin/benchmark.c similarity index 100% rename from benchmark.c rename to src/bin/benchmark.c diff --git a/check_locallity.c b/src/bin/check_locallity.c similarity index 99% rename from check_locallity.c rename to src/bin/check_locallity.c index 9ed46daee..52ea21209 100644 --- a/check_locallity.c +++ b/src/bin/check_locallity.c @@ -17,10 +17,8 @@ * along with tmLQCD. If not, see . 
***********************************************************************/ -#include "lime.h" -#ifdef HAVE_CONFIG_H +#include #include -#endif #include #include #include diff --git a/deriv_mg_tune.c b/src/bin/deriv_mg_tune.c similarity index 100% rename from deriv_mg_tune.c rename to src/bin/deriv_mg_tune.c diff --git a/gen_sources.c b/src/bin/gen_sources.c similarity index 100% rename from gen_sources.c rename to src/bin/gen_sources.c diff --git a/hmc_tm.c b/src/bin/hmc_tm.c similarity index 100% rename from hmc_tm.c rename to src/bin/hmc_tm.c diff --git a/hopping_test.c b/src/bin/hopping_test.c similarity index 100% rename from hopping_test.c rename to src/bin/hopping_test.c diff --git a/invert.c b/src/bin/invert.c similarity index 100% rename from invert.c rename to src/bin/invert.c diff --git a/util/main_ildg2uk.c b/src/bin/main_ildg2uk.c similarity index 100% rename from util/main_ildg2uk.c rename to src/bin/main_ildg2uk.c diff --git a/offline_measurement.c b/src/bin/offline_measurement.c similarity index 100% rename from offline_measurement.c rename to src/bin/offline_measurement.c diff --git a/qphix_test_Dslash.c b/src/bin/qphix_test_Dslash.c similarity index 100% rename from qphix_test_Dslash.c rename to src/bin/qphix_test_Dslash.c diff --git a/test/scalar_prod_r_test.c b/src/bin/scalar_prod_r_test.c similarity index 100% rename from test/scalar_prod_r_test.c rename to src/bin/scalar_prod_r_test.c diff --git a/test/test_eigenvalues.c b/src/bin/test_eigenvalues.c similarity index 100% rename from test/test_eigenvalues.c rename to src/bin/test_eigenvalues.c diff --git a/test_lemon.c b/src/bin/test_lemon.c similarity index 100% rename from test_lemon.c rename to src/bin/test_lemon.c diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt new file mode 100644 index 000000000..4ace6c997 --- /dev/null +++ b/src/lib/CMakeLists.txt @@ -0,0 +1,457 @@ +list( + APPEND + IO_SRC_C + io/utils_write_inverter_info.c + io/gauge_read.c + io/utils_write_xlf.c + 
io/utils_construct_reader.c + io/params_construct_xlfInfo.c + io/utils_kill_with_error.c + io/DML_crc32.c + io/spinor_write_source_format.c + io/deri_write_stdout.c + io/spinor_write_propagator_format.c + io/utils_engineering.c + io/utils_parse_propagator_type.c + io/io_cm.c + io/utils_parse_ildgformat_xml.c + io/utils_read_message.c + io/utils_write_ildg_format.c + io/utils_destruct_writer.c + io/gauge_write.c + io/utils_write_message.c + io/params_construct_ildgFormat.c + io/spinor_read.c + io/utils_close_reader_record.c + io/spinor_read_binary.c + io/utils.c + io/spinor_write_stdout.c + io/spinor_write_info.c + io/utils_write_checksum.c + io/utils_write_header.c + io/eospinor_read.c + io/utils_write_first_message.c + io/params_construct_InverterInfo.c + io/utils_parse_checksum_xml.c + io/utils_construct_writer.c + io/sw_write_stdout.c + io/spinor_write_propagator_type.c + io/gauge_write_binary.c + io/spinor_write.c + io/utils_write_xlf_xml.c + io/params_construct_propagatorFormat.c + io/gauge_read_binary.c + io/dml.c + io/spinor_write_binary.c + io/utils_destruct_reader.c + io/utils_close_writer_record.c + io/eospinor_write.c + io/gauge_write_luscher_binary.c + io/params_construct_sourceFormat.c) + +list( + APPEND + INIT_SRC_C + init/init_dirac_halfspinor.c + init/init_geometry_indices.c + init/init_openmp.c + init/init_gauge_field.c + init/init_parallel.c + init/init_chi_spinor_field.c + init/init_gauge_fg.c + init/init_spinor_field.c + init/init_global_states.c + init/init_bispinor_field.c + init/init_gauge_tmp.c + init/init_critical_globals.c + init/init_omp_accumulators.c + # init/init_stout_smear_vars.c + init/init_moment_field.c) + +list( + APPEND + SOLVER_SRC_C + solver/bicg_complex.c + solver/dfl_projector.c + solver/gcr.c + # solver/gmres_precon.c + solver/chrono_guess.c + solver/gcr4complex.c + solver/jdher.c + # solver/gcr4complex_body.c + solver/gmres_dr.c + # solver/fgmres4complex_body.c + solver/cg_her_bi.c + solver/solver_field.c + 
solver/quicksort.c + solver/bicgstab2.c + solver/cgs_real.c + # solver/M_plus_block_psi_body.c solver/little_mg_precon_body.c + # solver/little_project_eo_body.c + solver/monomial_solve.c + solver/cr.c + solver/gram-schmidt.c + solver/solver_types.c + solver/cg_her.c + solver/jdher_bi.c + # solver/mrblk_body.c + solver/eigcg.c + solver/poly_precon.c + solver/Msap.c + solver/fgmres.c + solver/dirac_operator_eigenvectors.c + solver/incr_eigcg.c + solver/index_jd.c + solver/sumr.c + solver/cgne4complex.c + solver/eigenvalues_bi.c + solver/gmres.c + solver/lu_solve.c + solver/diagonalise_general_matrix.c + solver/mcr.c + solver/bicgstabell.c + solver/rg_mixed_cg_her.c + solver/mixed_cg_her.c + solver/mixed_cg_mms_tm_nd.c + solver/rg_mixed_cg_her_nd.c + solver/restart_X.c + solver/generate_dfl_subspace.c + solver/eigenvalues.c + solver/mcr4complex.c + solver/mr4complex.c + solver/bicgstab_complex.c + solver/cg_mms_tm_nd.c + solver/mr.c + solver/cg_her_nd.c + solver/bicgstab_complex_bi.c + solver/sub_low_ev.c + solver/ortho.c + solver/pcg_her.c + solver/fgmres4complex.c + solver/cg_mms_tm.c + solver/init_guess.c) + +list( + APPEND + LINALG_SRC_C + linalg/assign_mul_bra_add_mul_r.c + linalg/mul_r_gamma5.c + linalg/convert_eo_to_lexic.c + linalg/print_spinor.c + # linalg/assign_add_mul_body.c + linalg/mul_diff_mul_r.c + linalg/square_norm_32.c + linalg/mul.c + linalg/mul_r.c + linalg/mul_gamma5.c + linalg/ratio.c + linalg/square_norm.c + linalg/mul_diff_mul.c + linalg/square_and_minmax.c + linalg/add.c + linalg/assign_add_mul_add_mul_r.c + linalg/comp_decomp.c + linalg/mul_add_mul.c + linalg/diff_32.c + linalg/assign_add_mul.c + linalg/addto_32.c + linalg/assign_mul_add_mul_add_mul_add_mul_r.c + linalg/assign_add_mul_r.c + linalg/diff.c + linalg/assign_mul_add_mul_r.c + linalg/scalar_prod_r.c + linalg/assign_to_32.c + linalg/assign_add_mul_add_mul.c + linalg/mul_diff_r.c + linalg/assign_mul_add_r_and_square.c + linalg/assign_mul_add_mul_r_32.c + linalg/assign_mul_add_mul.c 
+ linalg/assign_mul_add_mul_add_mul_r.c + linalg/scalar_prod_r_32.c + linalg/assign_mul_add_r.c + linalg/assign_mul_add_r_32.c + linalg/assign_add_mul_r_32.c + linalg/convert_even_to_lexic.c + linalg/mul_r_32.c + linalg/assign_add_mul_r_add_mul.c + linalg/convert_odd_to_lexic.c + linalg/diff_and_square_norm.c + linalg/scalar_prod_i.c + linalg/mul_add_mul_r.c + linalg/assign_diff_mul.c + linalg/assign_mul_bra_add_mul_ket_add_r.c + linalg/set_even_to_zero.c + linalg/assign_mul_add.c + linalg/square_and_prod_r.c + # linalg/scalar_prod_body.c + linalg/assign_mul_bra_add_mul_ket_add.c + linalg/assign_add_mul_r_32.c + linalg/scalar_prod.c + linalg/mattimesvec.c + linalg/assign.c + linalg/print_spinor_similar_components.c) + +list(APPEND RATIONAL_SRC_C rational/zolotarev.c rational/rational.c + rational/elliptic.c) + +list( + APPEND + OPERATOR_SRC_C + operator/clover_invert.c + # operator/hopping_body_dbl.c + operator/tm_operators_nd_32.c + # operator/halfspinor_body.c operator/Block_D_psi_body.c + # operator/mul_one_pm_imu_sub_mul_body.c + # operator/assign_mul_one_sw_pm_imu_site_lexic_body.c + # operator/assign_mul_one_sw_pm_imu_inv_block_body.c + operator/clover_accumulate_deriv.c + operator/Hopping_Matrix.c + operator/tm_operators.c + operator/tm_times_Hopping_Matrix.c + operator/clovertm_operators_32.c + # operator/hopping_sgl.c + operator/Dov_proj.c + operator/clover_deriv.c + operator/clover_det.c + operator/clover_leaf.c + # operator/D_psi_body.c + operator/clovertm_operators.c + operator/Dov_psi.c + operator/tm_operators_nd.c + operator/tm_sub_Hopping_Matrix.c + operator/Hopping_Matrix_nocom.c + operator/clover_term.c + operator/Hopping_Matrix_32_nocom.c + operator/D_psi.c + operator/tm_operators_32.c + operator/Hopping_Matrix_32.c) +# operator/halfspinor_body_32.c operator/mul_one_pm_imu_inv_body.c) + +list( + APPEND + SMEARING_SRC_C + smearing/hex_stout_exclude_two.c + smearing/hex_hex_smear.c + smearing/utils_print_su3.c + 
smearing/hyp_APE_project_exclude_none.c + smearing/hyp_hyp_staples_exclude_one.c + smearing/hyp_APE_project_exclude_one.c + smearing/hex_stout_exclude_one.c + smearing/hyp_hyp_staples_exclude_two.c + smearing/hex_stout_exclude_none.c + smearing/stout_stout_smear.c + smearing/hyp_hyp_smear.c + smearing/hyp_APE_project_exclude_two.c + smearing/utils_project_herm.c + smearing/utils_reunitarize.c + smearing/utils_generic_staples.c + smearing/hyp_hyp_staples_exclude_none.c + smearing/ape_ape_smear.c + smearing/uils_print_config_to_screen.c + smearing/utils_project_antiherm.c) +# smearing/utils_print_config_to_screen.c smearing/utils_reunitarize_MILC.c) + +list( + APPEND + BUFFER_SRC_C + buffers/gauge_return_gauge_field.c + buffers/gauge_get_gauge_field.c + buffers/gauge_finalize_gauge_buffers.c + buffers/gauge_initialize_gauge_buffers.c + buffers/gauge.c + buffers/gauge_free_unused_gauge_buffers.c + buffers/gauge_get_gauge_field_array.c + buffers/utils_generic_exchange.c + buffers/gauge_allocate_gauge_buffers.c + buffers/gauge_return_gauge_field_array.c) + +list( + APPEND + MONOMIAL_SRC_C + monomial/detratio_monomial.c + monomial/poly_monomial.c + monomial/cloverdetratio_monomial.c + monomial/ndrat_monomial.c + monomial/cloverdet_monomial.c + monomial/clover_trlog_monomial.c + monomial/cloverndpoly_monomial.c + monomial/monitor_forces.c + monomial/ndpoly_monomial.c + monomial/det_monomial.c + monomial/monomial.c + monomial/cloverdetratio_rwmonomial.c + monomial/gauge_monomial.c + monomial/clovernd_trlog_monomial.c + monomial/ratcor_monomial.c + monomial/nddetratio_monomial.c + monomial/rat_monomial.c + monomial/ndratcor_monomial.c + monomial/moment_energy.c) + +list( + APPEND + EXCHANGE_SRC_C + xchange/xchange_lexicfield.c + xchange/xchange_2fields.c + xchange/xchange_gauge.c + xchange/xchange_halffield.c + # xchange/xchange_jacobi.c xchange/little_field_gather_body.c + xchange/little_field_gather.c + xchange/xchange_deri.c + xchange/xchange_field.c) +# 
xchange/xchange_field_tslice.c) + +list( + APPEND + MEAS_SRC_C + meas/pion_norm.c + meas/correlators.c + meas/polyakov_loop.c + meas/measurements.c + meas/oriented_plaquettes.c + meas/gradient_flow.c + meas/measure_clover_field_strength_observables.c) + +list( + APPEND + MAIN_SRC_C + # cu/cu.c + measure_gauge_action.c + start.c + deriv_Sb.c + reweighting_factor_nd.c + ranlxs.c + source_generation.c + invert_doublet_eo.c + geometry_eo.c + getopt.c + tm_debug_printf.c + chebyshev_polynomial_nd.c + invert_eo.c + little_D.c + get_rectangle_staples.c + rnd_gauge_trafo.c + measure_rectangles.c + #invert.c + deriv_Sb_D_psi.c + mpi_init.c + update_momenta_fg.c + gamma.c + matrix_utils.c + reweighting_factor.c + update_tm.c + invert_overlap.c + phmc.c + get_staples.c + clenshaw_coef.c + block.c + spinor_fft.c + boundary.c + prepare_source.c + DDalphaAMG_interface.c + update_backward_gauge.c + invert_clover_eo.c + gettime.c + update_momenta.c + sighandler.c + compare_derivative.c + ranlxd.c + aligned_malloc.c + fatal_error.c + operator.c + # cu/cu.c chebyshev_polynomial.c qphix_test_Dslash.c + expo.c + overrelaxation.c + Ptilde_nd.c + update_gauge.c + # hopping_test.c + integrator.c) + +list(APPEND TEST_SRC_C test/check_xchange.c test/check_geometry.c + test/overlaptests.c) +if(TMLQCD_USE_QPHIX) + list(APPEND MAIN_SRC_C QphiX/qphix_interface.cpp) +endif() + +if(TMLQCD_USE_QUDA) + list(APPEND MAIN_SRC_C quda_interface.c) +endif() + +list( + APPEND + ALL_SRC + ${MAIN_SRC_C} + ${EXCHANGE_SRC_C} + ${MONOMIAL_SRC_C} + ${BUFFER_SRC_C} + ${SMEARING_SRC_C} + ${OPERATOR_SRC_C} + ${RATIONAL_SRC_C} + ${LINALG_SRC_C} + ${IO_SRC_C} + ${INIT_SRC_C} + ${SOLVER_SRC_C} + ${TEST_SRC_C} + ${MEAS_SRC_C}) + +include_directories( + $ + $ + $ + $<$:${TMLQCD_LEMON_INCLUDE_DIRS}> + ${TMLQCD_CLIME_INCLUDE_DIRS}) + +# cmake 4.0 uses a different syntax for the option +if(CMAKE_MAJOR_VERSION LESS 4) + flex_target(tmlqcd_input_read read_input.l read_input.c + COMPILE_FLAGS "-Ca -Ptmlqcd") +else() + 
flex_target(tmlqcd_input_read read_input.l read_input.c OPTIONS + -Ca -Ptmlqcd) +endif() + +# build the hmc library; BUILD_SHARED_LIBS selects between a shared and a +# static archive + +if (BUILD_SHARED_LIBS) + add_library(hmc SHARED "${ALL_SRC};${FLEX_tmlqcd_input_read_OUTPUTS}") +else() + add_library(hmc STATIC "${ALL_SRC};${FLEX_tmlqcd_input_read_OUTPUTS}") +endif() + +set_target_properties(hmc PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION 1) + +# define a library and add the dependencies +target_link_libraries( + hmc + PUBLIC $<$:rt> + $<$:tmlqcd::clime> + $<$:clemon::lemon> + $<$:tmlqcd::qphix> + $<$:tmlqcd::fftw3> + $<$:MPI::MPI_C + MPI::MPI_CXX> + $<$:QUDA::quda> + $<$:CUDA::cufft + CUDA::cufftw + CUDA::cublas + CUDA::cudart + CUDA::cuda_driver> + $<$:hip::hipfft + roc::hipblas + hip::host> + ${LAPACK_LIBRARIES} + ${BLAS_LIBRARIES} + $<$:OpenMP::OpenMP_C + OpenMP::OpenMP_CXX> + m) + +target_compile_definitions( + hmc PUBLIC HAVE_CONFIG_H + $<$:${TM_GPU_PLATFORM_DFLAGS}>) + +target_include_directories( + hmc + PUBLIC $ + $ + $) diff --git a/DDalphaAMG_interface.c b/src/lib/DDalphaAMG_interface.c similarity index 100% rename from DDalphaAMG_interface.c rename to src/lib/DDalphaAMG_interface.c diff --git a/DDalphaAMG_interface.h b/src/lib/DDalphaAMG_interface.h similarity index 100% rename from DDalphaAMG_interface.h rename to src/lib/DDalphaAMG_interface.h diff --git a/Ptilde_nd.c b/src/lib/Ptilde_nd.c similarity index 100% rename from Ptilde_nd.c rename to src/lib/Ptilde_nd.c diff --git a/Ptilde_nd.h b/src/lib/Ptilde_nd.h similarity index 100% rename from Ptilde_nd.h rename to src/lib/Ptilde_nd.h diff --git a/aligned_malloc.c b/src/lib/aligned_malloc.c similarity index 100% rename from aligned_malloc.c rename to src/lib/aligned_malloc.c diff --git a/aligned_malloc.h b/src/lib/aligned_malloc.h similarity index 100% rename from aligned_malloc.h rename to src/lib/aligned_malloc.h diff --git a/block.c b/src/lib/block.c similarity index 100% rename from
block.c rename to src/lib/block.c diff --git a/block.h b/src/lib/block.h similarity index 100% rename from block.h rename to src/lib/block.h diff --git a/boundary.c b/src/lib/boundary.c similarity index 100% rename from boundary.c rename to src/lib/boundary.c diff --git a/boundary.h b/src/lib/boundary.h similarity index 100% rename from boundary.h rename to src/lib/boundary.h diff --git a/buffers/Makefile.in b/src/lib/buffers/Makefile.in similarity index 100% rename from buffers/Makefile.in rename to src/lib/buffers/Makefile.in diff --git a/buffers/gauge.c b/src/lib/buffers/gauge.c similarity index 100% rename from buffers/gauge.c rename to src/lib/buffers/gauge.c diff --git a/buffers/gauge.h b/src/lib/buffers/gauge.h similarity index 100% rename from buffers/gauge.h rename to src/lib/buffers/gauge.h diff --git a/buffers/gauge.ih b/src/lib/buffers/gauge.ih similarity index 100% rename from buffers/gauge.ih rename to src/lib/buffers/gauge.ih diff --git a/buffers/gauge_allocate_gauge_buffers.c b/src/lib/buffers/gauge_allocate_gauge_buffers.c similarity index 100% rename from buffers/gauge_allocate_gauge_buffers.c rename to src/lib/buffers/gauge_allocate_gauge_buffers.c diff --git a/buffers/gauge_finalize_gauge_buffers.c b/src/lib/buffers/gauge_finalize_gauge_buffers.c similarity index 100% rename from buffers/gauge_finalize_gauge_buffers.c rename to src/lib/buffers/gauge_finalize_gauge_buffers.c diff --git a/buffers/gauge_free_unused_gauge_buffers.c b/src/lib/buffers/gauge_free_unused_gauge_buffers.c similarity index 100% rename from buffers/gauge_free_unused_gauge_buffers.c rename to src/lib/buffers/gauge_free_unused_gauge_buffers.c diff --git a/buffers/gauge_get_gauge_field.c b/src/lib/buffers/gauge_get_gauge_field.c similarity index 100% rename from buffers/gauge_get_gauge_field.c rename to src/lib/buffers/gauge_get_gauge_field.c diff --git a/buffers/gauge_get_gauge_field_array.c b/src/lib/buffers/gauge_get_gauge_field_array.c similarity index 100% rename from 
buffers/gauge_get_gauge_field_array.c rename to src/lib/buffers/gauge_get_gauge_field_array.c diff --git a/buffers/gauge_initialize_gauge_buffers.c b/src/lib/buffers/gauge_initialize_gauge_buffers.c similarity index 100% rename from buffers/gauge_initialize_gauge_buffers.c rename to src/lib/buffers/gauge_initialize_gauge_buffers.c diff --git a/buffers/gauge_return_gauge_field.c b/src/lib/buffers/gauge_return_gauge_field.c similarity index 100% rename from buffers/gauge_return_gauge_field.c rename to src/lib/buffers/gauge_return_gauge_field.c diff --git a/buffers/gauge_return_gauge_field_array.c b/src/lib/buffers/gauge_return_gauge_field_array.c similarity index 100% rename from buffers/gauge_return_gauge_field_array.c rename to src/lib/buffers/gauge_return_gauge_field_array.c diff --git a/buffers/utils.h b/src/lib/buffers/utils.h similarity index 100% rename from buffers/utils.h rename to src/lib/buffers/utils.h diff --git a/buffers/utils.ih b/src/lib/buffers/utils.ih similarity index 100% rename from buffers/utils.ih rename to src/lib/buffers/utils.ih diff --git a/buffers/utils_generic_exchange.blocking.inc b/src/lib/buffers/utils_generic_exchange.blocking.inc similarity index 100% rename from buffers/utils_generic_exchange.blocking.inc rename to src/lib/buffers/utils_generic_exchange.blocking.inc diff --git a/buffers/utils_generic_exchange.c b/src/lib/buffers/utils_generic_exchange.c similarity index 100% rename from buffers/utils_generic_exchange.c rename to src/lib/buffers/utils_generic_exchange.c diff --git a/buffers/utils_generic_exchange.nonblocking.inc b/src/lib/buffers/utils_generic_exchange.nonblocking.inc similarity index 100% rename from buffers/utils_generic_exchange.nonblocking.inc rename to src/lib/buffers/utils_generic_exchange.nonblocking.inc diff --git a/chebyshev_polynomial.c b/src/lib/chebyshev_polynomial.c similarity index 98% rename from chebyshev_polynomial.c rename to src/lib/chebyshev_polynomial.c index 501937b80..f67055cda 100644 --- 
a/chebyshev_polynomial.c +++ b/src/lib/chebyshev_polynomial.c @@ -280,11 +280,11 @@ void degree_of_polynomial(const int repro) { printf("||auxc_3||=%e\n",temp); */ diff(&auxs[0], &auxs[0], &aux3s[0], VOLUME / 2); - temp = square_norm(&auxs[0], VOLUME / 2) / square_norm(&aux3s[0], VOLUME / 2, 1) / 4.0; + temp = square_norm(&auxs[0], VOLUME / 2, 1) / square_norm(&aux3s[0], VOLUME / 2, 1) / 4.0; if (g_proc_id == g_stdio_proc) { printf("difference=%e\n", temp); diff(&auxc[0], &auxc[0], &aux3c[0], VOLUME / 2); - temp = square_norm(&auxc[0], VOLUME / 2) / square_norm(&aux3c[0], VOLUME / 2, 1) / 4.0; + temp = square_norm(&auxc[0], VOLUME / 2, 1) / square_norm(&aux3c[0], VOLUME / 2, 1) / 4.0; printf("difference=%e\n", temp); } if (temp < stopeps) break; diff --git a/chebyshev_polynomial.h b/src/lib/chebyshev_polynomial.h similarity index 100% rename from chebyshev_polynomial.h rename to src/lib/chebyshev_polynomial.h diff --git a/chebyshev_polynomial_nd.c b/src/lib/chebyshev_polynomial_nd.c similarity index 100% rename from chebyshev_polynomial_nd.c rename to src/lib/chebyshev_polynomial_nd.c diff --git a/chebyshev_polynomial_nd.h b/src/lib/chebyshev_polynomial_nd.h similarity index 100% rename from chebyshev_polynomial_nd.h rename to src/lib/chebyshev_polynomial_nd.h diff --git a/clenshaw_coef.c b/src/lib/clenshaw_coef.c similarity index 100% rename from clenshaw_coef.c rename to src/lib/clenshaw_coef.c diff --git a/clenshaw_coef.h b/src/lib/clenshaw_coef.h similarity index 100% rename from clenshaw_coef.h rename to src/lib/clenshaw_coef.h diff --git a/compare_derivative.c b/src/lib/compare_derivative.c similarity index 100% rename from compare_derivative.c rename to src/lib/compare_derivative.c diff --git a/compare_derivative.h b/src/lib/compare_derivative.h similarity index 100% rename from compare_derivative.h rename to src/lib/compare_derivative.h diff --git a/cu/COPYING b/src/lib/cu/COPYING similarity index 100% rename from cu/COPYING rename to src/lib/cu/COPYING 
diff --git a/cu/COPYING.LESSER b/src/lib/cu/COPYING.LESSER similarity index 100% rename from cu/COPYING.LESSER rename to src/lib/cu/COPYING.LESSER diff --git a/cu/Makefile.in b/src/lib/cu/Makefile.in similarity index 100% rename from cu/Makefile.in rename to src/lib/cu/Makefile.in diff --git a/cu/check-regressions b/src/lib/cu/check-regressions similarity index 100% rename from cu/check-regressions rename to src/lib/cu/check-regressions diff --git a/cu/cu.c b/src/lib/cu/cu.c similarity index 100% rename from cu/cu.c rename to src/lib/cu/cu.c diff --git a/cu/cu.h b/src/lib/cu/cu.h similarity index 100% rename from cu/cu.h rename to src/lib/cu/cu.h diff --git a/default_input_values.h b/src/lib/default_input_values.h similarity index 100% rename from default_input_values.h rename to src/lib/default_input_values.h diff --git a/deriv_Sb.c b/src/lib/deriv_Sb.c similarity index 100% rename from deriv_Sb.c rename to src/lib/deriv_Sb.c diff --git a/deriv_Sb.h b/src/lib/deriv_Sb.h similarity index 100% rename from deriv_Sb.h rename to src/lib/deriv_Sb.h diff --git a/deriv_Sb_D_psi.c b/src/lib/deriv_Sb_D_psi.c similarity index 100% rename from deriv_Sb_D_psi.c rename to src/lib/deriv_Sb_D_psi.c diff --git a/deriv_Sb_D_psi.h b/src/lib/deriv_Sb_D_psi.h similarity index 100% rename from deriv_Sb_D_psi.h rename to src/lib/deriv_Sb_D_psi.h diff --git a/expo.c b/src/lib/expo.c similarity index 100% rename from expo.c rename to src/lib/expo.c diff --git a/expo.h b/src/lib/expo.h similarity index 100% rename from expo.h rename to src/lib/expo.h diff --git a/fatal_error.c b/src/lib/fatal_error.c similarity index 100% rename from fatal_error.c rename to src/lib/fatal_error.c diff --git a/fatal_error.h b/src/lib/fatal_error.h similarity index 100% rename from fatal_error.h rename to src/lib/fatal_error.h diff --git a/gamma.c b/src/lib/gamma.c similarity index 100% rename from gamma.c rename to src/lib/gamma.c diff --git a/gamma.h b/src/lib/gamma.h similarity index 100% rename from 
gamma.h rename to src/lib/gamma.h diff --git a/geometry_eo.c b/src/lib/geometry_eo.c similarity index 100% rename from geometry_eo.c rename to src/lib/geometry_eo.c diff --git a/geometry_eo.h b/src/lib/geometry_eo.h similarity index 100% rename from geometry_eo.h rename to src/lib/geometry_eo.h diff --git a/get_rectangle_staples.c b/src/lib/get_rectangle_staples.c similarity index 100% rename from get_rectangle_staples.c rename to src/lib/get_rectangle_staples.c diff --git a/get_rectangle_staples.h b/src/lib/get_rectangle_staples.h similarity index 100% rename from get_rectangle_staples.h rename to src/lib/get_rectangle_staples.h diff --git a/get_staples.c b/src/lib/get_staples.c similarity index 100% rename from get_staples.c rename to src/lib/get_staples.c diff --git a/get_staples.h b/src/lib/get_staples.h similarity index 100% rename from get_staples.h rename to src/lib/get_staples.h diff --git a/getopt.c b/src/lib/getopt.c similarity index 100% rename from getopt.c rename to src/lib/getopt.c diff --git a/getopt.h b/src/lib/getopt.h similarity index 100% rename from getopt.h rename to src/lib/getopt.h diff --git a/gettime.c b/src/lib/gettime.c similarity index 100% rename from gettime.c rename to src/lib/gettime.c diff --git a/gettime.h b/src/lib/gettime.h similarity index 100% rename from gettime.h rename to src/lib/gettime.h diff --git a/global.h b/src/lib/global.h similarity index 100% rename from global.h rename to src/lib/global.h diff --git a/hamiltonian_field.h b/src/lib/hamiltonian_field.h similarity index 100% rename from hamiltonian_field.h rename to src/lib/hamiltonian_field.h diff --git a/include/tmLQCD.h b/src/lib/include/tmLQCD.h similarity index 100% rename from include/tmLQCD.h rename to src/lib/include/tmLQCD.h diff --git a/include/tmlqcd_config.h b/src/lib/include/tmlqcd_config.h similarity index 100% rename from include/tmlqcd_config.h rename to src/lib/include/tmlqcd_config.h diff --git a/init/Makefile.in b/src/lib/init/Makefile.in similarity 
index 100% rename from init/Makefile.in rename to src/lib/init/Makefile.in diff --git a/init/init.h b/src/lib/init/init.h similarity index 100% rename from init/init.h rename to src/lib/init/init.h diff --git a/init/init_bispinor_field.c b/src/lib/init/init_bispinor_field.c similarity index 100% rename from init/init_bispinor_field.c rename to src/lib/init/init_bispinor_field.c diff --git a/init/init_bispinor_field.h b/src/lib/init/init_bispinor_field.h similarity index 100% rename from init/init_bispinor_field.h rename to src/lib/init/init_bispinor_field.h diff --git a/init/init_chi_spinor_field.c b/src/lib/init/init_chi_spinor_field.c similarity index 100% rename from init/init_chi_spinor_field.c rename to src/lib/init/init_chi_spinor_field.c diff --git a/init/init_chi_spinor_field.h b/src/lib/init/init_chi_spinor_field.h similarity index 100% rename from init/init_chi_spinor_field.h rename to src/lib/init/init_chi_spinor_field.h diff --git a/init/init_critical_globals.c b/src/lib/init/init_critical_globals.c similarity index 100% rename from init/init_critical_globals.c rename to src/lib/init/init_critical_globals.c diff --git a/init/init_critical_globals.h b/src/lib/init/init_critical_globals.h similarity index 100% rename from init/init_critical_globals.h rename to src/lib/init/init_critical_globals.h diff --git a/init/init_dirac_halfspinor.c b/src/lib/init/init_dirac_halfspinor.c similarity index 100% rename from init/init_dirac_halfspinor.c rename to src/lib/init/init_dirac_halfspinor.c diff --git a/init/init_dirac_halfspinor.h b/src/lib/init/init_dirac_halfspinor.h similarity index 100% rename from init/init_dirac_halfspinor.h rename to src/lib/init/init_dirac_halfspinor.h diff --git a/init/init_gauge_fg.c b/src/lib/init/init_gauge_fg.c similarity index 100% rename from init/init_gauge_fg.c rename to src/lib/init/init_gauge_fg.c diff --git a/init/init_gauge_fg.h b/src/lib/init/init_gauge_fg.h similarity index 100% rename from init/init_gauge_fg.h rename to 
src/lib/init/init_gauge_fg.h diff --git a/init/init_gauge_field.c b/src/lib/init/init_gauge_field.c similarity index 100% rename from init/init_gauge_field.c rename to src/lib/init/init_gauge_field.c diff --git a/init/init_gauge_field.h b/src/lib/init/init_gauge_field.h similarity index 100% rename from init/init_gauge_field.h rename to src/lib/init/init_gauge_field.h diff --git a/init/init_gauge_tmp.c b/src/lib/init/init_gauge_tmp.c similarity index 100% rename from init/init_gauge_tmp.c rename to src/lib/init/init_gauge_tmp.c diff --git a/init/init_gauge_tmp.h b/src/lib/init/init_gauge_tmp.h similarity index 100% rename from init/init_gauge_tmp.h rename to src/lib/init/init_gauge_tmp.h diff --git a/init/init_geometry_indices.c b/src/lib/init/init_geometry_indices.c similarity index 100% rename from init/init_geometry_indices.c rename to src/lib/init/init_geometry_indices.c diff --git a/init/init_geometry_indices.h b/src/lib/init/init_geometry_indices.h similarity index 100% rename from init/init_geometry_indices.h rename to src/lib/init/init_geometry_indices.h diff --git a/init/init_global_states.c b/src/lib/init/init_global_states.c similarity index 100% rename from init/init_global_states.c rename to src/lib/init/init_global_states.c diff --git a/init/init_global_states.h b/src/lib/init/init_global_states.h similarity index 100% rename from init/init_global_states.h rename to src/lib/init/init_global_states.h diff --git a/init/init_moment_field.c b/src/lib/init/init_moment_field.c similarity index 100% rename from init/init_moment_field.c rename to src/lib/init/init_moment_field.c diff --git a/init/init_moment_field.h b/src/lib/init/init_moment_field.h similarity index 100% rename from init/init_moment_field.h rename to src/lib/init/init_moment_field.h diff --git a/init/init_omp_accumulators.c b/src/lib/init/init_omp_accumulators.c similarity index 100% rename from init/init_omp_accumulators.c rename to src/lib/init/init_omp_accumulators.c diff --git 
a/init/init_omp_accumulators.h b/src/lib/init/init_omp_accumulators.h similarity index 100% rename from init/init_omp_accumulators.h rename to src/lib/init/init_omp_accumulators.h diff --git a/init/init_openmp.c b/src/lib/init/init_openmp.c similarity index 100% rename from init/init_openmp.c rename to src/lib/init/init_openmp.c diff --git a/init/init_openmp.h b/src/lib/init/init_openmp.h similarity index 100% rename from init/init_openmp.h rename to src/lib/init/init_openmp.h diff --git a/init/init_parallel.c b/src/lib/init/init_parallel.c similarity index 100% rename from init/init_parallel.c rename to src/lib/init/init_parallel.c diff --git a/init/init_parallel.h b/src/lib/init/init_parallel.h similarity index 100% rename from init/init_parallel.h rename to src/lib/init/init_parallel.h diff --git a/init/init_spinor_field.c b/src/lib/init/init_spinor_field.c similarity index 100% rename from init/init_spinor_field.c rename to src/lib/init/init_spinor_field.c diff --git a/init/init_spinor_field.h b/src/lib/init/init_spinor_field.h similarity index 100% rename from init/init_spinor_field.h rename to src/lib/init/init_spinor_field.h diff --git a/init/init_stout_smear_vars.c b/src/lib/init/init_stout_smear_vars.c similarity index 100% rename from init/init_stout_smear_vars.c rename to src/lib/init/init_stout_smear_vars.c diff --git a/init/init_stout_smear_vars.h b/src/lib/init/init_stout_smear_vars.h similarity index 100% rename from init/init_stout_smear_vars.h rename to src/lib/init/init_stout_smear_vars.h diff --git a/integrator.c b/src/lib/integrator.c similarity index 100% rename from integrator.c rename to src/lib/integrator.c diff --git a/integrator.h b/src/lib/integrator.h similarity index 100% rename from integrator.h rename to src/lib/integrator.h diff --git a/invert_clover_eo.c b/src/lib/invert_clover_eo.c similarity index 100% rename from invert_clover_eo.c rename to src/lib/invert_clover_eo.c diff --git a/invert_clover_eo.h b/src/lib/invert_clover_eo.h 
similarity index 100% rename from invert_clover_eo.h rename to src/lib/invert_clover_eo.h diff --git a/invert_doublet_eo.c b/src/lib/invert_doublet_eo.c similarity index 100% rename from invert_doublet_eo.c rename to src/lib/invert_doublet_eo.c diff --git a/invert_doublet_eo.h b/src/lib/invert_doublet_eo.h similarity index 100% rename from invert_doublet_eo.h rename to src/lib/invert_doublet_eo.h diff --git a/invert_eo.c b/src/lib/invert_eo.c similarity index 99% rename from invert_eo.c rename to src/lib/invert_eo.c index 25ee4a297..997cab021 100644 --- a/invert_eo.c +++ b/src/lib/invert_eo.c @@ -34,6 +34,7 @@ #ifdef HAVE_CONFIG_H #include #endif + #include #include "gamma.h" #include "global.h" diff --git a/invert_eo.h b/src/lib/invert_eo.h similarity index 100% rename from invert_eo.h rename to src/lib/invert_eo.h diff --git a/invert_overlap.c b/src/lib/invert_overlap.c similarity index 100% rename from invert_overlap.c rename to src/lib/invert_overlap.c diff --git a/invert_overlap.h b/src/lib/invert_overlap.h similarity index 100% rename from invert_overlap.h rename to src/lib/invert_overlap.h diff --git a/io/DML_crc32.c b/src/lib/io/DML_crc32.c similarity index 100% rename from io/DML_crc32.c rename to src/lib/io/DML_crc32.c diff --git a/io/deri_write_stdout.c b/src/lib/io/deri_write_stdout.c similarity index 100% rename from io/deri_write_stdout.c rename to src/lib/io/deri_write_stdout.c diff --git a/io/deri_write_stdout.h b/src/lib/io/deri_write_stdout.h similarity index 100% rename from io/deri_write_stdout.h rename to src/lib/io/deri_write_stdout.h diff --git a/io/dml.c b/src/lib/io/dml.c similarity index 100% rename from io/dml.c rename to src/lib/io/dml.c diff --git a/io/dml.h b/src/lib/io/dml.h similarity index 100% rename from io/dml.h rename to src/lib/io/dml.h diff --git a/io/eospinor.h b/src/lib/io/eospinor.h similarity index 100% rename from io/eospinor.h rename to src/lib/io/eospinor.h diff --git a/io/eospinor.ih b/src/lib/io/eospinor.ih similarity 
index 100% rename from io/eospinor.ih rename to src/lib/io/eospinor.ih diff --git a/io/eospinor_read.c b/src/lib/io/eospinor_read.c similarity index 100% rename from io/eospinor_read.c rename to src/lib/io/eospinor_read.c diff --git a/io/eospinor_write.c b/src/lib/io/eospinor_write.c similarity index 100% rename from io/eospinor_write.c rename to src/lib/io/eospinor_write.c diff --git a/io/gauge.h b/src/lib/io/gauge.h similarity index 100% rename from io/gauge.h rename to src/lib/io/gauge.h diff --git a/io/gauge.ih b/src/lib/io/gauge.ih similarity index 100% rename from io/gauge.ih rename to src/lib/io/gauge.ih diff --git a/io/gauge_read.c b/src/lib/io/gauge_read.c similarity index 100% rename from io/gauge_read.c rename to src/lib/io/gauge_read.c diff --git a/io/gauge_read_binary.c b/src/lib/io/gauge_read_binary.c similarity index 100% rename from io/gauge_read_binary.c rename to src/lib/io/gauge_read_binary.c diff --git a/io/gauge_write.c b/src/lib/io/gauge_write.c similarity index 100% rename from io/gauge_write.c rename to src/lib/io/gauge_write.c diff --git a/io/gauge_write_binary.c b/src/lib/io/gauge_write_binary.c similarity index 100% rename from io/gauge_write_binary.c rename to src/lib/io/gauge_write_binary.c diff --git a/io/gauge_write_luscher_binary.c b/src/lib/io/gauge_write_luscher_binary.c similarity index 100% rename from io/gauge_write_luscher_binary.c rename to src/lib/io/gauge_write_luscher_binary.c diff --git a/io/gauge_write_luscher_binary.h b/src/lib/io/gauge_write_luscher_binary.h similarity index 100% rename from io/gauge_write_luscher_binary.h rename to src/lib/io/gauge_write_luscher_binary.h diff --git a/io/io_cm.c b/src/lib/io/io_cm.c similarity index 100% rename from io/io_cm.c rename to src/lib/io/io_cm.c diff --git a/io/io_cm.h b/src/lib/io/io_cm.h similarity index 100% rename from io/io_cm.h rename to src/lib/io/io_cm.h diff --git a/io/params.h b/src/lib/io/params.h similarity index 100% rename from io/params.h rename to 
src/lib/io/params.h diff --git a/io/params.ih b/src/lib/io/params.ih similarity index 100% rename from io/params.ih rename to src/lib/io/params.ih diff --git a/io/params_construct_InverterInfo.c b/src/lib/io/params_construct_InverterInfo.c similarity index 100% rename from io/params_construct_InverterInfo.c rename to src/lib/io/params_construct_InverterInfo.c diff --git a/io/params_construct_ildgFormat.c b/src/lib/io/params_construct_ildgFormat.c similarity index 100% rename from io/params_construct_ildgFormat.c rename to src/lib/io/params_construct_ildgFormat.c diff --git a/io/params_construct_propagatorFormat.c b/src/lib/io/params_construct_propagatorFormat.c similarity index 100% rename from io/params_construct_propagatorFormat.c rename to src/lib/io/params_construct_propagatorFormat.c diff --git a/io/params_construct_sourceFormat.c b/src/lib/io/params_construct_sourceFormat.c similarity index 100% rename from io/params_construct_sourceFormat.c rename to src/lib/io/params_construct_sourceFormat.c diff --git a/io/params_construct_xlfInfo.c b/src/lib/io/params_construct_xlfInfo.c similarity index 100% rename from io/params_construct_xlfInfo.c rename to src/lib/io/params_construct_xlfInfo.c diff --git a/io/selector.h b/src/lib/io/selector.h similarity index 100% rename from io/selector.h rename to src/lib/io/selector.h diff --git a/io/spinor.h b/src/lib/io/spinor.h similarity index 100% rename from io/spinor.h rename to src/lib/io/spinor.h diff --git a/io/spinor.ih b/src/lib/io/spinor.ih similarity index 100% rename from io/spinor.ih rename to src/lib/io/spinor.ih diff --git a/io/spinor_read.c b/src/lib/io/spinor_read.c similarity index 100% rename from io/spinor_read.c rename to src/lib/io/spinor_read.c diff --git a/io/spinor_read_binary.c b/src/lib/io/spinor_read_binary.c similarity index 100% rename from io/spinor_read_binary.c rename to src/lib/io/spinor_read_binary.c diff --git a/io/spinor_write.c b/src/lib/io/spinor_write.c similarity index 100% rename from 
io/spinor_write.c rename to src/lib/io/spinor_write.c diff --git a/io/spinor_write_binary.c b/src/lib/io/spinor_write_binary.c similarity index 100% rename from io/spinor_write_binary.c rename to src/lib/io/spinor_write_binary.c diff --git a/io/spinor_write_info.c b/src/lib/io/spinor_write_info.c similarity index 100% rename from io/spinor_write_info.c rename to src/lib/io/spinor_write_info.c diff --git a/io/spinor_write_propagator_format.c b/src/lib/io/spinor_write_propagator_format.c similarity index 100% rename from io/spinor_write_propagator_format.c rename to src/lib/io/spinor_write_propagator_format.c diff --git a/io/spinor_write_propagator_type.c b/src/lib/io/spinor_write_propagator_type.c similarity index 100% rename from io/spinor_write_propagator_type.c rename to src/lib/io/spinor_write_propagator_type.c diff --git a/io/spinor_write_source_format.c b/src/lib/io/spinor_write_source_format.c similarity index 100% rename from io/spinor_write_source_format.c rename to src/lib/io/spinor_write_source_format.c diff --git a/io/spinor_write_stdout.c b/src/lib/io/spinor_write_stdout.c similarity index 100% rename from io/spinor_write_stdout.c rename to src/lib/io/spinor_write_stdout.c diff --git a/io/spinor_write_stdout.h b/src/lib/io/spinor_write_stdout.h similarity index 100% rename from io/spinor_write_stdout.h rename to src/lib/io/spinor_write_stdout.h diff --git a/io/sw_write_stdout.c b/src/lib/io/sw_write_stdout.c similarity index 100% rename from io/sw_write_stdout.c rename to src/lib/io/sw_write_stdout.c diff --git a/io/sw_write_stdout.h b/src/lib/io/sw_write_stdout.h similarity index 100% rename from io/sw_write_stdout.h rename to src/lib/io/sw_write_stdout.h diff --git a/io/utils.c b/src/lib/io/utils.c similarity index 100% rename from io/utils.c rename to src/lib/io/utils.c diff --git a/io/utils.h b/src/lib/io/utils.h similarity index 99% rename from io/utils.h rename to src/lib/io/utils.h index afcca1553..85e98a5e2 100644 --- a/io/utils.h +++ 
b/src/lib/io/utils.h @@ -20,9 +20,7 @@ #ifndef _UTILS_H #define _UTILS_H -#ifdef HAVE_CONFIG_H #include -#endif #include "io/dml.h" #include "io/params.h" diff --git a/io/utils.ih b/src/lib/io/utils.ih similarity index 96% rename from io/utils.ih rename to src/lib/io/utils.ih index 073bd64b5..dd963b5b9 100644 --- a/io/utils.ih +++ b/src/lib/io/utils.ih @@ -18,9 +18,7 @@ ***********************************************************************/ #include -#ifdef HAVE_CONFIG_H #include "tmlqcd_config.h" -#endif #include #include @@ -29,7 +27,7 @@ #include #include #include -#ifdef MPI +#ifdef TM_USE_MPI #include #endif #include diff --git a/io/utils_close_reader_record.c b/src/lib/io/utils_close_reader_record.c similarity index 100% rename from io/utils_close_reader_record.c rename to src/lib/io/utils_close_reader_record.c diff --git a/io/utils_close_writer_record.c b/src/lib/io/utils_close_writer_record.c similarity index 100% rename from io/utils_close_writer_record.c rename to src/lib/io/utils_close_writer_record.c diff --git a/io/utils_construct_reader.c b/src/lib/io/utils_construct_reader.c similarity index 97% rename from io/utils_construct_reader.c rename to src/lib/io/utils_construct_reader.c index 085206786..2714455b2 100644 --- a/io/utils_construct_reader.c +++ b/src/lib/io/utils_construct_reader.c @@ -1,5 +1,7 @@ #include "utils.ih" +extern MPI_Comm g_cart_grid; + void construct_reader(READER **reader, char *filename) { LIME_FILE *fh = NULL; int status = 0; diff --git a/io/utils_construct_writer.c b/src/lib/io/utils_construct_writer.c similarity index 100% rename from io/utils_construct_writer.c rename to src/lib/io/utils_construct_writer.c diff --git a/io/utils_destruct_reader.c b/src/lib/io/utils_destruct_reader.c similarity index 100% rename from io/utils_destruct_reader.c rename to src/lib/io/utils_destruct_reader.c diff --git a/io/utils_destruct_writer.c b/src/lib/io/utils_destruct_writer.c similarity index 100% rename from io/utils_destruct_writer.c 
rename to src/lib/io/utils_destruct_writer.c diff --git a/io/utils_engineering.c b/src/lib/io/utils_engineering.c similarity index 100% rename from io/utils_engineering.c rename to src/lib/io/utils_engineering.c diff --git a/io/utils_kill_with_error.c b/src/lib/io/utils_kill_with_error.c similarity index 100% rename from io/utils_kill_with_error.c rename to src/lib/io/utils_kill_with_error.c diff --git a/io/utils_parse_checksum_xml.c b/src/lib/io/utils_parse_checksum_xml.c similarity index 100% rename from io/utils_parse_checksum_xml.c rename to src/lib/io/utils_parse_checksum_xml.c diff --git a/io/utils_parse_ildgformat_xml.c b/src/lib/io/utils_parse_ildgformat_xml.c similarity index 100% rename from io/utils_parse_ildgformat_xml.c rename to src/lib/io/utils_parse_ildgformat_xml.c diff --git a/io/utils_parse_propagator_type.c b/src/lib/io/utils_parse_propagator_type.c similarity index 100% rename from io/utils_parse_propagator_type.c rename to src/lib/io/utils_parse_propagator_type.c diff --git a/io/utils_read_message.c b/src/lib/io/utils_read_message.c similarity index 100% rename from io/utils_read_message.c rename to src/lib/io/utils_read_message.c diff --git a/io/utils_write_checksum.c b/src/lib/io/utils_write_checksum.c similarity index 100% rename from io/utils_write_checksum.c rename to src/lib/io/utils_write_checksum.c diff --git a/io/utils_write_first_message.c b/src/lib/io/utils_write_first_message.c similarity index 100% rename from io/utils_write_first_message.c rename to src/lib/io/utils_write_first_message.c diff --git a/io/utils_write_header.c b/src/lib/io/utils_write_header.c similarity index 100% rename from io/utils_write_header.c rename to src/lib/io/utils_write_header.c diff --git a/io/utils_write_ildg_format.c b/src/lib/io/utils_write_ildg_format.c similarity index 100% rename from io/utils_write_ildg_format.c rename to src/lib/io/utils_write_ildg_format.c diff --git a/io/utils_write_inverter_info.c b/src/lib/io/utils_write_inverter_info.c 
similarity index 100% rename from io/utils_write_inverter_info.c rename to src/lib/io/utils_write_inverter_info.c diff --git a/io/utils_write_message.c b/src/lib/io/utils_write_message.c similarity index 100% rename from io/utils_write_message.c rename to src/lib/io/utils_write_message.c diff --git a/io/utils_write_xlf.c b/src/lib/io/utils_write_xlf.c similarity index 100% rename from io/utils_write_xlf.c rename to src/lib/io/utils_write_xlf.c diff --git a/io/utils_write_xlf_xml.c b/src/lib/io/utils_write_xlf_xml.c similarity index 100% rename from io/utils_write_xlf_xml.c rename to src/lib/io/utils_write_xlf_xml.c diff --git a/kahan_summation.h b/src/lib/kahan_summation.h similarity index 100% rename from kahan_summation.h rename to src/lib/kahan_summation.h diff --git a/linalg/Makefile.in b/src/lib/linalg/Makefile.in similarity index 100% rename from linalg/Makefile.in rename to src/lib/linalg/Makefile.in diff --git a/linalg/add.c b/src/lib/linalg/add.c similarity index 100% rename from linalg/add.c rename to src/lib/linalg/add.c diff --git a/linalg/add.h b/src/lib/linalg/add.h similarity index 100% rename from linalg/add.h rename to src/lib/linalg/add.h diff --git a/linalg/addto_32.c b/src/lib/linalg/addto_32.c similarity index 100% rename from linalg/addto_32.c rename to src/lib/linalg/addto_32.c diff --git a/linalg/addto_32.h b/src/lib/linalg/addto_32.h similarity index 100% rename from linalg/addto_32.h rename to src/lib/linalg/addto_32.h diff --git a/linalg/assign.c b/src/lib/linalg/assign.c similarity index 100% rename from linalg/assign.c rename to src/lib/linalg/assign.c diff --git a/linalg/assign.h b/src/lib/linalg/assign.h similarity index 100% rename from linalg/assign.h rename to src/lib/linalg/assign.h diff --git a/linalg/assign_add_mul.c b/src/lib/linalg/assign_add_mul.c similarity index 100% rename from linalg/assign_add_mul.c rename to src/lib/linalg/assign_add_mul.c diff --git a/linalg/assign_add_mul.h b/src/lib/linalg/assign_add_mul.h similarity 
index 100% rename from linalg/assign_add_mul.h rename to src/lib/linalg/assign_add_mul.h diff --git a/linalg/assign_add_mul_add_mul.c b/src/lib/linalg/assign_add_mul_add_mul.c similarity index 100% rename from linalg/assign_add_mul_add_mul.c rename to src/lib/linalg/assign_add_mul_add_mul.c diff --git a/linalg/assign_add_mul_add_mul.h b/src/lib/linalg/assign_add_mul_add_mul.h similarity index 100% rename from linalg/assign_add_mul_add_mul.h rename to src/lib/linalg/assign_add_mul_add_mul.h diff --git a/linalg/assign_add_mul_add_mul_r.c b/src/lib/linalg/assign_add_mul_add_mul_r.c similarity index 100% rename from linalg/assign_add_mul_add_mul_r.c rename to src/lib/linalg/assign_add_mul_add_mul_r.c diff --git a/linalg/assign_add_mul_add_mul_r.h b/src/lib/linalg/assign_add_mul_add_mul_r.h similarity index 100% rename from linalg/assign_add_mul_add_mul_r.h rename to src/lib/linalg/assign_add_mul_add_mul_r.h diff --git a/linalg/assign_add_mul_body.c b/src/lib/linalg/assign_add_mul_body.c similarity index 100% rename from linalg/assign_add_mul_body.c rename to src/lib/linalg/assign_add_mul_body.c diff --git a/linalg/assign_add_mul_r.c b/src/lib/linalg/assign_add_mul_r.c similarity index 100% rename from linalg/assign_add_mul_r.c rename to src/lib/linalg/assign_add_mul_r.c diff --git a/linalg/assign_add_mul_r.h b/src/lib/linalg/assign_add_mul_r.h similarity index 100% rename from linalg/assign_add_mul_r.h rename to src/lib/linalg/assign_add_mul_r.h diff --git a/linalg/assign_add_mul_r_32.c b/src/lib/linalg/assign_add_mul_r_32.c similarity index 93% rename from linalg/assign_add_mul_r_32.c rename to src/lib/linalg/assign_add_mul_r_32.c index 8df54858b..9f6b1a72f 100644 --- a/linalg/assign_add_mul_r_32.c +++ b/src/lib/linalg/assign_add_mul_r_32.c @@ -28,16 +28,13 @@ #ifdef HAVE_CONFIG_H #include #endif -#ifdef TM_USE_OMP -#include -#endif #include #include #include #include "assign_add_mul_r_32.h" #include "su3.h" -inline void assign_add_mul_r_32_orphaned(spinor32 *const R, 
spinor32 *const S, const float c, +void assign_add_mul_r_32_orphaned(spinor32 *const R, spinor32 *const S, const float c, const int N) { #ifdef TM_USE_OMP #pragma omp parallel for diff --git a/linalg/assign_add_mul_r_32.h b/src/lib/linalg/assign_add_mul_r_32.h similarity index 100% rename from linalg/assign_add_mul_r_32.h rename to src/lib/linalg/assign_add_mul_r_32.h diff --git a/linalg/assign_add_mul_r_add_mul.c b/src/lib/linalg/assign_add_mul_r_add_mul.c similarity index 100% rename from linalg/assign_add_mul_r_add_mul.c rename to src/lib/linalg/assign_add_mul_r_add_mul.c diff --git a/linalg/assign_add_mul_r_add_mul.h b/src/lib/linalg/assign_add_mul_r_add_mul.h similarity index 100% rename from linalg/assign_add_mul_r_add_mul.h rename to src/lib/linalg/assign_add_mul_r_add_mul.h diff --git a/linalg/assign_diff_mul.c b/src/lib/linalg/assign_diff_mul.c similarity index 100% rename from linalg/assign_diff_mul.c rename to src/lib/linalg/assign_diff_mul.c diff --git a/linalg/assign_diff_mul.h b/src/lib/linalg/assign_diff_mul.h similarity index 100% rename from linalg/assign_diff_mul.h rename to src/lib/linalg/assign_diff_mul.h diff --git a/linalg/assign_mul_add.c b/src/lib/linalg/assign_mul_add.c similarity index 100% rename from linalg/assign_mul_add.c rename to src/lib/linalg/assign_mul_add.c diff --git a/linalg/assign_mul_add.h b/src/lib/linalg/assign_mul_add.h similarity index 100% rename from linalg/assign_mul_add.h rename to src/lib/linalg/assign_mul_add.h diff --git a/linalg/assign_mul_add_mul.c b/src/lib/linalg/assign_mul_add_mul.c similarity index 100% rename from linalg/assign_mul_add_mul.c rename to src/lib/linalg/assign_mul_add_mul.c diff --git a/linalg/assign_mul_add_mul.h b/src/lib/linalg/assign_mul_add_mul.h similarity index 100% rename from linalg/assign_mul_add_mul.h rename to src/lib/linalg/assign_mul_add_mul.h diff --git a/linalg/assign_mul_add_mul_add_mul_add_mul_r.c b/src/lib/linalg/assign_mul_add_mul_add_mul_add_mul_r.c similarity index 100% 
rename from linalg/assign_mul_add_mul_add_mul_add_mul_r.c rename to src/lib/linalg/assign_mul_add_mul_add_mul_add_mul_r.c diff --git a/linalg/assign_mul_add_mul_add_mul_add_mul_r.h b/src/lib/linalg/assign_mul_add_mul_add_mul_add_mul_r.h similarity index 100% rename from linalg/assign_mul_add_mul_add_mul_add_mul_r.h rename to src/lib/linalg/assign_mul_add_mul_add_mul_add_mul_r.h diff --git a/linalg/assign_mul_add_mul_add_mul_r.c b/src/lib/linalg/assign_mul_add_mul_add_mul_r.c similarity index 100% rename from linalg/assign_mul_add_mul_add_mul_r.c rename to src/lib/linalg/assign_mul_add_mul_add_mul_r.c diff --git a/linalg/assign_mul_add_mul_add_mul_r.h b/src/lib/linalg/assign_mul_add_mul_add_mul_r.h similarity index 100% rename from linalg/assign_mul_add_mul_add_mul_r.h rename to src/lib/linalg/assign_mul_add_mul_add_mul_r.h diff --git a/linalg/assign_mul_add_mul_r.c b/src/lib/linalg/assign_mul_add_mul_r.c similarity index 100% rename from linalg/assign_mul_add_mul_r.c rename to src/lib/linalg/assign_mul_add_mul_r.c diff --git a/linalg/assign_mul_add_mul_r.h b/src/lib/linalg/assign_mul_add_mul_r.h similarity index 100% rename from linalg/assign_mul_add_mul_r.h rename to src/lib/linalg/assign_mul_add_mul_r.h diff --git a/linalg/assign_mul_add_mul_r_32.c b/src/lib/linalg/assign_mul_add_mul_r_32.c similarity index 100% rename from linalg/assign_mul_add_mul_r_32.c rename to src/lib/linalg/assign_mul_add_mul_r_32.c diff --git a/linalg/assign_mul_add_mul_r_32.h b/src/lib/linalg/assign_mul_add_mul_r_32.h similarity index 100% rename from linalg/assign_mul_add_mul_r_32.h rename to src/lib/linalg/assign_mul_add_mul_r_32.h diff --git a/linalg/assign_mul_add_r.c b/src/lib/linalg/assign_mul_add_r.c similarity index 100% rename from linalg/assign_mul_add_r.c rename to src/lib/linalg/assign_mul_add_r.c diff --git a/linalg/assign_mul_add_r.h b/src/lib/linalg/assign_mul_add_r.h similarity index 100% rename from linalg/assign_mul_add_r.h rename to src/lib/linalg/assign_mul_add_r.h 
diff --git a/linalg/assign_mul_add_r_32.c b/src/lib/linalg/assign_mul_add_r_32.c similarity index 100% rename from linalg/assign_mul_add_r_32.c rename to src/lib/linalg/assign_mul_add_r_32.c diff --git a/linalg/assign_mul_add_r_32.h b/src/lib/linalg/assign_mul_add_r_32.h similarity index 100% rename from linalg/assign_mul_add_r_32.h rename to src/lib/linalg/assign_mul_add_r_32.h diff --git a/linalg/assign_mul_add_r_and_square.c b/src/lib/linalg/assign_mul_add_r_and_square.c similarity index 100% rename from linalg/assign_mul_add_r_and_square.c rename to src/lib/linalg/assign_mul_add_r_and_square.c diff --git a/linalg/assign_mul_add_r_and_square.h b/src/lib/linalg/assign_mul_add_r_and_square.h similarity index 100% rename from linalg/assign_mul_add_r_and_square.h rename to src/lib/linalg/assign_mul_add_r_and_square.h diff --git a/linalg/assign_mul_bra_add_mul_ket_add.c b/src/lib/linalg/assign_mul_bra_add_mul_ket_add.c similarity index 100% rename from linalg/assign_mul_bra_add_mul_ket_add.c rename to src/lib/linalg/assign_mul_bra_add_mul_ket_add.c diff --git a/linalg/assign_mul_bra_add_mul_ket_add.h b/src/lib/linalg/assign_mul_bra_add_mul_ket_add.h similarity index 100% rename from linalg/assign_mul_bra_add_mul_ket_add.h rename to src/lib/linalg/assign_mul_bra_add_mul_ket_add.h diff --git a/linalg/assign_mul_bra_add_mul_ket_add_r.c b/src/lib/linalg/assign_mul_bra_add_mul_ket_add_r.c similarity index 100% rename from linalg/assign_mul_bra_add_mul_ket_add_r.c rename to src/lib/linalg/assign_mul_bra_add_mul_ket_add_r.c diff --git a/linalg/assign_mul_bra_add_mul_ket_add_r.h b/src/lib/linalg/assign_mul_bra_add_mul_ket_add_r.h similarity index 100% rename from linalg/assign_mul_bra_add_mul_ket_add_r.h rename to src/lib/linalg/assign_mul_bra_add_mul_ket_add_r.h diff --git a/linalg/assign_mul_bra_add_mul_r.c b/src/lib/linalg/assign_mul_bra_add_mul_r.c similarity index 100% rename from linalg/assign_mul_bra_add_mul_r.c rename to src/lib/linalg/assign_mul_bra_add_mul_r.c diff 
--git a/linalg/assign_mul_bra_add_mul_r.h b/src/lib/linalg/assign_mul_bra_add_mul_r.h similarity index 100% rename from linalg/assign_mul_bra_add_mul_r.h rename to src/lib/linalg/assign_mul_bra_add_mul_r.h diff --git a/linalg/assign_to_32.c b/src/lib/linalg/assign_to_32.c similarity index 100% rename from linalg/assign_to_32.c rename to src/lib/linalg/assign_to_32.c diff --git a/linalg/assign_to_32.h b/src/lib/linalg/assign_to_32.h similarity index 100% rename from linalg/assign_to_32.h rename to src/lib/linalg/assign_to_32.h diff --git a/linalg/blas.h b/src/lib/linalg/blas.h similarity index 100% rename from linalg/blas.h rename to src/lib/linalg/blas.h diff --git a/linalg/comp_decomp.c b/src/lib/linalg/comp_decomp.c similarity index 100% rename from linalg/comp_decomp.c rename to src/lib/linalg/comp_decomp.c diff --git a/linalg/comp_decomp.h b/src/lib/linalg/comp_decomp.h similarity index 100% rename from linalg/comp_decomp.h rename to src/lib/linalg/comp_decomp.h diff --git a/linalg/convert_eo_to_lexic.c b/src/lib/linalg/convert_eo_to_lexic.c similarity index 100% rename from linalg/convert_eo_to_lexic.c rename to src/lib/linalg/convert_eo_to_lexic.c diff --git a/linalg/convert_eo_to_lexic.h b/src/lib/linalg/convert_eo_to_lexic.h similarity index 100% rename from linalg/convert_eo_to_lexic.h rename to src/lib/linalg/convert_eo_to_lexic.h diff --git a/linalg/convert_even_to_lexic.c b/src/lib/linalg/convert_even_to_lexic.c similarity index 100% rename from linalg/convert_even_to_lexic.c rename to src/lib/linalg/convert_even_to_lexic.c diff --git a/linalg/convert_even_to_lexic.h b/src/lib/linalg/convert_even_to_lexic.h similarity index 100% rename from linalg/convert_even_to_lexic.h rename to src/lib/linalg/convert_even_to_lexic.h diff --git a/linalg/convert_odd_to_lexic.c b/src/lib/linalg/convert_odd_to_lexic.c similarity index 100% rename from linalg/convert_odd_to_lexic.c rename to src/lib/linalg/convert_odd_to_lexic.c diff --git a/linalg/convert_odd_to_lexic.h 
b/src/lib/linalg/convert_odd_to_lexic.h similarity index 100% rename from linalg/convert_odd_to_lexic.h rename to src/lib/linalg/convert_odd_to_lexic.h diff --git a/linalg/diff.c b/src/lib/linalg/diff.c similarity index 100% rename from linalg/diff.c rename to src/lib/linalg/diff.c diff --git a/linalg/diff.h b/src/lib/linalg/diff.h similarity index 100% rename from linalg/diff.h rename to src/lib/linalg/diff.h diff --git a/linalg/diff_32.c b/src/lib/linalg/diff_32.c similarity index 100% rename from linalg/diff_32.c rename to src/lib/linalg/diff_32.c diff --git a/linalg/diff_32.h b/src/lib/linalg/diff_32.h similarity index 100% rename from linalg/diff_32.h rename to src/lib/linalg/diff_32.h diff --git a/linalg/diff_and_square_norm.c b/src/lib/linalg/diff_and_square_norm.c similarity index 100% rename from linalg/diff_and_square_norm.c rename to src/lib/linalg/diff_and_square_norm.c diff --git a/linalg/diff_and_square_norm.h b/src/lib/linalg/diff_and_square_norm.h similarity index 100% rename from linalg/diff_and_square_norm.h rename to src/lib/linalg/diff_and_square_norm.h diff --git a/linalg/fortran.h b/src/lib/linalg/fortran.h similarity index 100% rename from linalg/fortran.h rename to src/lib/linalg/fortran.h diff --git a/linalg/lapack.h b/src/lib/linalg/lapack.h similarity index 100% rename from linalg/lapack.h rename to src/lib/linalg/lapack.h diff --git a/linalg/map_to_blas.h b/src/lib/linalg/map_to_blas.h similarity index 100% rename from linalg/map_to_blas.h rename to src/lib/linalg/map_to_blas.h diff --git a/linalg/mattimesvec.c b/src/lib/linalg/mattimesvec.c similarity index 100% rename from linalg/mattimesvec.c rename to src/lib/linalg/mattimesvec.c diff --git a/linalg/mattimesvec.h b/src/lib/linalg/mattimesvec.h similarity index 100% rename from linalg/mattimesvec.h rename to src/lib/linalg/mattimesvec.h diff --git a/linalg/mul.c b/src/lib/linalg/mul.c similarity index 100% rename from linalg/mul.c rename to src/lib/linalg/mul.c diff --git 
a/linalg/mul.h b/src/lib/linalg/mul.h similarity index 100% rename from linalg/mul.h rename to src/lib/linalg/mul.h diff --git a/linalg/mul_add_mul.c b/src/lib/linalg/mul_add_mul.c similarity index 100% rename from linalg/mul_add_mul.c rename to src/lib/linalg/mul_add_mul.c diff --git a/linalg/mul_add_mul.h b/src/lib/linalg/mul_add_mul.h similarity index 100% rename from linalg/mul_add_mul.h rename to src/lib/linalg/mul_add_mul.h diff --git a/linalg/mul_add_mul_r.c b/src/lib/linalg/mul_add_mul_r.c similarity index 100% rename from linalg/mul_add_mul_r.c rename to src/lib/linalg/mul_add_mul_r.c diff --git a/linalg/mul_add_mul_r.h b/src/lib/linalg/mul_add_mul_r.h similarity index 100% rename from linalg/mul_add_mul_r.h rename to src/lib/linalg/mul_add_mul_r.h diff --git a/linalg/mul_diff_mul.c b/src/lib/linalg/mul_diff_mul.c similarity index 100% rename from linalg/mul_diff_mul.c rename to src/lib/linalg/mul_diff_mul.c diff --git a/linalg/mul_diff_mul.h b/src/lib/linalg/mul_diff_mul.h similarity index 100% rename from linalg/mul_diff_mul.h rename to src/lib/linalg/mul_diff_mul.h diff --git a/linalg/mul_diff_mul_r.c b/src/lib/linalg/mul_diff_mul_r.c similarity index 100% rename from linalg/mul_diff_mul_r.c rename to src/lib/linalg/mul_diff_mul_r.c diff --git a/linalg/mul_diff_mul_r.h b/src/lib/linalg/mul_diff_mul_r.h similarity index 100% rename from linalg/mul_diff_mul_r.h rename to src/lib/linalg/mul_diff_mul_r.h diff --git a/linalg/mul_diff_r.c b/src/lib/linalg/mul_diff_r.c similarity index 100% rename from linalg/mul_diff_r.c rename to src/lib/linalg/mul_diff_r.c diff --git a/linalg/mul_diff_r.h b/src/lib/linalg/mul_diff_r.h similarity index 100% rename from linalg/mul_diff_r.h rename to src/lib/linalg/mul_diff_r.h diff --git a/linalg/mul_gamma5.c b/src/lib/linalg/mul_gamma5.c similarity index 100% rename from linalg/mul_gamma5.c rename to src/lib/linalg/mul_gamma5.c diff --git a/linalg/mul_gamma5.h b/src/lib/linalg/mul_gamma5.h similarity index 100% rename from 
linalg/mul_gamma5.h rename to src/lib/linalg/mul_gamma5.h diff --git a/linalg/mul_r.c b/src/lib/linalg/mul_r.c similarity index 100% rename from linalg/mul_r.c rename to src/lib/linalg/mul_r.c diff --git a/linalg/mul_r.h b/src/lib/linalg/mul_r.h similarity index 100% rename from linalg/mul_r.h rename to src/lib/linalg/mul_r.h diff --git a/linalg/mul_r_32.c b/src/lib/linalg/mul_r_32.c similarity index 100% rename from linalg/mul_r_32.c rename to src/lib/linalg/mul_r_32.c diff --git a/linalg/mul_r_32.h b/src/lib/linalg/mul_r_32.h similarity index 100% rename from linalg/mul_r_32.h rename to src/lib/linalg/mul_r_32.h diff --git a/linalg/mul_r_gamma5.c b/src/lib/linalg/mul_r_gamma5.c similarity index 100% rename from linalg/mul_r_gamma5.c rename to src/lib/linalg/mul_r_gamma5.c diff --git a/linalg/mul_r_gamma5.h b/src/lib/linalg/mul_r_gamma5.h similarity index 100% rename from linalg/mul_r_gamma5.h rename to src/lib/linalg/mul_r_gamma5.h diff --git a/linalg/print_spinor.c b/src/lib/linalg/print_spinor.c similarity index 100% rename from linalg/print_spinor.c rename to src/lib/linalg/print_spinor.c diff --git a/linalg/print_spinor.h b/src/lib/linalg/print_spinor.h similarity index 100% rename from linalg/print_spinor.h rename to src/lib/linalg/print_spinor.h diff --git a/linalg/print_spinor_similar_components.c b/src/lib/linalg/print_spinor_similar_components.c similarity index 100% rename from linalg/print_spinor_similar_components.c rename to src/lib/linalg/print_spinor_similar_components.c diff --git a/linalg/print_spinor_similar_components.h b/src/lib/linalg/print_spinor_similar_components.h similarity index 100% rename from linalg/print_spinor_similar_components.h rename to src/lib/linalg/print_spinor_similar_components.h diff --git a/linalg/ratio.c b/src/lib/linalg/ratio.c similarity index 100% rename from linalg/ratio.c rename to src/lib/linalg/ratio.c diff --git a/linalg/ratio.h b/src/lib/linalg/ratio.h similarity index 100% rename from linalg/ratio.h rename to 
src/lib/linalg/ratio.h diff --git a/linalg/scalar_prod.c b/src/lib/linalg/scalar_prod.c similarity index 100% rename from linalg/scalar_prod.c rename to src/lib/linalg/scalar_prod.c diff --git a/linalg/scalar_prod.h b/src/lib/linalg/scalar_prod.h similarity index 100% rename from linalg/scalar_prod.h rename to src/lib/linalg/scalar_prod.h diff --git a/linalg/scalar_prod_body.c b/src/lib/linalg/scalar_prod_body.c similarity index 100% rename from linalg/scalar_prod_body.c rename to src/lib/linalg/scalar_prod_body.c diff --git a/linalg/scalar_prod_i.c b/src/lib/linalg/scalar_prod_i.c similarity index 100% rename from linalg/scalar_prod_i.c rename to src/lib/linalg/scalar_prod_i.c diff --git a/linalg/scalar_prod_i.h b/src/lib/linalg/scalar_prod_i.h similarity index 100% rename from linalg/scalar_prod_i.h rename to src/lib/linalg/scalar_prod_i.h diff --git a/linalg/scalar_prod_r.c b/src/lib/linalg/scalar_prod_r.c similarity index 100% rename from linalg/scalar_prod_r.c rename to src/lib/linalg/scalar_prod_r.c diff --git a/linalg/scalar_prod_r.h b/src/lib/linalg/scalar_prod_r.h similarity index 100% rename from linalg/scalar_prod_r.h rename to src/lib/linalg/scalar_prod_r.h diff --git a/linalg/scalar_prod_r_32.c b/src/lib/linalg/scalar_prod_r_32.c similarity index 100% rename from linalg/scalar_prod_r_32.c rename to src/lib/linalg/scalar_prod_r_32.c diff --git a/linalg/scalar_prod_r_32.h b/src/lib/linalg/scalar_prod_r_32.h similarity index 100% rename from linalg/scalar_prod_r_32.h rename to src/lib/linalg/scalar_prod_r_32.h diff --git a/linalg/set_even_to_zero.c b/src/lib/linalg/set_even_to_zero.c similarity index 100% rename from linalg/set_even_to_zero.c rename to src/lib/linalg/set_even_to_zero.c diff --git a/linalg/set_even_to_zero.h b/src/lib/linalg/set_even_to_zero.h similarity index 100% rename from linalg/set_even_to_zero.h rename to src/lib/linalg/set_even_to_zero.h diff --git a/linalg/square_and_minmax.c b/src/lib/linalg/square_and_minmax.c similarity index 
100% rename from linalg/square_and_minmax.c rename to src/lib/linalg/square_and_minmax.c diff --git a/linalg/square_and_minmax.h b/src/lib/linalg/square_and_minmax.h similarity index 100% rename from linalg/square_and_minmax.h rename to src/lib/linalg/square_and_minmax.h diff --git a/linalg/square_and_prod_r.c b/src/lib/linalg/square_and_prod_r.c similarity index 100% rename from linalg/square_and_prod_r.c rename to src/lib/linalg/square_and_prod_r.c diff --git a/linalg/square_and_prod_r.h b/src/lib/linalg/square_and_prod_r.h similarity index 100% rename from linalg/square_and_prod_r.h rename to src/lib/linalg/square_and_prod_r.h diff --git a/linalg/square_norm.c b/src/lib/linalg/square_norm.c similarity index 100% rename from linalg/square_norm.c rename to src/lib/linalg/square_norm.c diff --git a/linalg/square_norm.h b/src/lib/linalg/square_norm.h similarity index 100% rename from linalg/square_norm.h rename to src/lib/linalg/square_norm.h diff --git a/linalg/square_norm_32.c b/src/lib/linalg/square_norm_32.c similarity index 100% rename from linalg/square_norm_32.c rename to src/lib/linalg/square_norm_32.c diff --git a/linalg/square_norm_32.h b/src/lib/linalg/square_norm_32.h similarity index 100% rename from linalg/square_norm_32.h rename to src/lib/linalg/square_norm_32.h diff --git a/linalg_eo.h b/src/lib/linalg_eo.h similarity index 100% rename from linalg_eo.h rename to src/lib/linalg_eo.h diff --git a/little_D.c b/src/lib/little_D.c similarity index 100% rename from little_D.c rename to src/lib/little_D.c diff --git a/little_D.h b/src/lib/little_D.h similarity index 100% rename from little_D.h rename to src/lib/little_D.h diff --git a/little_D_body.c b/src/lib/little_D_body.c similarity index 100% rename from little_D_body.c rename to src/lib/little_D_body.c diff --git a/matrix_utils.c b/src/lib/matrix_utils.c similarity index 100% rename from matrix_utils.c rename to src/lib/matrix_utils.c diff --git a/matrix_utils.h b/src/lib/matrix_utils.h similarity 
index 100% rename from matrix_utils.h rename to src/lib/matrix_utils.h diff --git a/meas/Makefile.in b/src/lib/meas/Makefile.in similarity index 100% rename from meas/Makefile.in rename to src/lib/meas/Makefile.in diff --git a/meas/correlators.c b/src/lib/meas/correlators.c similarity index 100% rename from meas/correlators.c rename to src/lib/meas/correlators.c diff --git a/meas/correlators.h b/src/lib/meas/correlators.h similarity index 100% rename from meas/correlators.h rename to src/lib/meas/correlators.h diff --git a/meas/field_strength_types.h b/src/lib/meas/field_strength_types.h similarity index 100% rename from meas/field_strength_types.h rename to src/lib/meas/field_strength_types.h diff --git a/meas/gradient_flow.c b/src/lib/meas/gradient_flow.c similarity index 100% rename from meas/gradient_flow.c rename to src/lib/meas/gradient_flow.c diff --git a/meas/gradient_flow.h b/src/lib/meas/gradient_flow.h similarity index 100% rename from meas/gradient_flow.h rename to src/lib/meas/gradient_flow.h diff --git a/meas/measure_clover_field_strength_observables.c b/src/lib/meas/measure_clover_field_strength_observables.c similarity index 100% rename from meas/measure_clover_field_strength_observables.c rename to src/lib/meas/measure_clover_field_strength_observables.c diff --git a/meas/measure_clover_field_strength_observables.h b/src/lib/meas/measure_clover_field_strength_observables.h similarity index 100% rename from meas/measure_clover_field_strength_observables.h rename to src/lib/meas/measure_clover_field_strength_observables.h diff --git a/meas/measurements.c b/src/lib/meas/measurements.c similarity index 100% rename from meas/measurements.c rename to src/lib/meas/measurements.c diff --git a/meas/measurements.h b/src/lib/meas/measurements.h similarity index 100% rename from meas/measurements.h rename to src/lib/meas/measurements.h diff --git a/meas/oriented_plaquettes.c b/src/lib/meas/oriented_plaquettes.c similarity index 100% rename from 
meas/oriented_plaquettes.c rename to src/lib/meas/oriented_plaquettes.c diff --git a/meas/oriented_plaquettes.h b/src/lib/meas/oriented_plaquettes.h similarity index 100% rename from meas/oriented_plaquettes.h rename to src/lib/meas/oriented_plaquettes.h diff --git a/meas/pion_norm.c b/src/lib/meas/pion_norm.c similarity index 100% rename from meas/pion_norm.c rename to src/lib/meas/pion_norm.c diff --git a/meas/pion_norm.h b/src/lib/meas/pion_norm.h similarity index 100% rename from meas/pion_norm.h rename to src/lib/meas/pion_norm.h diff --git a/meas/polyakov_loop.c b/src/lib/meas/polyakov_loop.c similarity index 100% rename from meas/polyakov_loop.c rename to src/lib/meas/polyakov_loop.c diff --git a/meas/polyakov_loop.h b/src/lib/meas/polyakov_loop.h similarity index 100% rename from meas/polyakov_loop.h rename to src/lib/meas/polyakov_loop.h diff --git a/measure_gauge_action.c b/src/lib/measure_gauge_action.c similarity index 100% rename from measure_gauge_action.c rename to src/lib/measure_gauge_action.c diff --git a/measure_gauge_action.h b/src/lib/measure_gauge_action.h similarity index 100% rename from measure_gauge_action.h rename to src/lib/measure_gauge_action.h diff --git a/measure_rectangles.c b/src/lib/measure_rectangles.c similarity index 100% rename from measure_rectangles.c rename to src/lib/measure_rectangles.c diff --git a/measure_rectangles.h b/src/lib/measure_rectangles.h similarity index 100% rename from measure_rectangles.h rename to src/lib/measure_rectangles.h diff --git a/misc_types.h b/src/lib/misc_types.h similarity index 99% rename from misc_types.h rename to src/lib/misc_types.h index 91ceda0a8..fee62159f 100644 --- a/misc_types.h +++ b/src/lib/misc_types.h @@ -101,7 +101,7 @@ typedef enum tm_mpi_thread_level_t { TM_MPI_THREAD_SINGLE = QMP_THREAD_SINGLE, TM_MPI_THREAD_MULTIPLE = QMP_THREAD_MULTIPLE } tm_mpi_thread_level_t; -#elif TM_USE_MPI +#elif defined(TM_USE_MPI) typedef enum tm_mpi_thread_level_t { TM_MPI_THREAD_SINGLE = 
MPI_THREAD_SERIALIZED, TM_MPI_THREAD_MULTIPLE = MPI_THREAD_MULTIPLE diff --git a/monomial/Makefile.in b/src/lib/monomial/Makefile.in similarity index 100% rename from monomial/Makefile.in rename to src/lib/monomial/Makefile.in diff --git a/monomial/clover_trlog_monomial.c b/src/lib/monomial/clover_trlog_monomial.c similarity index 100% rename from monomial/clover_trlog_monomial.c rename to src/lib/monomial/clover_trlog_monomial.c diff --git a/monomial/clover_trlog_monomial.h b/src/lib/monomial/clover_trlog_monomial.h similarity index 100% rename from monomial/clover_trlog_monomial.h rename to src/lib/monomial/clover_trlog_monomial.h diff --git a/monomial/cloverdet_monomial.c b/src/lib/monomial/cloverdet_monomial.c similarity index 100% rename from monomial/cloverdet_monomial.c rename to src/lib/monomial/cloverdet_monomial.c diff --git a/monomial/cloverdet_monomial.h b/src/lib/monomial/cloverdet_monomial.h similarity index 100% rename from monomial/cloverdet_monomial.h rename to src/lib/monomial/cloverdet_monomial.h diff --git a/monomial/cloverdetratio_monomial.c b/src/lib/monomial/cloverdetratio_monomial.c similarity index 100% rename from monomial/cloverdetratio_monomial.c rename to src/lib/monomial/cloverdetratio_monomial.c diff --git a/monomial/cloverdetratio_monomial.h b/src/lib/monomial/cloverdetratio_monomial.h similarity index 100% rename from monomial/cloverdetratio_monomial.h rename to src/lib/monomial/cloverdetratio_monomial.h diff --git a/monomial/cloverdetratio_rwmonomial.c b/src/lib/monomial/cloverdetratio_rwmonomial.c similarity index 100% rename from monomial/cloverdetratio_rwmonomial.c rename to src/lib/monomial/cloverdetratio_rwmonomial.c diff --git a/monomial/cloverdetratio_rwmonomial.h b/src/lib/monomial/cloverdetratio_rwmonomial.h similarity index 100% rename from monomial/cloverdetratio_rwmonomial.h rename to src/lib/monomial/cloverdetratio_rwmonomial.h diff --git a/monomial/clovernd_trlog_monomial.c b/src/lib/monomial/clovernd_trlog_monomial.c 
similarity index 100% rename from monomial/clovernd_trlog_monomial.c rename to src/lib/monomial/clovernd_trlog_monomial.c diff --git a/monomial/clovernd_trlog_monomial.h b/src/lib/monomial/clovernd_trlog_monomial.h similarity index 100% rename from monomial/clovernd_trlog_monomial.h rename to src/lib/monomial/clovernd_trlog_monomial.h diff --git a/monomial/cloverndpoly_monomial.c b/src/lib/monomial/cloverndpoly_monomial.c similarity index 100% rename from monomial/cloverndpoly_monomial.c rename to src/lib/monomial/cloverndpoly_monomial.c diff --git a/monomial/cloverndpoly_monomial.h b/src/lib/monomial/cloverndpoly_monomial.h similarity index 100% rename from monomial/cloverndpoly_monomial.h rename to src/lib/monomial/cloverndpoly_monomial.h diff --git a/monomial/det_monomial.c b/src/lib/monomial/det_monomial.c similarity index 100% rename from monomial/det_monomial.c rename to src/lib/monomial/det_monomial.c diff --git a/monomial/det_monomial.h b/src/lib/monomial/det_monomial.h similarity index 100% rename from monomial/det_monomial.h rename to src/lib/monomial/det_monomial.h diff --git a/monomial/detratio_monomial.c b/src/lib/monomial/detratio_monomial.c similarity index 100% rename from monomial/detratio_monomial.c rename to src/lib/monomial/detratio_monomial.c diff --git a/monomial/detratio_monomial.h b/src/lib/monomial/detratio_monomial.h similarity index 100% rename from monomial/detratio_monomial.h rename to src/lib/monomial/detratio_monomial.h diff --git a/monomial/gauge_monomial.c b/src/lib/monomial/gauge_monomial.c similarity index 100% rename from monomial/gauge_monomial.c rename to src/lib/monomial/gauge_monomial.c diff --git a/monomial/gauge_monomial.h b/src/lib/monomial/gauge_monomial.h similarity index 100% rename from monomial/gauge_monomial.h rename to src/lib/monomial/gauge_monomial.h diff --git a/monomial/moment_energy.c b/src/lib/monomial/moment_energy.c similarity index 100% rename from monomial/moment_energy.c rename to 
src/lib/monomial/moment_energy.c diff --git a/monomial/moment_energy.h b/src/lib/monomial/moment_energy.h similarity index 100% rename from monomial/moment_energy.h rename to src/lib/monomial/moment_energy.h diff --git a/monomial/monitor_forces.c b/src/lib/monomial/monitor_forces.c similarity index 100% rename from monomial/monitor_forces.c rename to src/lib/monomial/monitor_forces.c diff --git a/monomial/monitor_forces.h b/src/lib/monomial/monitor_forces.h similarity index 100% rename from monomial/monitor_forces.h rename to src/lib/monomial/monitor_forces.h diff --git a/monomial/monomial.c b/src/lib/monomial/monomial.c similarity index 100% rename from monomial/monomial.c rename to src/lib/monomial/monomial.c diff --git a/monomial/monomial.h b/src/lib/monomial/monomial.h similarity index 100% rename from monomial/monomial.h rename to src/lib/monomial/monomial.h diff --git a/monomial/nddetratio_monomial.c b/src/lib/monomial/nddetratio_monomial.c similarity index 100% rename from monomial/nddetratio_monomial.c rename to src/lib/monomial/nddetratio_monomial.c diff --git a/monomial/nddetratio_monomial.h b/src/lib/monomial/nddetratio_monomial.h similarity index 100% rename from monomial/nddetratio_monomial.h rename to src/lib/monomial/nddetratio_monomial.h diff --git a/monomial/ndpoly_monomial.c b/src/lib/monomial/ndpoly_monomial.c similarity index 100% rename from monomial/ndpoly_monomial.c rename to src/lib/monomial/ndpoly_monomial.c diff --git a/monomial/ndpoly_monomial.h b/src/lib/monomial/ndpoly_monomial.h similarity index 100% rename from monomial/ndpoly_monomial.h rename to src/lib/monomial/ndpoly_monomial.h diff --git a/monomial/ndrat_monomial.c b/src/lib/monomial/ndrat_monomial.c similarity index 100% rename from monomial/ndrat_monomial.c rename to src/lib/monomial/ndrat_monomial.c diff --git a/monomial/ndrat_monomial.h b/src/lib/monomial/ndrat_monomial.h similarity index 100% rename from monomial/ndrat_monomial.h rename to src/lib/monomial/ndrat_monomial.h 
diff --git a/monomial/ndratcor_monomial.c b/src/lib/monomial/ndratcor_monomial.c similarity index 100% rename from monomial/ndratcor_monomial.c rename to src/lib/monomial/ndratcor_monomial.c diff --git a/monomial/ndratcor_monomial.h b/src/lib/monomial/ndratcor_monomial.h similarity index 100% rename from monomial/ndratcor_monomial.h rename to src/lib/monomial/ndratcor_monomial.h diff --git a/monomial/poly_monomial.c b/src/lib/monomial/poly_monomial.c similarity index 100% rename from monomial/poly_monomial.c rename to src/lib/monomial/poly_monomial.c diff --git a/monomial/poly_monomial.h b/src/lib/monomial/poly_monomial.h similarity index 100% rename from monomial/poly_monomial.h rename to src/lib/monomial/poly_monomial.h diff --git a/monomial/rat_monomial.c b/src/lib/monomial/rat_monomial.c similarity index 100% rename from monomial/rat_monomial.c rename to src/lib/monomial/rat_monomial.c diff --git a/monomial/rat_monomial.h b/src/lib/monomial/rat_monomial.h similarity index 100% rename from monomial/rat_monomial.h rename to src/lib/monomial/rat_monomial.h diff --git a/monomial/ratcor_monomial.c b/src/lib/monomial/ratcor_monomial.c similarity index 100% rename from monomial/ratcor_monomial.c rename to src/lib/monomial/ratcor_monomial.c diff --git a/monomial/ratcor_monomial.h b/src/lib/monomial/ratcor_monomial.h similarity index 100% rename from monomial/ratcor_monomial.h rename to src/lib/monomial/ratcor_monomial.h diff --git a/mpi_init.c b/src/lib/mpi_init.c similarity index 100% rename from mpi_init.c rename to src/lib/mpi_init.c diff --git a/mpi_init.h b/src/lib/mpi_init.h similarity index 100% rename from mpi_init.h rename to src/lib/mpi_init.h diff --git a/omp_accumulator.h b/src/lib/omp_accumulator.h similarity index 100% rename from omp_accumulator.h rename to src/lib/omp_accumulator.h diff --git a/operator.c b/src/lib/operator.c similarity index 100% rename from operator.c rename to src/lib/operator.c diff --git a/operator.h b/src/lib/operator.h similarity 
index 100% rename from operator.h rename to src/lib/operator.h diff --git a/operator/Block_D_psi_body.c b/src/lib/operator/Block_D_psi_body.c similarity index 100% rename from operator/Block_D_psi_body.c rename to src/lib/operator/Block_D_psi_body.c diff --git a/operator/D_psi.c b/src/lib/operator/D_psi.c similarity index 100% rename from operator/D_psi.c rename to src/lib/operator/D_psi.c diff --git a/operator/D_psi.h b/src/lib/operator/D_psi.h similarity index 100% rename from operator/D_psi.h rename to src/lib/operator/D_psi.h diff --git a/operator/D_psi_body.c b/src/lib/operator/D_psi_body.c similarity index 100% rename from operator/D_psi_body.c rename to src/lib/operator/D_psi_body.c diff --git a/operator/Dov_proj.c b/src/lib/operator/Dov_proj.c similarity index 100% rename from operator/Dov_proj.c rename to src/lib/operator/Dov_proj.c diff --git a/operator/Dov_proj.h b/src/lib/operator/Dov_proj.h similarity index 100% rename from operator/Dov_proj.h rename to src/lib/operator/Dov_proj.h diff --git a/operator/Dov_psi.c b/src/lib/operator/Dov_psi.c similarity index 100% rename from operator/Dov_psi.c rename to src/lib/operator/Dov_psi.c diff --git a/operator/Dov_psi.h b/src/lib/operator/Dov_psi.h similarity index 100% rename from operator/Dov_psi.h rename to src/lib/operator/Dov_psi.h diff --git a/operator/Hopping_Matrix.c b/src/lib/operator/Hopping_Matrix.c similarity index 100% rename from operator/Hopping_Matrix.c rename to src/lib/operator/Hopping_Matrix.c diff --git a/operator/Hopping_Matrix.h b/src/lib/operator/Hopping_Matrix.h similarity index 100% rename from operator/Hopping_Matrix.h rename to src/lib/operator/Hopping_Matrix.h diff --git a/operator/Hopping_Matrix_32.c b/src/lib/operator/Hopping_Matrix_32.c similarity index 100% rename from operator/Hopping_Matrix_32.c rename to src/lib/operator/Hopping_Matrix_32.c diff --git a/operator/Hopping_Matrix_32.h b/src/lib/operator/Hopping_Matrix_32.h similarity index 100% rename from 
operator/Hopping_Matrix_32.h rename to src/lib/operator/Hopping_Matrix_32.h diff --git a/operator/Hopping_Matrix_32_nocom.c b/src/lib/operator/Hopping_Matrix_32_nocom.c similarity index 100% rename from operator/Hopping_Matrix_32_nocom.c rename to src/lib/operator/Hopping_Matrix_32_nocom.c diff --git a/operator/Hopping_Matrix_nocom.c b/src/lib/operator/Hopping_Matrix_nocom.c similarity index 100% rename from operator/Hopping_Matrix_nocom.c rename to src/lib/operator/Hopping_Matrix_nocom.c diff --git a/operator/Hopping_Matrix_nocom.h b/src/lib/operator/Hopping_Matrix_nocom.h similarity index 100% rename from operator/Hopping_Matrix_nocom.h rename to src/lib/operator/Hopping_Matrix_nocom.h diff --git a/operator/Makefile.in b/src/lib/operator/Makefile.in similarity index 100% rename from operator/Makefile.in rename to src/lib/operator/Makefile.in diff --git a/operator/assign_mul_one_sw_pm_imu_inv_block_body.c b/src/lib/operator/assign_mul_one_sw_pm_imu_inv_block_body.c similarity index 100% rename from operator/assign_mul_one_sw_pm_imu_inv_block_body.c rename to src/lib/operator/assign_mul_one_sw_pm_imu_inv_block_body.c diff --git a/operator/assign_mul_one_sw_pm_imu_site_lexic_body.c b/src/lib/operator/assign_mul_one_sw_pm_imu_site_lexic_body.c similarity index 100% rename from operator/assign_mul_one_sw_pm_imu_site_lexic_body.c rename to src/lib/operator/assign_mul_one_sw_pm_imu_site_lexic_body.c diff --git a/operator/clover_accumulate_deriv.c b/src/lib/operator/clover_accumulate_deriv.c similarity index 100% rename from operator/clover_accumulate_deriv.c rename to src/lib/operator/clover_accumulate_deriv.c diff --git a/operator/clover_deriv.c b/src/lib/operator/clover_deriv.c similarity index 100% rename from operator/clover_deriv.c rename to src/lib/operator/clover_deriv.c diff --git a/operator/clover_det.c b/src/lib/operator/clover_det.c similarity index 100% rename from operator/clover_det.c rename to src/lib/operator/clover_det.c diff --git 
a/operator/clover_inline.h b/src/lib/operator/clover_inline.h similarity index 100% rename from operator/clover_inline.h rename to src/lib/operator/clover_inline.h diff --git a/operator/clover_invert.c b/src/lib/operator/clover_invert.c similarity index 100% rename from operator/clover_invert.c rename to src/lib/operator/clover_invert.c diff --git a/operator/clover_leaf.c b/src/lib/operator/clover_leaf.c similarity index 100% rename from operator/clover_leaf.c rename to src/lib/operator/clover_leaf.c diff --git a/operator/clover_leaf.h b/src/lib/operator/clover_leaf.h similarity index 100% rename from operator/clover_leaf.h rename to src/lib/operator/clover_leaf.h diff --git a/operator/clover_term.c b/src/lib/operator/clover_term.c similarity index 100% rename from operator/clover_term.c rename to src/lib/operator/clover_term.c diff --git a/operator/clovertm_operators.c b/src/lib/operator/clovertm_operators.c similarity index 100% rename from operator/clovertm_operators.c rename to src/lib/operator/clovertm_operators.c diff --git a/operator/clovertm_operators.h b/src/lib/operator/clovertm_operators.h similarity index 100% rename from operator/clovertm_operators.h rename to src/lib/operator/clovertm_operators.h diff --git a/operator/clovertm_operators_32.c b/src/lib/operator/clovertm_operators_32.c similarity index 100% rename from operator/clovertm_operators_32.c rename to src/lib/operator/clovertm_operators_32.c diff --git a/operator/clovertm_operators_32.h b/src/lib/operator/clovertm_operators_32.h similarity index 100% rename from operator/clovertm_operators_32.h rename to src/lib/operator/clovertm_operators_32.h diff --git a/operator/halfspinor_body.c b/src/lib/operator/halfspinor_body.c similarity index 100% rename from operator/halfspinor_body.c rename to src/lib/operator/halfspinor_body.c diff --git a/operator/halfspinor_body_32.c b/src/lib/operator/halfspinor_body_32.c similarity index 100% rename from operator/halfspinor_body_32.c rename to 
src/lib/operator/halfspinor_body_32.c diff --git a/operator/halfspinor_hopping.h b/src/lib/operator/halfspinor_hopping.h similarity index 100% rename from operator/halfspinor_hopping.h rename to src/lib/operator/halfspinor_hopping.h diff --git a/operator/halfspinor_hopping_32.h b/src/lib/operator/halfspinor_hopping_32.h similarity index 100% rename from operator/halfspinor_hopping_32.h rename to src/lib/operator/halfspinor_hopping_32.h diff --git a/operator/hopping_bg_dbl.c b/src/lib/operator/hopping_bg_dbl.c similarity index 100% rename from operator/hopping_bg_dbl.c rename to src/lib/operator/hopping_bg_dbl.c diff --git a/operator/hopping_body_dbl.c b/src/lib/operator/hopping_body_dbl.c similarity index 100% rename from operator/hopping_body_dbl.c rename to src/lib/operator/hopping_body_dbl.c diff --git a/operator/hopping_sgl.c b/src/lib/operator/hopping_sgl.c similarity index 100% rename from operator/hopping_sgl.c rename to src/lib/operator/hopping_sgl.c diff --git a/operator/mul_one_pm_imu_inv_body.c b/src/lib/operator/mul_one_pm_imu_inv_body.c similarity index 100% rename from operator/mul_one_pm_imu_inv_body.c rename to src/lib/operator/mul_one_pm_imu_inv_body.c diff --git a/operator/mul_one_pm_imu_sub_mul_body.c b/src/lib/operator/mul_one_pm_imu_sub_mul_body.c similarity index 100% rename from operator/mul_one_pm_imu_sub_mul_body.c rename to src/lib/operator/mul_one_pm_imu_sub_mul_body.c diff --git a/operator/tm_operators.c b/src/lib/operator/tm_operators.c similarity index 100% rename from operator/tm_operators.c rename to src/lib/operator/tm_operators.c diff --git a/operator/tm_operators.h b/src/lib/operator/tm_operators.h similarity index 100% rename from operator/tm_operators.h rename to src/lib/operator/tm_operators.h diff --git a/operator/tm_operators_32.c b/src/lib/operator/tm_operators_32.c similarity index 100% rename from operator/tm_operators_32.c rename to src/lib/operator/tm_operators_32.c diff --git a/operator/tm_operators_32.h 
b/src/lib/operator/tm_operators_32.h similarity index 100% rename from operator/tm_operators_32.h rename to src/lib/operator/tm_operators_32.h diff --git a/operator/tm_operators_nd.c b/src/lib/operator/tm_operators_nd.c similarity index 100% rename from operator/tm_operators_nd.c rename to src/lib/operator/tm_operators_nd.c diff --git a/operator/tm_operators_nd.h b/src/lib/operator/tm_operators_nd.h similarity index 100% rename from operator/tm_operators_nd.h rename to src/lib/operator/tm_operators_nd.h diff --git a/operator/tm_operators_nd_32.c b/src/lib/operator/tm_operators_nd_32.c similarity index 100% rename from operator/tm_operators_nd_32.c rename to src/lib/operator/tm_operators_nd_32.c diff --git a/operator/tm_operators_nd_32.h b/src/lib/operator/tm_operators_nd_32.h similarity index 100% rename from operator/tm_operators_nd_32.h rename to src/lib/operator/tm_operators_nd_32.h diff --git a/operator/tm_sub_Hopping_Matrix.c b/src/lib/operator/tm_sub_Hopping_Matrix.c similarity index 100% rename from operator/tm_sub_Hopping_Matrix.c rename to src/lib/operator/tm_sub_Hopping_Matrix.c diff --git a/operator/tm_sub_Hopping_Matrix.h b/src/lib/operator/tm_sub_Hopping_Matrix.h similarity index 100% rename from operator/tm_sub_Hopping_Matrix.h rename to src/lib/operator/tm_sub_Hopping_Matrix.h diff --git a/operator/tm_times_Hopping_Matrix.c b/src/lib/operator/tm_times_Hopping_Matrix.c similarity index 100% rename from operator/tm_times_Hopping_Matrix.c rename to src/lib/operator/tm_times_Hopping_Matrix.c diff --git a/operator/tm_times_Hopping_Matrix.h b/src/lib/operator/tm_times_Hopping_Matrix.h similarity index 100% rename from operator/tm_times_Hopping_Matrix.h rename to src/lib/operator/tm_times_Hopping_Matrix.h diff --git a/operator_types.h b/src/lib/operator_types.h similarity index 100% rename from operator_types.h rename to src/lib/operator_types.h diff --git a/overrelaxation.c b/src/lib/overrelaxation.c similarity index 99% rename from overrelaxation.c rename 
to src/lib/overrelaxation.c index 2c2e486f7..91d95fa30 100644 --- a/overrelaxation.c +++ b/src/lib/overrelaxation.c @@ -205,7 +205,7 @@ void overrel_sweep() { static su3 v; for (mu = 0; mu < 4; mu++) { for (ix = 0; ix < VOLUME; ix++) { - get_staples(&v, ix, mu, g_gauge_field); + get_staples(&v, ix, mu, (const su3 **)g_gauge_field); flip_subgroup(ix, mu, v, 1); flip_subgroup(ix, mu, v, 2); flip_subgroup(ix, mu, v, 3); diff --git a/overrelaxation.h b/src/lib/overrelaxation.h similarity index 100% rename from overrelaxation.h rename to src/lib/overrelaxation.h diff --git a/parallel_io.h b/src/lib/parallel_io.h similarity index 100% rename from parallel_io.h rename to src/lib/parallel_io.h diff --git a/phmc.c b/src/lib/phmc.c similarity index 100% rename from phmc.c rename to src/lib/phmc.c diff --git a/phmc.h b/src/lib/phmc.h similarity index 100% rename from phmc.h rename to src/lib/phmc.h diff --git a/prepare_source.c b/src/lib/prepare_source.c similarity index 100% rename from prepare_source.c rename to src/lib/prepare_source.c diff --git a/prepare_source.h b/src/lib/prepare_source.h similarity index 100% rename from prepare_source.h rename to src/lib/prepare_source.h diff --git a/profiling/hmc/Readme.md b/src/lib/profiling/hmc/Readme.md similarity index 100% rename from profiling/hmc/Readme.md rename to src/lib/profiling/hmc/Readme.md diff --git a/profiling/hmc/example_profile.pdf b/src/lib/profiling/hmc/example_profile.pdf similarity index 100% rename from profiling/hmc/example_profile.pdf rename to src/lib/profiling/hmc/example_profile.pdf diff --git a/profiling/hmc/profile.Rmd b/src/lib/profiling/hmc/profile.Rmd similarity index 100% rename from profiling/hmc/profile.Rmd rename to src/lib/profiling/hmc/profile.Rmd diff --git a/profiling/hmc/timing.R b/src/lib/profiling/hmc/timing.R similarity index 100% rename from profiling/hmc/timing.R rename to src/lib/profiling/hmc/timing.R diff --git a/profiling/hmc_mk2/.gitignore b/src/lib/profiling/hmc_mk2/.gitignore 
similarity index 100% rename from profiling/hmc_mk2/.gitignore rename to src/lib/profiling/hmc_mk2/.gitignore diff --git a/profiling/hmc_mk2/README.md b/src/lib/profiling/hmc_mk2/README.md similarity index 100% rename from profiling/hmc_mk2/README.md rename to src/lib/profiling/hmc_mk2/README.md diff --git a/profiling/hmc_mk2/logs/example_log.out b/src/lib/profiling/hmc_mk2/logs/example_log.out similarity index 100% rename from profiling/hmc_mk2/logs/example_log.out rename to src/lib/profiling/hmc_mk2/logs/example_log.out diff --git a/profiling/hmc_mk2/make_profile.R b/src/lib/profiling/hmc_mk2/make_profile.R similarity index 100% rename from profiling/hmc_mk2/make_profile.R rename to src/lib/profiling/hmc_mk2/make_profile.R diff --git a/profiling/hmc_mk2/profile.Rmd b/src/lib/profiling/hmc_mk2/profile.Rmd similarity index 100% rename from profiling/hmc_mk2/profile.Rmd rename to src/lib/profiling/hmc_mk2/profile.Rmd diff --git a/src/lib/qphix/qphix_base_classes.hpp b/src/lib/qphix/qphix_base_classes.hpp new file mode 100644 index 000000000..26015e3a2 --- /dev/null +++ b/src/lib/qphix/qphix_base_classes.hpp @@ -0,0 +1,771 @@ +// Copyright © 2017 Martin Ueding +// Licensed under the [BSD-3-Clause](https://opensource.org/licenses/BSD-3-Clause). + +// Due to github issue #404, the helper functions to apply the full QPhiX operator +// are currently disabled because they conflict with the new interfaces in QPhiX +// itself. If required, these should be rewritten to use these interfaces +// rather than the base classes in qphix_base_classes.hpp + +// This file should be deprecated or updated to provide any functionality +// not covered by QPhiX itself. + +/** + \file Additions to QPhiX that are only needed for tmLQCD. + + In the original QPhiX, there are only Wilson fermions and Wilson clover + fermions. The Dslash operators have a different call signature (the latter + requiring a clover term), so there is no common base class.
With the addition + of Wilson twisted mass (Mario) and Wilson twisted clover (Peter), there are + now two instances of the Dslash that have the same signature. In order to + write a more general even-odd source preparation and solution reconstruction + code, a common base class for non-clover and clover is desired. In order to + leave the QPhiX code untouched (for now), this code lives here in tmLQCD. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +namespace tmlqcd { + +namespace { +size_t constexpr re = 0; +size_t constexpr im = 1; +int const n_blas_simt = 1; + +// The even checkerboard is given by ( ((x + y + z + t) & 1) == 0 ) -> cb0 is even +int constexpr cb_even = 0; +int constexpr cb_odd = 1; +} + +/** + Complex multiplication accumulate. + + Computes \f$ (r + \mathrm i i) += (a + \mathrm i b) * (c + \mathrm i d) \f$. + */ +template +void cplx_mul_acc(FT &r_out, FT &i_out, FT const &a, FT const &b, FT const &c, FT const &d) { + r_out += a * c - b * d; + i_out += a * d + b * c; +} + +/** + Wrapper for the clover multiplication function. + + The `struct` is needed in order to allow for partial template specialization in the `Clover` + parameter. + + \tparam Clover Type of clover block to use, must be a type from Geometry such that there exists a + specialization for it. + */ +template +struct InnerCloverProduct { + /** + Multiplies the clover term for a single lattice site to a spinor. + + This function is intended to be used in a loop over all lattice sites. It is expected from the + caller to have figured out all the correct indices. There are template specializations for the two + different types of clover term that are used in QPhiX. + + \param[out] out Output spinor block. It is assumed to be zeroed properly, the function will just + accumulate values into that output variable. Use \ref QPhiX::zeroSpinor for that. + \param[in] in Input spinor block.
+ \param[in] clover Single clover block that contains the lattice site of the spinor. + \param[in] xi SIMD index for the arrays with length `soalen`, as in the spinors. + \param[in] veclen_idx SIMD index for the arrays with length `veclen`, as in the clover term. + */ + static void multiply( + typename ::QPhiX::Geometry::FourSpinorBlock &out, + typename ::QPhiX::Geometry::FourSpinorBlock const &in, + Clover const &clover, int const xi, int const veclen_idx); +}; + +template +struct InnerCloverProduct::CloverBlock> { + static void multiply( + typename ::QPhiX::Geometry::FourSpinorBlock &spinor_out, + typename ::QPhiX::Geometry::FourSpinorBlock const &spinor_in, + typename ::QPhiX::Geometry::CloverBlock const &clov_block, + int const xi, int const veclen_idx) { + // The clover term is block-diagonal in spin. Therefore we need + // to iterate over the two blocks of spin. + for (auto s_block : {0, 1}) { + // Extract the diagonal and triangular parts. + auto const &diag_in = s_block == 0 ? clov_block.diag1 : clov_block.diag2; + auto const &off_diag_in = s_block == 0 ? clov_block.off_diag1 : clov_block.off_diag2; + // Input two-spinor component. + for (auto two_s_in : {0, 1}) { + // Reconstruct four spinor index. + auto const four_s_in = 2 * s_block + two_s_in; + // Output two-spinor component. + for (auto two_s_out : {0, 1}) { + // Reconstruct four spinor index. + auto const four_s_out = 2 * s_block + two_s_out; + // Input color. + for (auto c_in : {0, 1, 2}) { + // Spin-color index (0, ..., 5). + auto const sc_in = 3 * two_s_in + c_in; + // Output color. + for (auto c_out : {0, 1, 2}) { + // Spin-color index (0, ..., 5). + auto const sc_out = 3 * two_s_out + c_out; + + // See `qphix-codegen` file `dslash_common.cc` + // function + // `clover_term` for the index manipulations done + // here. + + // Using separate loops over the actual indices is + // probably + // faster than the branching in the innermost loop. 
+ + if (sc_out == sc_in) { + cplx_mul_acc(spinor_out[c_out][four_s_out][re][xi], + spinor_out[c_out][four_s_out][im][xi], diag_in[sc_in][veclen_idx], + QPhiX::rep(0.0), spinor_in[c_in][four_s_in][re][xi], + spinor_in[c_in][four_s_in][im][xi]); + } else if (sc_out < sc_in) { + auto const idx15 = sc_in * (sc_in - 1) / 2 + sc_out; + cplx_mul_acc( + spinor_out[c_out][four_s_out][re][xi], spinor_out[c_out][four_s_out][im][xi], + off_diag_in[idx15][re][veclen_idx], + // aww hell, maybe one should just add negation to QPhiX::half ? + QPhiX::rep(-QPhiX::rep(off_diag_in[idx15][im][veclen_idx])), + spinor_in[c_in][four_s_in][re][xi], spinor_in[c_in][four_s_in][im][xi]); + } else { + auto const idx15 = sc_out * (sc_out - 1) / 2 + sc_in; + cplx_mul_acc( + spinor_out[c_out][four_s_out][re][xi], spinor_out[c_out][four_s_out][im][xi], + off_diag_in[idx15][re][veclen_idx], off_diag_in[idx15][im][veclen_idx], + spinor_in[c_in][four_s_in][re][xi], spinor_in[c_in][four_s_in][im][xi]); + } + } + } + } + } + } + } +}; + +template +struct InnerCloverProduct< + FT, veclen, soalen, compress12, + typename ::QPhiX::Geometry::FullCloverBlock> { + static void multiply( + typename ::QPhiX::Geometry::FourSpinorBlock &spinor_out, + typename ::QPhiX::Geometry::FourSpinorBlock const &spinor_in, + typename ::QPhiX::Geometry::FullCloverBlock const &clov_block, + int const xi, int const veclen_idx) { + // The clover term is block-diagonal in spin. Therefore we need + // to iterate over the two blocks of spin. + for (auto s_block : {0, 1}) { + // handy reference to half-spinor block + auto const &block_in = s_block == 0 ? clov_block.block1 : clov_block.block2; + // Input two-spinor component. + for (auto two_s_in : {0, 1}) { + // Reconstruct four spinor index. + auto const four_s_in = 2 * s_block + two_s_in; + // Output two-spinor component. + for (auto two_s_out : {0, 1}) { + // Reconstruct four spinor index. + auto const four_s_out = 2 * s_block + two_s_out; + // Input color. 
+ for (auto c_in : {0, 1, 2}) { + // Spin-color index (0, ..., 5). + auto const sc_in = 3 * two_s_in + c_in; + // Output color. + for (auto c_out : {0, 1, 2}) { + // Spin-color index (0, ..., 5). + auto const sc_out = 3 * two_s_out + c_out; + + cplx_mul_acc( + spinor_out[c_out][four_s_out][re][xi], spinor_out[c_out][four_s_out][im][xi], + block_in[sc_out][sc_in][re][veclen_idx], block_in[sc_out][sc_in][im][veclen_idx], + spinor_in[c_in][four_s_in][re][xi], spinor_in[c_in][four_s_in][im][xi]); + } + } + } + } + } + } +}; + +/** + Multiplies a checkerboarded QPhiX Clover term with a checkerboarded QPhiX spinor. + + Padding is taken care of. A test case for (a copy of) this function exists in QPhiX. + + If the preprocessor macro `PRINT_MAPPING` is defined, it will print out the mapping of `(x, y, z, + t)` coordinates to block indices. Also it will check that each block is accessed the proper number + of times, that is `soalen` for spinors and `veclen` for clover blocks. + + \param[out] out Output spinor + \param[in] in Input spinor + \param[in] clover Clover block + \param[in] geom Geometry object holding the dimension of clover and spinor + */ +template +void clover_product( + typename ::QPhiX::Geometry::FourSpinorBlock *const out, + typename ::QPhiX::Geometry::FourSpinorBlock const *const in, + Clover *clover, ::QPhiX::Geometry &geom) { + ::QPhiX::zeroSpinor(out, geom, n_blas_simt); + +#ifdef PRINT_MAPPING + std::vector spin_touches(geom.getPxyz() * geom.Nt(), 0); + std::vector clover_touches(geom.getPxyz() * geom.Nt() * soalen / veclen, 0); + + std::cout << std::setw(3) << "x" << std::setw(3) << "y" << std::setw(3) << "z" << std::setw(3) + << "t" + << ":" << std::setw(5) << "spin" << std::setw(5) << "clov" + << "\n"; +#endif + + // Iterate through all the block. 
+ for (int t = 0; t < geom.Nt(); ++t) { + for (int z = 0; z < geom.Nz(); ++z) { + for (int y = 0; y < geom.Ny(); ++y) { + for (int x = 0; x < geom.Nxh(); ++x) { + // First element in the current XY plane at desired Z and T. + auto const xyBase = t * geom.getPxyz() + z * geom.getPxy(); + // Index of the SoA along the X direction. + auto const xb = x / soalen; + // Index within the SoA. + auto const xi = x % soalen; + // Global spin block index. + auto const spin_block_idx = xb + geom.Nxh() / soalen * y + xyBase; + // Global clover/gauge block index. + auto const clov_block_idx = + xb + (y / geom.nGY()) * geom.Nxh() / soalen + xyBase / geom.nGY(); + // Index of the SoA structure within the current tile. + // auto const tile = (geom.Nxh() / soalen * y + xyBase) % geom.nGY(); + auto const tile = y % geom.nGY(); + // Vector index for clover/gauge. The SoA index only runs to + // `soalen`, this index needs to run to `veclen`, that is across the + // various SoA within the tile. + auto const veclen_idx = soalen * tile + xi; + +#ifdef PRINT_MAPPING + ++spin_touches[spin_block_idx]; + ++clover_touches[clov_block_idx]; + + std::cout << std::setw(3) << x << std::setw(3) << y << std::setw(3) << z << std::setw(3) + << t << ":" << std::setw(5) << spin_block_idx << std::setw(5) << clov_block_idx + << "\n"; +#endif + + assert(xi + xb * soalen == x); + + // References to the objects at desired block. + auto const &clov_block = clover[clov_block_idx]; + auto const &spinor_in = in[spin_block_idx]; + auto &spinor_out = out[spin_block_idx]; + + InnerCloverProduct::multiply( + spinor_out, spinor_in, clov_block, xi, veclen_idx); + } + } + } + } + +#ifdef PRINT_MAPPING + std::cout << std::flush; + + // Make sure that each block got touched the correct number of times. 
+ for (int i = 0; i != spin_touches.size(); ++i) { + if (spin_touches[i] != soalen) { + std::cout << "Spin missmatch: Block " << std::setw(4) << i << " accessed " << std::setw(4) + << spin_touches[i] << " times instead of " << soalen << "\n"; + } + } + + for (int i = 0; i != clover_touches.size(); ++i) { + if (clover_touches[i] != veclen) { + std::cout << "Clover missmatch: Block " << std::setw(4) << i << " accessed " << std::setw(4) + << clover_touches[i] << " times instead of " << veclen << "\n"; + } + } + + std::cout << std::flush; +#endif +} + +/** + Abstract base class for all single-flavor Dslash variants. + + There are four Dslash operators which are implemented in QPhiX: + + - Wilson + - Wilson clover + - Wilson twisted mass + - Wilson clover with twisted mass + + Each of these has the actual Dslash operation and a so-called “achimbdpsi” operation. These act + on four-spinors given a gauge field. This base class provides a uniform interface to all four + kinds. + + This code should eventually be migrated into the QPhiX repository. Currently these classes are + mere delegators. In the QPhiX repository, the actual classes there should be used as concrete + classes. + */ +template +class Dslash { + public: + typedef ::QPhiX::Geometry Geom; + typedef typename Geom::FourSpinorBlock Spinor; + typedef typename Geom::SU3MatrixBlock SU3MatrixBlock; + + explicit Dslash(Geom *geom, double const t_boundary_, double const aniso_coeff_S_, + double const aniso_coeff_T_, double const mass_, bool use_tbc_[4] = nullptr, + double tbc_phases_[4][2] = nullptr) + : geom(geom), + t_boundary(t_boundary_), + aniso_coeff_S(aniso_coeff_S_), + aniso_coeff_T(aniso_coeff_T_), + mass(mass_) {} + + /** + Computes \f$ \psi_\mathrm o = A_\mathrm{oo} \chi_\mathrm o \f$. + + The actual definition of the matrix \f$ A_\mathrm{oo} \f$ is + implementation dependent and can be the mass factor \f$ \alpha = 4 + m + \f$ for plain Wilson or something more complicated for twisted mass.
+ + \param[out] out Output spinor \f$ \psi \f$. + \param[in] in Input spinor \f$ \chi \f$. + */ + virtual void A_chi(Spinor *const out, Spinor const *const in, int const isign, int const cb) = 0; + + /** + Computes \f$ \psi_\mathrm e = A_\mathrm{ee}^{-1} \chi_\mathrm e \f$. + + \param[out] out Output spinor \f$ \psi \f$. + \param[in] in Input spinor \f$ \chi \f$. + */ + virtual void A_inv_chi(Spinor *const out, Spinor const *const in, int const isign, + int const cb) = 0; + + /** + Forwarder for the `dslash`. + + This will call the `dslash` function of the respective QPhiX dslash class. There is a subtle + difference between the Wilson and all other cases. The Wilson dslash is just the hopping matrix, + just the operator \f$ D \f$. For every other case (clover, twisted mass, twisted mass clover), + the `dslash` member function will compute \f$ A^{-1} D \f$. In the Wilson case, this \f$ A = + \alpha = 4 + m = 1/(2 \kappa) \f$. Since that is _not_ included in the Wilson `dslash`, you will + obtain different results when using WilsonDslash::dslash and WilsonTMDslash::dslash with \f$ + \mu = 0 \f$. + + \todo Make this member function `const`. For this the member function in + QPhiX that is called internally must be marked `const` as well. + */ + virtual void dslash(Spinor *const res, const Spinor *const psi, const SU3MatrixBlock *const u, + int const isign, int const cb) = 0; + + /** + Always plain Wilson dslash. + + In contrast to the \ref dslash member function which just forwards the implementation of QPhiX, + this will always give you the “naked” plain Wilson dslash without any factors of \f$ A^{-1} \f$ + applied. + */ + virtual void plain_dslash(Spinor *const res, const Spinor *const psi, + const SU3MatrixBlock *const u, int const isign, int const cb) { + // XXX Perhaps rather implement this with an instance of the WilsonDslash instead? 
+ + auto tmp = QPhiX::makeFourSpinorHandle(*geom); + dslash(tmp.get(), psi, u, isign, cb); + A_chi(res, tmp.get(), isign, cb); + }; + + /** + Always “dressed” dslash. + + This computes \f$ A^{-1} D \f$ for all variants. In the Wilson case, this will give \f$ + \alpha^{-1} D \f$. + */ + virtual void A_inv_dslash(Spinor *const res, const Spinor *const psi, + const SU3MatrixBlock *const u, int const isign, int const cb) { + dslash(res, psi, u, isign, cb); + }; + + /** + Forwarder for the `achimbdpsi`. + + \todo Make this member function `const`. For this the member function in QPhiX that is called + internally must be marked `const` as well. + */ + virtual void achimbdpsi(Spinor *const res, const Spinor *const psi, const Spinor *const chi, + const SU3MatrixBlock *const u, double const alpha, double const beta, + int const isign, int const cb) = 0; + + /** + Prepares the sources on the odd checkerboard. + + This computes + \f[ + \tilde b_o = \frac 12 D_{oe} M_{ee}^{-1} b_e + b_o \,. + \f] + + \param[out] tilde_b_odd Prepared source + \param[in] b_even Source (right hand side) on the even lattice sites + \param[in] b_odd Source on the odd lattice sites + \param[in] u Gauge field on the odd lattice sites + */ + virtual void prepare_source(Spinor *const tilde_b_odd, Spinor const *const b_even, + Spinor const *const b_odd, SU3MatrixBlock const *const u); + + /** + Reconstructs the solution on the even lattice sites. + + This computes + \f[ + x_e = M_{ee}^{-1} \left( b_e - \frac 12 D_{eo} x_o \right) \,.
+ \f] + + \param[out] x_even Solution on the even lattice sites + \param[in] b_even Source (right hand side) on the even lattice sites + \param[in] x_odd Solution on the odd lattice sites + \param[in] u Gauge field on the even lattice sites + */ + virtual void reconstruct_solution(Spinor *const x_even, Spinor const *const b_even, + Spinor const *const x_odd, SU3MatrixBlock const *const u); + + Geom *getGeometry() const { return geom; } + + private: + Geom *const geom; + + double const t_boundary; + double const aniso_coeff_S; + double const aniso_coeff_T; + double const mass; +}; + +template +class WilsonDslash : public Dslash { + public: + typedef typename ::QPhiX::Geometry::FourSpinorBlock Spinor; + typedef typename ::QPhiX::Geometry::SU3MatrixBlock SU3MatrixBlock; + + WilsonDslash(::QPhiX::Geometry *geom_, double const t_boundary_, + double const aniso_coeff_S_, double const aniso_coeff_T_, double const mass_, + bool use_tbc_[4] = nullptr, double tbc_phases_[4][2] = nullptr) + : Dslash(geom_, t_boundary_, aniso_coeff_S_, aniso_coeff_T_, + mass_, use_tbc_, tbc_phases_), + upstream_dslash(geom_, t_boundary_, aniso_coeff_S_, aniso_coeff_T_, use_tbc_, tbc_phases_), + mass_factor_alpha(4.0 + mass_), + mass_factor_beta(1.0 / (4.0 * mass_factor_alpha)) {} + + void A_chi(Spinor *const out, Spinor const *const in, int const isign_ignored, + int const cb_ignored) override { + int const n_blas_simt = 1; + ::QPhiX::axy(mass_factor_alpha, in, out, upstream_dslash.getGeometry(), n_blas_simt); + } + + void A_inv_chi(Spinor *const out, Spinor const *const in, int const isign_ignored, + int const cb_ignored) override { + int const n_blas_simt = 1; + ::QPhiX::axy(1.0 / mass_factor_alpha, in, out, upstream_dslash.getGeometry(), n_blas_simt); + } + + void dslash(Spinor *const res, const Spinor *const psi, const SU3MatrixBlock *const u, + int const isign, int const cb) override { + upstream_dslash.dslash(res, psi, u, isign, cb); + } + + void plain_dslash(Spinor *const res, const
Spinor *const psi, const SU3MatrixBlock *const u, + int const isign, int const cb) override { + dslash(res, psi, u, isign, cb); + }; + + void A_inv_dslash(Spinor *const res, const Spinor *const psi, const SU3MatrixBlock *const u, + int const isign, int const cb) override { + auto tmp = QPhiX::makeFourSpinorHandle(upstream_dslash.getGeometry()); + dslash(tmp.get(), psi, u, isign, cb); + A_inv_chi(res, tmp.get(), isign, cb); + }; + + void achimbdpsi(Spinor *const res, const Spinor *const psi, const Spinor *const chi, + const SU3MatrixBlock *const u, double const alpha, double const beta, + int const isign, int const cb) override { + upstream_dslash.dslashAChiMinusBDPsi(res, psi, chi, u, alpha, beta, isign, cb); + } + + private: + ::QPhiX::Dslash upstream_dslash; + + double const mass_factor_alpha; + double const mass_factor_beta; +}; + +template +class WilsonTMDslash : public Dslash { + public: + typedef typename ::QPhiX::Geometry::FourSpinorBlock Spinor; + typedef typename ::QPhiX::Geometry::SU3MatrixBlock SU3MatrixBlock; + + WilsonTMDslash(::QPhiX::Geometry *geom_, double const t_boundary_, + double const aniso_coeff_S_, double const aniso_coeff_T_, double const mass_, + double const twisted_mass_, bool use_tbc_[4] = nullptr, + double tbc_phases_[4][2] = nullptr) + : Dslash(geom_, t_boundary_, aniso_coeff_S_, aniso_coeff_T_, + mass_, use_tbc_, tbc_phases_), + upstream_dslash(geom_, t_boundary_, aniso_coeff_S_, aniso_coeff_T_, mass_, twisted_mass_, + use_tbc_, tbc_phases_), + mass_factor_alpha(4.0 + mass_), + mass_factor_beta(0.25), + derived_mu(twisted_mass_ / mass_factor_alpha), + derived_mu_inv(mass_factor_alpha / + (mass_factor_alpha * mass_factor_alpha + twisted_mass_ * twisted_mass_)) {} + + void A_chi(Spinor *const out, Spinor const *const in, int const isign, + int const cb_ignored) override { + helper_A_chi(out, in, -derived_mu * isign, mass_factor_alpha); + } + + void A_inv_chi(Spinor *const out, Spinor const *const in, int const isign, + int const 
cb_ignored) override { + helper_A_chi(out, in, derived_mu * isign, derived_mu_inv); + } + + void dslash(Spinor *const res, const Spinor *const psi, const SU3MatrixBlock *const u, + int const isign, int const cb) override { + upstream_dslash.dslash(res, psi, u, isign, cb); + } + + void achimbdpsi(Spinor *const res, const Spinor *const psi, const Spinor *const chi, + const SU3MatrixBlock *const u, double const alpha, double const beta, + int const isign, int const cb) override { + upstream_dslash.dslashAChiMinusBDPsi(res, psi, chi, u, alpha, beta, isign, cb); + } + + private: + void helper_A_chi(Spinor *const out, Spinor const *const in, double const factor_a, + double const factor_b); + + ::QPhiX::TMDslash upstream_dslash; + + double const mass_factor_alpha; + double const mass_factor_beta; + double const derived_mu; + double const derived_mu_inv; +}; + +template +class WilsonClovDslash : public Dslash { + public: + typedef typename ::QPhiX::Geometry::FourSpinorBlock Spinor; + typedef typename ::QPhiX::Geometry::SU3MatrixBlock SU3MatrixBlock; + typedef typename ::QPhiX::Geometry::CloverBlock CloverBlock; + + WilsonClovDslash(::QPhiX::Geometry *geom_, + double const t_boundary_, double const aniso_coeff_S_, + double const aniso_coeff_T_, double const mass_, + CloverBlock *const (&clover_)[2], CloverBlock *const (&inv_clover_)[2], + bool use_tbc_[4] = nullptr, double tbc_phases_[4][2] = nullptr) + : Dslash(geom_, t_boundary_, aniso_coeff_S_, aniso_coeff_T_, + mass_, use_tbc_, tbc_phases_), + upstream_dslash(geom_, t_boundary_, aniso_coeff_S_, aniso_coeff_T_, use_tbc_, tbc_phases_), + mass_factor_alpha(4.0 + mass_), + mass_factor_beta(1.0 / (4.0 * mass_factor_alpha)) { + for (int cb : {0, 1}) { + clover[cb] = clover_[cb]; + inv_clover[cb] = inv_clover_[cb]; + } + } + + void A_chi(Spinor *const out, Spinor const *const in, int const isign_ignored, + int const cb) override { + clover_product(out, in, clover[cb], upstream_dslash.getGeometry()); + } + + void 
A_inv_chi(Spinor *const out, Spinor const *const in, int const isign_ignored, + int const cb) override { + clover_product(out, in, inv_clover[cb], upstream_dslash.getGeometry()); + } + + void dslash(Spinor *const res, const Spinor *const psi, const SU3MatrixBlock *const u, + int const isign, int const cb) override { + upstream_dslash.dslash(res, psi, u, inv_clover[cb], isign, cb); + } + + void achimbdpsi(Spinor *const res, const Spinor *const psi, const Spinor *const chi, + const SU3MatrixBlock *const u, double const alpha, double const beta, + int const isign, int const cb) override { + upstream_dslash.dslashAChiMinusBDPsi(res, psi, chi, u, clover[cb], mass_factor_beta, isign, cb); + } + + private: + ::QPhiX::ClovDslash upstream_dslash; + + double const mass_factor_alpha; + double const mass_factor_beta; + + /** + Reference to the clover term. + + This class has to provide a `dslash` and `achimbdpsi` member function with the prescribed + argument list which does not contain the clover term. The user of these classes should not have + to differentiate between non-clover and clover variants. In order to provide the function + signature, the clover term is a member. This means that the user has to construct a new operator + if the pointers to the clover field need to be changed. Separate pointers are kept for the fields + on the even and odd checkerboards, hence the array dimension. + */ + CloverBlock *clover[2]; + + /// See \ref clover.
+ CloverBlock *inv_clover[2]; +}; + +template +class WilsonClovTMDslash : public Dslash { + public: + typedef typename ::QPhiX::Geometry::FourSpinorBlock Spinor; + typedef typename ::QPhiX::Geometry::SU3MatrixBlock SU3MatrixBlock; + typedef + typename ::QPhiX::Geometry::FullCloverBlock FullCloverBlock; + typedef + typename ::QPhiX::Geometry::CloverBlock CloverBlock; + + WilsonClovTMDslash(::QPhiX::Geometry *geom_, + double const t_boundary_, double const aniso_coeff_S_, + double const aniso_coeff_T_, double const mass_, double const twisted_mass_, + CloverBlock *const (&clover_)[2], + FullCloverBlock *const (&inv_clover_)[2][2], bool use_tbc_[4] = nullptr, + double tbc_phases_[4][2] = nullptr) + : Dslash(geom_, t_boundary_, aniso_coeff_S_, aniso_coeff_T_, + mass_, use_tbc_, tbc_phases_), + upstream_dslash(geom_, t_boundary_, aniso_coeff_S_, aniso_coeff_T_, use_tbc_, tbc_phases_), + mass_factor_alpha(4.0 + mass_), + mass_factor_beta(0.25), + derived_mu(twisted_mass_ / mass_factor_alpha), + derived_mu_inv(mass_factor_alpha / + (mass_factor_alpha * mass_factor_alpha + twisted_mass_ * twisted_mass_)) { + for (int cb : {0, 1}) { + clover[cb] = clover_[cb]; + for (int fl : {0, 1}) { + inv_clover[cb][fl] = inv_clover_[cb][fl]; + } + } + } + + void A_chi(Spinor *const out, Spinor const *const in, int const isign, int const cb) override { + clover_product(out, in, clover[cb], upstream_dslash.getGeometry()); + // TODO: add twisted mass here + } + + void A_inv_chi(Spinor *const out, Spinor const *const in, int const isign, + int const cb) override { + if (isign == -1) { + clover_product(out, in, inv_clover[cb][1], upstream_dslash.getGeometry()); + } else { + clover_product(out, in, inv_clover[cb][0], upstream_dslash.getGeometry()); + } + } + + void dslash(Spinor *const res, const Spinor *const psi, const SU3MatrixBlock *const u, + int const isign, int const cb) override { + upstream_dslash.dslash(res, psi, u, (const FullCloverBlock **)inv_clover[cb], isign, cb); + } + + void 
achimbdpsi(Spinor *const res, const Spinor *const psi, const Spinor *const chi, + const SU3MatrixBlock *const u, double const alpha, double const beta, + int const isign, int const cb) override { + upstream_dslash.dslashAChiMinusBDPsi(res, psi, chi, u, clover[cb], + mass_factor_beta, isign, cb); + } + + private: + ::QPhiX::TMClovDslash upstream_dslash; + + double const mass_factor_alpha; + double const mass_factor_beta; + double const derived_mu; + double const derived_mu_inv; + + CloverBlock *clover[2]; + /* For twisted clover, there are two fields on each checkerboard which differ in the sign + * of the twisted quark mass. In effect then, the inner index can be thought of as being + * in flavour space while the outer index is the checkerboard index. + */ + FullCloverBlock *inv_clover[2][2]; +}; + +template +void WilsonTMDslash::helper_A_chi(Spinor *const out, + Spinor const *const in, + double const factor_a, + double const factor_b) { + auto const nVecs = upstream_dslash.getGeometry().nVecs(); + auto const Pxy = upstream_dslash.getGeometry().getPxy(); + auto const Pxyz = upstream_dslash.getGeometry().getPxyz(); + + for (uint64_t t = 0; t < T; t++) + for (uint64_t x = 0; x < LX / 2; x++) + for (uint64_t y = 0; y < LY; y++) + for (uint64_t z = 0; z < LZ; z++) { + uint64_t const SIMD_vector = x / soalen; + uint64_t const x_internal = x % soalen; + uint64_t const qphix_idx = t * Pxyz + z * Pxy + y * nVecs + SIMD_vector; + + for (int color = 0; color < 3; ++color) { + for (int spin_block = 0; spin_block < 2; ++spin_block) { + // Implement the $\gamma_5$ structure. + auto const signed_factor_a = factor_a * (spin_block == 0 ? 
1.0 : -1.0); + + for (int half_spin = 0; half_spin < 2; ++half_spin) { + auto const four_spin = 2 * spin_block + half_spin; + for (int v = 0; v < soalen; ++v) { + auto &out_bcs = out[qphix_idx][color][four_spin]; + auto const &in_bcs = in[qphix_idx][color][four_spin]; + + out_bcs[re][v] = factor_b * (in_bcs[re][v] + signed_factor_a * in_bcs[im][v]); + out_bcs[im][v] = factor_b * (in_bcs[im][v] - signed_factor_a * in_bcs[re][v]); + } + } + } + } + + } // volume +}; + +template +void Dslash::prepare_source(Spinor *const tilde_b_odd, + Spinor const *const b_even, + Spinor const *const b_odd, + SU3MatrixBlock const *const u) { + auto Mee_be = QPhiX::makeFourSpinorHandle(*geom); + WilsonDslash plain_dslash(geom, t_boundary, aniso_coeff_S, + aniso_coeff_T, mass); + + A_inv_chi(Mee_be.get(), b_even, 1, cb_even); + + plain_dslash.dslash(tilde_b_odd, Mee_be.get(), u, 1, cb_odd); + + // FIXME Perhaps use a variable number of BLAS threads here (last parameter). + QPhiX::aypx(0.5, Mee_be.get(), tilde_b_odd, *geom, 1); +} + +template +void Dslash::reconstruct_solution(Spinor *const x_even, + Spinor const *const b_even, + Spinor const *const x_odd, + SU3MatrixBlock const *const u) { + auto tmp = QPhiX::makeFourSpinorHandle(*geom); + WilsonDslash plain_dslash(geom, t_boundary, aniso_coeff_S, + aniso_coeff_T, mass); + + plain_dslash.dslash(tmp.get(), x_odd, u, 1, cb_even); + QPhiX::aypx(0.5, b_even, tmp.get(), *geom, 1); + A_inv_chi(x_even, tmp.get(), 1, cb_even); +} +} diff --git a/src/lib/qphix/qphix_interface.cpp b/src/lib/qphix/qphix_interface.cpp new file mode 100644 index 000000000..2c61427dd --- /dev/null +++ b/src/lib/qphix/qphix_interface.cpp @@ -0,0 +1,2192 @@ +/*********************************************************************** + * + * Copyright (C) 2015 Mario Schroeck + * 2016 Peter Labus + * 2017 Peter Labus, Martin Ueding, Bartosz Kostrzewa + * + * This file is part of tmLQCD. 
+ * + * tmLQCD is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * tmLQCD is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with tmLQCD. If not, see . + * + ***********************************************************************/ + +#include "qphix_interface.h" +#include "qphix_interface.hpp" +#include "qphix_interface_utils.hpp" +#include "qphix_types.h" +#include "qphix_veclen.h" + +#ifdef TM_USE_MPI +#include +#endif + +extern "C" { +#ifdef HAVE_CONFIG_H +#include "tmlqcd_config.h" +#endif +#include "boundary.h" +#include "geometry_eo.h" +#include "gettime.h" +#include "global.h" +#include "linalg/convert_eo_to_lexic.h" +#include "linalg/diff.h" +#include "linalg/square_norm.h" +#include "misc_types.h" +#include "operator/Hopping_Matrix.h" +#include "operator/clover_leaf.h" +#include "operator/clovertm_operators.h" +#include "operator_types.h" +#include "struct_accessors.h" + +// for the normalisation of the heavy doublet when running +// RHMC +#include "phmc.h" + +#include "solver/matrix_mult_typedef.h" +#include "solver/solver.h" +#include "solver/solver_field.h" +#include "solver/solver_params.h" +#include "solver/solver_types.h" +#include "start.h" +#include "xchange/xchange_gauge.h" +} +#ifdef TM_USE_OMP +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace tmlqcd; + +tm_QPhiXParams_t qphix_input; + +int By; +int Bz; +int NCores; +int 
Sy; +int Sz; +int PadXY; +int PadXYZ; +int MinCt; +int N_simt; +bool compress12; +QphixPrec_t qphix_precision; +QphixPrec_t qphix_inner_precision; + +int subLattSize[4]; +int lattSize[4]; +int qmp_geom[4]; +int qmp_tm_map[4]; + +// angles for boundary phases, values come from read_input +extern double X0, X1, X2, X3; + +bool use_tbc[4]; +double tbc_phases[4][2]; +// we always use twisted boundary conditions, which means that we are always +// periodic in time and any possible anti-periodicity is implemented via +// the phase +double constexpr t_boundary = 1.0; + +template +struct rsdTarget { + static const double value; +}; + +template <> +const double rsdTarget::value = 1.0e-3; + +template <> +const double rsdTarget::value = 1.0e-8; + +void _initQphix(int argc, char **argv, tm_QPhiXParams_t params, int c12, QphixPrec_t precision_, + QphixPrec_t inner_precision_) { + static bool qmp_topo_initialised = false; + + // Global Lattice Size + lattSize[0] = LX * g_nproc_x; + lattSize[1] = LY * g_nproc_y; + lattSize[2] = LZ * g_nproc_z; + lattSize[3] = T * g_nproc_t; + + // Local Lattice Size + subLattSize[0] = LX; + subLattSize[1] = LY; + subLattSize[2] = LZ; + subLattSize[3] = T; + + // extract twisted boundary conditions + for (int dim = 0; dim < 4; dim++) { + bool dim_tbc = false; + double dim_phase[2] = {1.0, 0.0}; + if (dim == 0) { + dim_tbc = (fabs(X1) > DBL_EPSILON); + dim_phase[0] = -((double *)(&phase_1))[0] / g_kappa; + dim_phase[1] = -((double *)(&phase_1))[1] / g_kappa; + } else if (dim == 1) { + dim_tbc = (fabs(X2) > DBL_EPSILON); + dim_phase[0] = -((double *)(&phase_2))[0] / g_kappa; + dim_phase[1] = -((double *)(&phase_2))[1] / g_kappa; + } else if (dim == 2) { + dim_tbc = (fabs(X3) > DBL_EPSILON); + dim_phase[0] = -((double *)(&phase_3))[0] / g_kappa; + dim_phase[1] = -((double *)(&phase_3))[1] / g_kappa; + } else if (dim == 3) { + dim_tbc = (fabs(X0) > DBL_EPSILON); + dim_phase[0] = -((double *)(&phase_0))[0] / g_kappa; + dim_phase[1] = -((double 
*)(&phase_0))[1] / g_kappa; + } + use_tbc[dim] = dim_tbc; + tbc_phases[dim][0] = dim_phase[0]; + tbc_phases[dim][1] = dim_phase[1]; + } + + By = params.By; + Bz = params.Bz; + NCores = params.NCores; + Sy = params.Sy; + Sz = params.Sz; + PadXY = params.PadXY; + PadXYZ = params.PadXYZ; + MinCt = params.MinCt; + N_simt = Sy * Sz; + if (c12 == 8) { + QPhiX::masterPrintf( + "# INFO QphiX: 8-parameter gauge compression not supported, using two row compression " + "instead!\n"); + c12 = 12; + } + compress12 = c12 == 12 ? true : false; + qphix_precision = precision_; + qphix_inner_precision = inner_precision_; + +#ifdef QPHIX_QMP_COMMS + // Declare the logical topology + if (!qmp_topo_initialised) { + // the QMP topology is the one implied by the number of processes in each + // dimension as required by QPHIX ( x fastest to t slowest running ) + qmp_geom[0] = g_nproc_x; + qmp_geom[1] = g_nproc_y; + qmp_geom[2] = g_nproc_z; + qmp_geom[3] = g_nproc_t; + + // in order for the topologies to agree between tmLQCD and QPhiX, the dimensions need to be + // permuted + // since Z is fastest in tmLQCD and X is second-slowest + qmp_tm_map[0] = 2; + qmp_tm_map[1] = 1; + qmp_tm_map[2] = 0; + qmp_tm_map[3] = 3; + if (QMP_declare_logical_topology_map(qmp_geom, 4, qmp_tm_map, 4) != QMP_SUCCESS) { + QMP_error("Failed to declare QMP Logical Topology\n"); + abort(); + } + // longish test to check if the logical coordinates are correctly mapped + if (g_debug_level >= 5) { + for (int proc = 0; proc < g_nproc; proc++) { + if (proc == g_proc_id) { + const int coordinates[4] = {g_proc_coords[1], g_proc_coords[2], g_proc_coords[3], + g_proc_coords[0]}; + int id = QMP_get_node_number_from(coordinates); + int *qmp_coords = QMP_get_logical_coordinates_from(id); + fflush(stdout); + printf("QMP id: %3d x:%3d y:%3d z:%3d t:%3d\n", id, qmp_coords[0], qmp_coords[1], + qmp_coords[2], qmp_coords[3]); + printf("MPI id: %3d x:%3d y:%3d z:%3d t:%3d\n\n", g_proc_id, g_proc_coords[1], + g_proc_coords[2], 
g_proc_coords[3], g_proc_coords[0]); + free(qmp_coords); + fflush(stdout); + MPI_Barrier(MPI_COMM_WORLD); + } else { + MPI_Barrier(MPI_COMM_WORLD); + } + } + } + qmp_topo_initialised = true; + } +#endif + +#ifdef QPHIX_QPX_SOURCE + if (thread_bind) { + QPhiX::setThreadAffinity(NCores_user, Sy_user * Sz_user); + } + QPhiX::reportAffinity(); +#endif +} + +void _initQphix(int argc, char **argv, tm_QPhiXParams_t params, int c12, QphixPrec_t precision_) { + _initQphix(argc, argv, params, c12, precision_, precision_); +} + +// Finalize the QPhiX library +void _endQphix() {} + +template +void reorder_clover_to_QPhiX( + QPhiX::Geometry &geom, + typename QPhiX::Geometry::CloverBlock *qphix_clover, int cb, + bool inverse, bool fl_offdiag = false) { + const double startTime = gettime(); + + /* the spin-colour clover term in sw_term and the corresponding inverse + * in sw_inv are stored in the tmLQCD gamma basis. + * When we translate spinors to QPhiX, we apply a transformation V to the tmLQCD + * spinor and then apply the same transformation to the output spinor + * ( we have V^dagger = V and V*V = 1 ) + * Thus, in order to translate the clover field, we need to copy + * (1+T)' = V*(1+T)*V, where T is the spin-colour clover-term + * This way, the clover term will be in the correct gamma basis. + * + * The tmLQCD clover term is stored in half-spinor blocks of colour matrices + * for which we need to work out what (1+T)'=V*(1+T)*V implies. + * Below, each sAB represents one 3x3 colour matrix + * + * +s33 -s32 0 0 + * T' = V*T*V = -s23 +s22 0 0 + * 0 0 +s11 -s10 + * 0 0 -s01 +s00 + * + * Such that the half-spinor blocks are inverted and within these, the ordering is + * reversed. Note that the off-diagonal 3x3 colour blocks are hermitian conjugate to + * each other and this is preserved by the transformation. 
+ * + * The QPhiX (Wilson) clover term is stored as 12 reals on the diagonal + * in two 6-element vectors, one for each half-spinor spin pair + * and two sets of off-diagonal complex components. + * + * In addition, colour matrices are transposed in QPhiX. + * + * The tmLQCD clover term is stored as: + * + * s00 s01 + * s11 + * T = s22 s23 + * s33 + * + * with indexing + * + * sw[0][0] sw[1][0] + * sw[2][0] + * sw[0][1] sw[1][1] + * sw[2][1] + * + * The inverse has four su3 blocks instead and is indexed + * sw_inv[0][0] sw_inv[1][0] + * sw_inv[3][0] sw_inv[2][0] + * sw_inv[0][1] sw_inv[1][1] + * sw_inv[3][1] sw_inv[2][1] + * + * where blocks sw_inv[3][0] and sw_inv[3][1] are relevant only when mu > 0 + * + * There is a special case for the non-degenerate twisted clover operator. The + * flavour-off-diagonal components of the inverse clover term do not have an imaginary part on the + * spin-colour diagonal. They can thus be stored as CloverBlock, which is done in the QPhiX + * implementation of the ND tmclover operator. + * + * As a hack, this inverse is prepared by sw_invert_epsbar and placed in to the last + * VOLUME/2 sites of sw_inv. Reading from there is triggered by the boolean + * fl_offdiag. + */ + + // rescale to get clover term (or its inverse) in the physical normalisation + // rather than the kappa normalisation + const double scale = inverse ? 2.0 * g_kappa : 1.0 / (2.0 * g_kappa); + su3 ***tm_clover = inverse ? 
sw_inv : sw; + + // Number of elements in spin, color & complex + const int Ns = 4; + const int Nc = 3; + const int Nz = 2; + + // Geometric parameters for QPhiX data layout + const auto ngy = geom.nGY(); + const auto nVecs = geom.nVecs(); + const auto Pxy = geom.getPxy(); + const auto Pxyz = geom.getPxyz(); + + // packer for Wilson clover (real diagonal + complex upper-triangular) + /* for the index in the off_diagN arrays, we map to an index in the su3 struct + * keeping in mind complex conjugation + * The off-diagonal in QPhiX is stored as follows: + * + * 0 1 3 6 10 + * 2 4 7 11 + * 5 8 12 + * 9 13 + * 14 + * + * which we are going to map to su3 in blocks + * + * 0* 1* + * 2* + * + * 3 4 5 + * 6 7 8 + * 10 11 12 + * + * 9* 13* + * 14* + * + * where the asterisk indicates complex conjugation. As a linear array then, + * these mappings are: + * + */ + const int od_su3_offsets[15] = {Nz, + 2 * Nz, // 0 1 + Nc * Nz + 2 * Nz, // 2 + + 0, + Nz, + 2 * Nz, // 3 4 5 + Nc * Nz, + Nc * Nz + Nz, + Nc * Nz + 2 * Nz, // 6 7 8 + + Nz, // 9 + + 2 * Nc * Nz, + 2 * Nc * Nz + Nz, + 2 * Nc * Nz + 2 * Nz, // 10 11 12 + + 2 * Nz, + Nc * Nz + 2 * Nz}; // 13 14 + +#pragma omp parallel for collapse(4) + for (int64_t t = 0; t < T; t++) { + for (int64_t z = 0; z < LZ; z++) { + for (int64_t y = 0; y < LY; y++) { + for (int64_t v = 0; v < nVecs; v++) { + int64_t block = (t * Pxyz + z * Pxy) / ngy + (y / ngy) * nVecs + v; + + for (int64_t x_soa = 0; x_soa < SOALEN; x_soa++) { + int64_t xx = (y % ngy) * SOALEN + x_soa; + int64_t q_cb_x_coord = x_soa + v * SOALEN; + int64_t tm_x_coord = q_cb_x_coord * 2 + (((t + y + z) & 1) ^ cb); + + // the inverse of the clover term is in even-odd ordering + // while the clover term itself is lexicographically ordered + // for the special case of the nd tmclover operator, the inverse of the flavour + // off-diagonal components is stored in the last VOLUME/2 elements of sw_inv + int64_t tm_idx = + (inverse ? 
g_lexic2eosub[g_ipt[t][tm_x_coord][y][z]] : g_ipt[t][tm_x_coord][y][z]) + + ((inverse && fl_offdiag) ? VOLUME / 2 : 0); + + int b_idx; + + // we begin with the diagonal elements in CloverBlock + for (int d = 0; d < 6; d++) { + // choose the block in sw which corresponds to the block in T' + b_idx = d < 3 ? 2 : 0; + // get the right colour components + qphix_clover[block].diag1[d][xx] = QPhiX::rep( + *(reinterpret_cast(&tm_clover[tm_idx][b_idx][1].c00) + + (Nc * Nz + Nz) * (d % 3)) * + scale); + + qphix_clover[block].diag2[d][xx] = QPhiX::rep( + *(reinterpret_cast(&tm_clover[tm_idx][b_idx][0].c00) + + (Nc * Nz + Nz) * (d % 3)) * + scale); + } + + b_idx = 2; // s33 and s11 + for (int od : {0, 1, 2}) { + for (int reim : {0, 1}) { + qphix_clover[block].off_diag1[od][reim][xx] = QPhiX::rep( + (reim == 1 ? -1.0 : 1.0) * + *(reinterpret_cast(&tm_clover[tm_idx][b_idx][1].c00) + + od_su3_offsets[od] + reim) * + scale); + + qphix_clover[block].off_diag2[od][reim][xx] = QPhiX::rep( + (reim == 1 ? -1.0 : 1.0) * + *(reinterpret_cast(&tm_clover[tm_idx][b_idx][0].c00) + + od_su3_offsets[od] + reim) * + scale); + } + } + + b_idx = 1; // s32 and s10 + for (int od : {3, 4, 5, 6, 7, 8, 10, 11, 12}) { + for (int reim : {0, 1}) { + qphix_clover[block].off_diag1[od][reim][xx] = QPhiX::rep( + *(reinterpret_cast(&tm_clover[tm_idx][b_idx][1].c00) + + od_su3_offsets[od] + reim) * + (-scale)); + + qphix_clover[block].off_diag2[od][reim][xx] = QPhiX::rep( + *(reinterpret_cast(&tm_clover[tm_idx][b_idx][0].c00) + + od_su3_offsets[od] + reim) * + (-scale)); + } + } + + b_idx = 0; // s22 and s00 + for (int od : {9, 13, 14}) { + for (int reim : {0, 1}) { + qphix_clover[block].off_diag1[od][reim][xx] = QPhiX::rep( + (reim == 1 ? -1.0 : 1.0) * + *(reinterpret_cast(&tm_clover[tm_idx][b_idx][1].c00) + + od_su3_offsets[od] + reim) * + scale); + + qphix_clover[block].off_diag2[od][reim][xx] = QPhiX::rep( + (reim == 1 ? 
-1.0 : 1.0) * + *(reinterpret_cast(&tm_clover[tm_idx][b_idx][0].c00) + + od_su3_offsets[od] + reim) * + scale); + } + } + + } // x_soa + } // for(v) + } // for(y) + } // for(z) + } // for(t) + + const double diffTime = gettime() - startTime; + if (g_debug_level > 1) { + QPhiX::masterPrintf( + "# QPHIX-interface: time spent in reorder_clover_to_QPhiX (CloverBlock): %f secs\n", + diffTime); + } +} + +template +void reorder_clover_to_QPhiX( + QPhiX::Geometry &geom, + typename QPhiX::Geometry::FullCloverBlock *qphix_clover[2], + int cb, bool inverse) { + const double startTime = gettime(); + + /* the spin-colour clover term in sw_term and the corresponding inverse + * in sw_inv are stored in the tmLQCD gamma basis. + * When we translate spinors to QPhiX, we apply a transformation V to the tmLQCD + * spinor and then apply the same transformation to the output spinor + * ( we have V^dagger = V and V*V = 1 ) + * Thus, in order to translate the clover field, we need to copy + * (1+T)' = V*(1+T)*V, where T is the spin-colour clover-term + * This way, the clover term will be in the correct gamma basis. + * + * The tmLQCD clover term is stored in half-spinor blocks of colour matrices + * for which we need to work out what (1+T)'=V*(1+T)*V implies. + * Below, each sAB represents one 3x3 colour matrix + * + * +s33 -s32 0 0 + * T' = V*T*V = -s23 +s22 0 0 + * 0 0 +s11 -s10 + * 0 0 -s01 +s00 + * + * Such that the half-spinor blocks are inverted and within these, the ordering is + * reversed. Note that the off-diagonal 3x3 colour blocks are hermitian conjugate to + * each other and this is preserved by the transformation. + * + * The QPhiX (tmclover) clover term and its inverse are stored as a pair of full + * 6x6 complex matrices which are multiplied with the spinor in exactly the same way + * as in tmLQCD. 
+ * + * The tmLQCD clover term is stored as: + * + * s00 s01 + * s11 + * T = s22 s23 + * s33 + * + * with indexing + * + * sw[0][0] sw[1][0] + * sw[2][0] + * sw[0][1] sw[1][1] + * sw[2][1] + * + * The inverse has four su3 blocks instead and is indexed + * sw_inv[0][0] sw_inv[1][0] + * sw_inv[3][0] sw_inv[2][0] + * sw_inv[0][1] sw_inv[1][1] + * sw_inv[3][1] sw_inv[2][1] + * + * where blocks sw_inv[3][0] and sw_inv[3][1] are relevant only when mu > 0 * + */ + + // rescale to get clover term (or its inverse) in the physical normalisation + // rather than the kappa normalisation + const double scale = inverse ? 2.0 * g_kappa : 1.0 / (2.0 * g_kappa); + su3 ***tm_clover = inverse ? sw_inv : sw; + + // Number of elements in spin, color & complex + const int Ns = 4; + const int Nc = 3; + const int Nz = 2; + + const double amu = g_mu / (2.0 * g_kappa); + + // Geometric parameters for QPhiX data layout + const auto ngy = geom.nGY(); + const auto nVecs = geom.nVecs(); + const auto Pxy = geom.getPxy(); + const auto Pxyz = geom.getPxyz(); + +#pragma omp parallel for collapse(4) + for (int64_t t = 0; t < T; t++) { + for (int64_t z = 0; z < LZ; z++) { + for (int64_t y = 0; y < LY; y++) { + for (int64_t v = 0; v < nVecs; v++) { + int64_t block = (t * Pxyz + z * Pxy) / ngy + (y / ngy) * nVecs + v; + + for (int64_t x_soa = 0; x_soa < SOALEN; x_soa++) { + int64_t xx = (y % ngy) * SOALEN + x_soa; + int64_t q_cb_x_coord = x_soa + v * SOALEN; + int64_t tm_x_coord = q_cb_x_coord * 2 + (((t + y + z) & 1) ^ cb); + + // the inverse of the clover term is in even-odd ordering + // while the clover term itself is lexicographically ordered + int64_t tm_idx = + inverse ? g_lexic2eosub[g_ipt[t][tm_x_coord][y][z]] : g_ipt[t][tm_x_coord][y][z]; + + for (int fl : {0, 1}) { + if (inverse && fl == 1) { + // the inverse clover term for the second flavour is stored at an offset + tm_idx += VOLUME / 2; + } + for (int q_hs : {0, 1}) { + auto &hs_block = + ((q_hs == 0) ? 
qphix_clover[fl][block].block1 : qphix_clover[fl][block].block2); + for (int q_sc1 = 0; q_sc1 < 6; q_sc1++) { + for (int q_sc2 = 0; q_sc2 < 6; q_sc2++) { + const int q_s1 = q_sc1 / 3; + const int q_s2 = q_sc2 / 3; + const int q_c1 = q_sc1 % 3; + const int q_c2 = q_sc2 % 3; + + // invert in spin as required by V*T*V + const int t_hs = 1 - q_hs; + // the indices inside the half-spinor are also inverted + // (which transposes them, of course) + const int t_s1 = 1 - q_s1; + const int t_s2 = 1 - q_s2; + // carry out the mapping from T' to T, keeping in mind that for the inverse + // there are four blocks also on the tmLQCD side, otherwise there are just three + const int t_b_idx = t_s1 + t_s2 + ((inverse && t_s1 == 1 && t_s2 == 0) ? 2 : 0); + for (int reim : {0, 1}) { + hs_block[q_sc1][q_sc2][reim][xx] = QPhiX::rep( + scale * + // off-diagonal (odd-numbered) blocks change sign + (t_b_idx & 1 ? (-1.0) : 1.0) * + // if not doing the inverse and in the bottom-left block, need to + // complex conjugate + ((!inverse && (t_s1 == 1 && t_s2 == 0) && reim == 1) ? -1.0 : 1.0) * + *(reinterpret_cast( + &(tm_clover[tm_idx][t_b_idx][t_hs].c00)) + + // if not doing the inverse and in the bottom-left block, transpose + // in colour + // because we're actually reading out of the top-right block + Nz * ((!inverse && (t_s1 == 1 && t_s2 == 0)) ? Nc * q_c2 + q_c1 + : Nc * q_c1 + q_c2) + + reim) + + // in the QPhiX gamma basis, the twisted quark mass enters with the + // opposite + // sign for consistency + ((!inverse && q_sc1 == q_sc2 && q_hs == 0 && reim == 1) + ? -amu * (1 - 2 * fl) + : 0) + + ((!inverse && q_sc1 == q_sc2 && q_hs == 1 && reim == 1) + ? 
amu * (1 - 2 * fl) + : 0)); + } + } // q_sc2 + } // q_sc1 + } // q_hs + } // fl + + } // x_soa + } // for(v) + } // for(y) + } // for(z) + } // for(t) + + const double diffTime = gettime() - startTime; + if (g_debug_level > 1) { + QPhiX::masterPrintf( + "# QPHIX-interface: time spent in reorder_clover_to_QPhiX (FullCloverBlock): %f secs\n", + diffTime); + } +} + +template +void reorder_gauge_to_QPhiX( + QPhiX::Geometry &geom, + typename QPhiX::Geometry::SU3MatrixBlock *qphix_gauge_cb0, + typename QPhiX::Geometry::SU3MatrixBlock *qphix_gauge_cb1) { + const double startTime = gettime(); + + // Number of elements in spin, color & complex + // Here c1 is QPhiX's outer color, and c2 the inner one + const int Ns = 4; + const int Nc1 = compress12 ? 2 : 3; + const int Nc2 = 3; + const int Nz = 2; + + // Geometric parameters for QPhiX data layout + const auto ngy = geom.nGY(); + const auto nVecs = geom.nVecs(); + const auto Pxy = geom.getPxy(); + const auto Pxyz = geom.getPxyz(); + + // This is needed to translate between the different + // orderings of the direction index "\mu" in tmlQCD + // and QPhiX, respectively + // in qphix, the Dirac operator is applied in the order + // -+x -> -+y -> -+z -> -+t + // while tmlqcd does + // -+t -> -+x -> -+y -> -+z + // same as the lattice ordering + // The mappingn between the application dimensions is thus: + // tmlqcd_dim(t(0) -> x(1) -> y(2) -> z(3)) = qphix_dim( t(3) -> x(0) -> y(1) -> z(2) ) + const int change_dim[4] = {1, 2, 3, 0}; + + // Get the base pointer for the (global) tmlQCD gauge field + xchange_gauge(g_gauge_field); + const double *in = reinterpret_cast(&g_gauge_field[0][0].c00); + +#pragma omp parallel for collapse(4) + for (int64_t t = 0; t < T; t++) + for (int64_t z = 0; z < LZ; z++) + for (int64_t y = 0; y < LY; y++) + for (int64_t v = 0; v < nVecs; v++) { + int64_t block = (t * Pxyz + z * Pxy) / ngy + (y / ngy) * nVecs + v; + + for (int dim = 0; dim < 4; dim++) // dimension == QPhiX \mu + for (int c1 = 0; c1 < 
Nc1; c1++) // QPhiX convention color 1 (runs up to 2 or 3) + for (int c2 = 0; c2 < Nc2; c2++) // QPhiX convention color 2 (always runs up to 3) + for (int x_soa = 0; x_soa < SOALEN; x_soa++) { + int64_t xx = (y % ngy) * SOALEN + x_soa; + int64_t q_cb_x_coord = x_soa + v * SOALEN; + int64_t tm_x_coord_cb0 = q_cb_x_coord * 2 + (((t + y + z) & 1) ^ 0); + int64_t tm_x_coord_cb1 = q_cb_x_coord * 2 + (((t + y + z) & 1) ^ 1); + + int64_t tm_idx_cb0; + int64_t tm_idx_cb1; + + // backward / forward + for (int dir = 0; dir < 2; dir++) { + if (dir == 0) { + tm_idx_cb0 = g_idn[g_ipt[t][tm_x_coord_cb0][y][z]][change_dim[dim]]; + tm_idx_cb1 = g_idn[g_ipt[t][tm_x_coord_cb1][y][z]][change_dim[dim]]; + } else { + tm_idx_cb0 = g_ipt[t][tm_x_coord_cb0][y][z]; + tm_idx_cb1 = g_ipt[t][tm_x_coord_cb1][y][z]; + } + for (int reim = 0; reim < Nz; reim++) { + // Note: + // ----- + // 1. \mu in QPhiX runs from 0..7 for all eight neighbouring + // links. + // Here, the ordering of the direction (backward/forward) + // is the same + // for tmlQCD and QPhiX, but we have to change the + // ordering of the dimensions. 
+ int q_mu = 2 * dim + dir; + + qphix_gauge_cb0[block][q_mu][c1][c2][reim][xx] = + QPhiX::rep(su3_get_elem( + &(g_gauge_field[tm_idx_cb0][change_dim[dim]]), c2, c1, reim)); + qphix_gauge_cb1[block][q_mu][c1][c2][reim][xx] = + QPhiX::rep(su3_get_elem( + &(g_gauge_field[tm_idx_cb1][change_dim[dim]]), c2, c1, reim)); + } + } + } // for(dim,c1,c2,x_soa) + } // outer loop (t,z,y,v) + + const double diffTime = gettime() - startTime; + if (g_debug_level > 1) { + QPhiX::masterPrintf("# QPHIX-interface: time spent in reorder_gauge_to_QPhiX: %f secs\n", + diffTime); + } +} + +// Reorder tmLQCD eo-spinor to a FourSpinorBlock QPhiX spinor on the given checkerboard +template +void reorder_eo_spinor_to_QPhiX( + QPhiX::Geometry &geom, spinor const *const tm_eo_spinor, + typename QPhiX::Geometry::FourSpinorBlock *qphix_spinor, + const int cb) { + const double startTime = gettime(); + + const int Ns = 4; + const int Nc = 3; + const int Nz = 2; + + const auto nVecs = geom.nVecs(); + const auto Pxy = geom.getPxy(); + const auto Pxyz = geom.getPxyz(); + const auto Nxh = geom.Nxh(); + + // This is needed to translate between the different + // gamma bases tmlQCD and QPhiX are using + // (note, this is a 4x4 matrix with 4 non-zero elements) + const int change_sign[4] = {1, -1, -1, 1}; + const int change_spin[4] = {3, 2, 1, 0}; + +#pragma omp parallel for collapse(4) + for (int64_t t = 0; t < T; t++) { + for (int64_t z = 0; z < LZ; z++) { + for (int64_t y = 0; y < LY; y++) { + for (int64_t v = 0; v < nVecs; v++) { + for (int col = 0; col < Nc; col++) { + for (int q_spin = 0; q_spin < Ns; q_spin++) { + for (int x_soa = 0; x_soa < SOALEN; x_soa++) { + int64_t q_ind = t * Pxyz + z * Pxy + y * nVecs + v; + int64_t q_cb_x_coord = v * SOALEN + x_soa; + // when t+y+z is odd and we're on an odd (1) checkerboard OR + // when t+y+z is even and we're on an even (0) checkerboard + // the full x coordinate is 2*x_cb + // otherwise, it is 2*x_cb+1 + int64_t tm_x_coord = q_cb_x_coord * 2 + (((t + y + 
z) & 1) ^ cb); + // exchange x and z dimensions + int64_t tm_eo_ind = g_lexic2eosub[g_ipt[t][tm_x_coord][y][z]]; + + for (int reim = 0; reim < 2; reim++) { + qphix_spinor[q_ind][col][q_spin][reim][x_soa] = QPhiX::rep( + change_sign[q_spin] * + spinor_get_elem(&(tm_eo_spinor[tm_eo_ind]), change_spin[q_spin], col, reim)); + } + } + } + } + } + } + } + } + const double diffTime = gettime() - startTime; + if (g_debug_level > 1) { + QPhiX::masterPrintf("# QPHIX-interface: time spent in reorder_eo_spinor_to_QPhiX: %f secs\n", + diffTime); + } +} + +template +void reorder_eo_spinor_from_QPhiX( + QPhiX::Geometry &geom, spinor *tm_eo_spinor, + typename QPhiX::Geometry::FourSpinorBlock *qphix_spinor, + const int cb, double normFac = 1.0) { + const double startTime = gettime(); + + const int Ns = 4; + const int Nc = 3; + const int Nz = 2; + + const auto nVecs = geom.nVecs(); + const auto Pxy = geom.getPxy(); + const auto Pxyz = geom.getPxyz(); + const auto Nxh = geom.Nxh(); + + // This is needed to translate between the different + // gamma bases tmlQCD and QPhiX are using + // (note, this is a 4x4 matrix with 4 non-zero elements) + const int change_sign[4] = {1, -1, -1, 1}; + const int change_spin[4] = {3, 2, 1, 0}; + +#pragma omp parallel for collapse(4) + for (int64_t t = 0; t < T; t++) { + for (int64_t z = 0; z < LZ; z++) { + for (int64_t y = 0; y < LY; y++) { + for (int64_t v = 0; v < nVecs; v++) { + for (int col = 0; col < Nc; col++) { + for (int q_spin = 0; q_spin < Ns; q_spin++) { + for (int x_soa = 0; x_soa < SOALEN; x_soa++) { + int64_t q_ind = t * Pxyz + z * Pxy + y * nVecs + v; + int64_t q_cb_x_coord = v * SOALEN + x_soa; + // when t+y+z is odd and we're on an odd checkerboard (1) OR + // when t+y+z is even and we're on an even (0) checkerboard + // the full x coordinate is 2*x_cb + // otherwise, it is 2*x_cb+1 + int64_t tm_x_coord = q_cb_x_coord * 2 + (((t + y + z) & 1) ^ cb); + // exchange x and z dimensions + int64_t tm_eo_ind = 
g_lexic2eosub[g_ipt[t][tm_x_coord][y][z]]; + + spinor_set_elem( + &(tm_eo_spinor[tm_eo_ind]), change_spin[q_spin], col, + change_sign[q_spin] * normFac * + QPhiX::rep(qphix_spinor[q_ind][col][q_spin][0][x_soa]), + change_sign[q_spin] * normFac * + QPhiX::rep(qphix_spinor[q_ind][col][q_spin][1][x_soa])); + } + } + } + } + } + } + } + const double diffTime = gettime() - startTime; + if (g_debug_level > 1) { + QPhiX::masterPrintf("# QPHIX-interface: time spent in reorder_eo_spinor_from_QPhiX: %f secs\n", + diffTime); + } +} + +// Reorder a full tmLQCD spinor to a cb0 and cb1 QPhiX spinor +template +void reorder_spinor_to_QPhiX(QPhiX::Geometry &geom, + double const *tm_spinor, FT *qphix_spinor_cb0, FT *qphix_spinor_cb1) { + const double startTime = gettime(); + + // Number of elements in spin, color & complex + const int Ns = 4; + const int Nc = 3; + const int Nz = 2; + + // Geometric parameters for QPhiX data layout + const auto nVecs = geom.nVecs(); + const auto Pxy = geom.getPxy(); + const auto Pxyz = geom.getPxyz(); + + // This is needed to translate between the different + // gamma bases tmlQCD and QPhiX are using + const int change_sign[4] = {1, -1, -1, 1}; + const int change_spin[4] = {3, 2, 1, 0}; + +// This will loop over the entire lattice and calculate +// the array and internal indices for both tmlQCD & QPhiX +#pragma omp parallel for collapse(4) + for (uint64_t t = 0; t < T; t++) + for (uint64_t x = 0; x < LX; x++) + for (uint64_t y = 0; y < LY; y++) + for (uint64_t z = 0; z < LZ; z++) { + // These are the QPhiX SIMD vector in checkerboarded x direction + // (up to LX/2) and the internal position inside the SIMD vector + const uint64_t SIMD_vector = (x / 2) / SOALEN; + const uint64_t x_internal = (x / 2) % SOALEN; + + // Calculate the array index in tmlQCD & QPhiX, + // given a global lattice index (t,x,y,z) + const uint64_t qphix_idx = t * Pxyz + z * Pxy + y * nVecs + SIMD_vector; + const uint64_t tm_idx = g_ipt[t][x][y][z]; + + // Calculate base point 
for every spinor field element (tmlQCD) or + // for every SIMD vector of spinors, a.k.a FourSpinorBlock (QPhiX), + // which will depend on the checkerboard (cb) + const double *in = tm_spinor + Ns * Nc * Nz * tm_idx; + FT *out; + if ((t + x + y + z) & 1) + out = qphix_spinor_cb1 + SOALEN * Nz * Nc * Ns * qphix_idx; // odd -> cb1 + else + out = qphix_spinor_cb0 + SOALEN * Nz * Nc * Ns * qphix_idx; // even -> cb0 + + // Copy the internal elements, performing a gamma basis transformation + for (int spin = 0; spin < Ns; spin++) // QPhiX spin index + for (int color = 0; color < Nc; color++) + for (int z = 0; z < Nz; z++) // RE or IM + { + const uint64_t qId = + x_internal + z * SOALEN + spin * SOALEN * Nz + color * SOALEN * Nz * Ns; + const uint64_t tId = z + color * Nz + change_spin[spin] * Nz * Nc; + + out[qId] = QPhiX::rep(change_sign[spin] * in[tId]); + } + + } // volume + + const double diffTime = gettime() - startTime; + if (g_debug_level > 1) { + QPhiX::masterPrintf("# QPHIX-interface: time spent in reorder_spinor_to_QPhiX: %f secs\n", + diffTime); + } +} + +// Reorder a cb0 and cb1 QPhiX spinor to a full tmLQCD spinor +template +void reorder_spinor_from_QPhiX(QPhiX::Geometry &geom, + double *tm_spinor, FT const *qphix_spinor_cb0, + FT const *qphix_spinor_cb1, double normFac = 1.0) { + const double startTime = gettime(); + + // Number of elements in spin, color & complex + const int Ns = 4; + const int Nc = 3; + const int Nz = 2; + + // Geometric parameters for QPhiX data layout + const auto nVecs = geom.nVecs(); + const auto Pxy = geom.getPxy(); + const auto Pxyz = geom.getPxyz(); + + // This is needed to translate between the different + // gamma bases tmlQCD and QPhiX are using + const int change_sign[4] = {1, -1, -1, 1}; + const int change_spin[4] = {3, 2, 1, 0}; + +// This will loop over the entire lattice and calculate +// the array and internal indices for both tmlQCD & QPhiX +#pragma omp parallel for collapse(4) + for (uint64_t t = 0; t < T; t++) + for 
(uint64_t x = 0; x < LX; x++) + for (uint64_t y = 0; y < LY; y++) + for (uint64_t z = 0; z < LZ; z++) { + // These are the QPhiX SIMD vector in checkerboarded x direction + // (up to LX/2) and the internal position inside the SIMD vector + const uint64_t SIMD_vector = (x / 2) / SOALEN; + const uint64_t x_internal = (x / 2) % SOALEN; + + // Calculate the array index in tmlQCD & QPhiX, + // given a global lattice index (t,x,y,z) + const uint64_t qphix_idx = t * Pxyz + z * Pxy + y * nVecs + SIMD_vector; + const uint64_t tm_idx = g_ipt[t][x][y][z]; + + // Calculate base point for every spinor field element (tmlQCD) or + // for every SIMD vector of spinors, a.k.a FourSpinorBlock (QPhiX), + // which will depend on the checkerboard (cb) + const FT *in; + if ((t + x + y + z) & 1) + in = qphix_spinor_cb1 + SOALEN * Nz * Nc * Ns * qphix_idx; // cb1 + else + in = qphix_spinor_cb0 + SOALEN * Nz * Nc * Ns * qphix_idx; // cb0 + double *out = tm_spinor + Ns * Nc * Nz * tm_idx; + + // Copy the internal elements, performing a gamma basis transformation + for (int spin = 0; spin < Ns; spin++) // tmlQCD spin index + for (int color = 0; color < Nc; color++) + for (int z = 0; z < Nz; z++) // RE or IM + { + const uint64_t qId = x_internal + z * SOALEN + change_spin[spin] * SOALEN * Nz + + color * SOALEN * Nz * Ns; + const uint64_t tId = z + color * Nz + spin * Nz * Nc; + + out[tId] = QPhiX::rep(normFac * change_sign[spin] * in[qId]); + } + + } // volume + + const double diffTime = gettime() - startTime; + if (g_debug_level > 1) { + QPhiX::masterPrintf("# QPHIX-interface: time spent in reorder_spinor_from_QPhiX: %f secs\n", + diffTime); + } +} + +template +void pack_nd_clover( + QPhiX::Geometry &geom, + QPhiX::Geometry &geom_inner, + typename QPhiX::Geometry::FullCloverBlock *full_invclov[2], + typename QPhiX::Geometry::CloverBlock *invclov_odiag, + typename QPhiX::Geometry::CloverBlock *clov, + typename QPhiX::Geometry::FullCloverBlock + *full_invclov_inner[2], + typename 
QPhiX::Geometry::CloverBlock + *invclov_odiag_inner, + typename QPhiX::Geometry::CloverBlock *clov_inner, + const int cb, bool pack_inner) { + typedef typename QPhiX::Geometry::CloverBlock QClover; + typedef typename QPhiX::Geometry::FullCloverBlock QFullClover; + typedef typename QPhiX::Geometry::CloverBlock + QClover_inner; + typedef typename QPhiX::Geometry::FullCloverBlock + QFullClover_inner; + + double start = gettime(); + reorder_clover_to_QPhiX(geom, clov, cb, false); + if (pack_inner) { + reorder_clover_to_QPhiX(geom_inner, clov_inner, cb, false); + } + + sw_invert_epsbar(g_epsbar); + reorder_clover_to_QPhiX(geom, invclov_odiag, 1 - cb, true, true); + if (pack_inner) { + reorder_clover_to_QPhiX(geom_inner, invclov_odiag_inner, 1 - cb, true, true); + } + + // no minus sign here, the difference in the sign of gamma5 + // is taken care of internally + sw_invert_mubar(g_mubar); + reorder_clover_to_QPhiX(geom, full_invclov, 1 - cb, true); + if (pack_inner) { + reorder_clover_to_QPhiX(geom_inner, full_invclov_inner, 1 - cb, true); + } + + sw_invert_nd(g_mubar * g_mubar - g_epsbar * g_epsbar); + + if (g_debug_level > 1) { + QPhiX::masterPrintf("# QPHIX-inteface: ND TMClover clover-field packing took %.4lf seconds\n", + gettime() - start); + } +} + +// Due to github issue #404, the helper functions to apply the full QPhiX operator +// are currently disabled because they conflict with the new interfaces in QPhiX +// itself. 
If required, these should be rewritten to use these interfaces +// rather than the base classes in qphix_base_classes.hpp + +// Apply the full QPhiX fermion matrix to checkerboarded tm spinors +// template +// void Mfull_helper(spinor *Even_out, spinor *Odd_out, const spinor *Even_in, const spinor *Odd_in, +// const op_type_t op_type) { +// // TODO: this should use handles for gauge and spinors because these are definitely temporary +// // objects +// typedef typename QPhiX::Geometry::SU3MatrixBlock QGauge; +// typedef typename QPhiX::Geometry::FourSpinorBlock QSpinor; +// typedef typename QPhiX::Geometry::CloverBlock QClover; +// typedef typename QPhiX::Geometry::FullCloverBlock QFullClover; +// +// if (g_debug_level > 1) tmlqcd::printQphixDiagnostics(V, S, compress, V, S, compress); +// +// double coeff_s = (FT)(1); +// double coeff_t = (FT)(1); +// +// QPhiX::Geometry geom(subLattSize, By, Bz, NCores, Sy, Sz, PadXY, PadXYZ, +// MinCt); +// +// // Wilson mass +// double mass = 1 / (2.0 * g_kappa) - 4; +// +// tmlqcd::Dslash *polymorphic_dslash; +// +// QGauge *u_packed[2]; +// QSpinor *qphix_in[2]; +// QSpinor *qphix_out[2]; +// +// QClover *clover[2]; +// QClover *inv_clover[2]; +// +// QFullClover *inv_fullclover[2][2]; +// +// QSpinor *tmp_spinor = (QSpinor *)geom.allocCBFourSpinor(); +// for (int cb : {0, 1}) { +// u_packed[cb] = (QGauge *)geom.allocCBGauge(); +// qphix_in[cb] = (QSpinor *)geom.allocCBFourSpinor(); +// qphix_out[cb] = (QSpinor *)geom.allocCBFourSpinor(); +// clover[cb] = nullptr; +// inv_clover[cb] = nullptr; +// for (int fl : {0, 1}) { +// inv_fullclover[cb][fl] = nullptr; +// } +// } +// reorder_gauge_to_QPhiX(geom, u_packed[cb_even], u_packed[cb_odd]); +// +// if (op_type == WILSON) { +// polymorphic_dslash = new tmlqcd::WilsonDslash( +// &geom, t_boundary, coeff_s, coeff_t, mass, use_tbc, tbc_phases); +// } else if (op_type == TMWILSON) { +// polymorphic_dslash = new tmlqcd::WilsonTMDslash( +// &geom, t_boundary, coeff_s, coeff_t, mass, 
-g_mu / (2.0 * g_kappa), use_tbc, tbc_phases); +// } else if (op_type == CLOVER && fabs(g_mu) <= DBL_EPSILON) { +// for (int cb : {0, 1}) { +// clover[cb] = (QClover *)geom.allocCBClov(); +// inv_clover[cb] = (QClover *)geom.allocCBClov(); +// +// reorder_clover_to_QPhiX(geom, clover[cb], cb, false); +// sw_invert(cb, 0); +// reorder_clover_to_QPhiX(geom, inv_clover[cb], cb, true); +// } +// +// polymorphic_dslash = new tmlqcd::WilsonClovDslash( +// &geom, t_boundary, coeff_s, coeff_t, mass, clover, inv_clover, use_tbc, tbc_phases); +// +// } else if (op_type == CLOVER && fabs(g_mu) > DBL_EPSILON) { +// for (int cb : {0, 1}) { +// clover[cb] = (QClover *)geom.allocCBClov(); +// for (int fl : {0, 1}) { +// inv_fullclover[cb][fl] = (QFullClover *)geom.allocCBFullClov(); +// } +// reorder_clover_to_QPhiX(geom, clover[cb], cb, false); +// sw_invert(cb, g_mu); +// reorder_clover_to_QPhiX(geom, inv_fullclover[cb], cb, true); +// } +// +// polymorphic_dslash = new tmlqcd::WilsonClovTMDslash( +// &geom, t_boundary, coeff_s, coeff_t, mass, -g_mu / (2.0 * g_kappa), clover, +// inv_fullclover, use_tbc, tbc_phases); +// +// } else { +// QPhiX::masterPrintf("tmlqcd::Mfull_helper; No such operator type: %d\n", op_type); +// abort(); +// } +// +//// reorder_eo_spinor_to_QPhiX(geom, reinterpret_cast(Even_in), +//// qphix_in[cb_even], cb_even); +//// reorder_eo_spinor_to_QPhiX(geom, reinterpret_cast(Odd_in), +/// qphix_in[cb_odd], / cb_odd); +// reorder_eo_spinor_to_QPhiX(geom, Even_in, +// qphix_in[cb_even], cb_even); +// reorder_eo_spinor_to_QPhiX(geom, Odd_in, qphix_in[cb_odd], +// cb_odd); +// // Apply QPhiX Mfull +// polymorphic_dslash->plain_dslash(qphix_out[cb_odd], qphix_in[cb_even], u_packed[cb_odd], +// /* isign == non-conjugate */ 1, cb_odd); +// polymorphic_dslash->plain_dslash(qphix_out[cb_even], qphix_in[cb_odd], u_packed[cb_even], +// /* isign == non-conjugate */ 1, cb_even); +// for (int cb : {0, 1}) { +// polymorphic_dslash->A_chi(tmp_spinor, qphix_in[cb], 1, cb); 
+// QPhiX::aypx(-0.5, tmp_spinor, qphix_out[cb], geom, 1); +// } +// +// reorder_eo_spinor_from_QPhiX(geom, Even_out, qphix_out[cb_even], +// cb_even, 2.0 * g_kappa); +// reorder_eo_spinor_from_QPhiX(geom, Odd_out, qphix_out[cb_odd], cb_odd, +// 2.0 * g_kappa); +// +// geom.free(tmp_spinor); +// for (int cb : {0, 1}) { +// geom.free(u_packed[cb]); +// geom.free(qphix_in[cb]); +// geom.free(qphix_out[cb]); +// geom.free(clover[cb]); +// geom.free(inv_clover[cb]); +// for (int fl : {0, 1}) { +// geom.free(inv_fullclover[cb][fl]); +// } +// }; +// delete (polymorphic_dslash); +//} + +// Templated even-odd preconditioned solver using QPhiX Library +template +int invert_eo_qphix_helper(std::vector > &tmlqcd_odd_out, + std::vector > &tmlqcd_odd_in, + const double target_precision, const int max_iter, const int solver_flag, + solver_params_t solver_params, const int num_flavour) { + // TODO: it would perhaps be beneficial to keep the fields resident + typedef typename QPhiX::Geometry::SU3MatrixBlock QGauge; + typedef typename QPhiX::Geometry::FourSpinorBlock QSpinor; + typedef typename QPhiX::FourSpinorHandle QSpinorHandle; + typedef typename QPhiX::Geometry::CloverBlock QClover; + typedef typename QPhiX::Geometry::FullCloverBlock QFullClover; + + typedef typename QPhiX::Geometry::SU3MatrixBlock + QGauge_inner; + typedef typename QPhiX::Geometry::FourSpinorBlock + QSpinor_inner; + typedef typename QPhiX::FourSpinorHandle + QSpinorHandle_inner; + typedef typename QPhiX::Geometry::CloverBlock + QClover_inner; + typedef typename QPhiX::Geometry::FullCloverBlock + QFullClover_inner; + + /************************ + * * + * SETUP GEOMETRY * + * * + ************************/ + + if (g_debug_level > 1) { + tmlqcd::printQphixDiagnostics(V, S, compress, V_inner, S_inner, compress_inner); + } + + QPhiX::Geometry geom(subLattSize, By, Bz, NCores, Sy, Sz, PadXY, PadXYZ, + MinCt); + + // we always create the inner geometry, the overhead should be small... 
+ QPhiX::Geometry geom_inner( + subLattSize, By, Bz, NCores, Sy, Sz, PadXY, PadXYZ, MinCt); + + // Set number of BLAS threads by hand. + // In case some implements the tune routines in QPhiX + // this may be updated... + QPhiX::masterPrintf("# Setting number of BLAS threads...\n"); + const int n_blas_simt = N_simt; + QPhiX::masterPrintf("# ...done.\n"); + + // Anisotropy Coefficents + const double coeff_s = 1.0; + const double coeff_t = 1.0; + + // The Wilson mass + const double mass = 1.0 / (2.0 * g_kappa) - 4.0; + + // Set variables need for solve + bool verbose = g_debug_level > 2 ? true : false; + int niters = -1; + int niters2 = 0; + double rsd_final = -1.0; + uint64_t site_flops = 0; + uint64_t site_flops2 = 0; + uint64_t mv_apps = 0; + uint64_t mv_apps2 = 0; + + double start_time; + double end_time; + + // support for multi-shift solves via the length of the output vector, + // which counts the shifts on the outer index and the flavour on the inner index + const int num_shifts = tmlqcd_odd_out.size(); + std::vector shifts; + shifts.resize(num_shifts); + std::vector RsdTargetArr; + RsdTargetArr.resize(num_shifts); + std::vector RsdFinalArr; + RsdFinalArr.resize(num_shifts); + + double rescale = 0.5 / g_kappa; + // the inverse of M M^dag, as required for the HMC, comes with a factor of alpha^2 + if (solver_params.solution_type == TM_SOLUTION_M_MDAG) { + rescale *= rescale; + } + + std::vector q_spinor_handles; + + QGauge *u_packed[2] = {nullptr, nullptr}; + QGauge_inner *u_packed_inner[2] = {nullptr, nullptr}; + for (int cb : {0, 1}) { + u_packed[cb] = (QGauge *)geom.allocCBGauge(); + } + // Reorder (global) input gauge field from tmLQCD to QPhiX + reorder_gauge_to_QPhiX(geom, u_packed[cb_even], u_packed[cb_odd]); + + // for mixed solvers, we also need the gauge field in the inner precision + if (solver_is_mixed(solver_flag)) { + for (int cb : {0, 1}) { + u_packed_inner[cb] = (QGauge_inner *)geom_inner.allocCBGauge(); + } + reorder_gauge_to_QPhiX(geom_inner, 
u_packed_inner[cb_even], u_packed_inner[cb_odd]); + } + + if (num_flavour == 1) { + constexpr int nf = 1; + std::vector qphix_in; + qphix_in.resize(1); + std::vector qphix_out; + qphix_out.resize(num_shifts); + QSpinor *qphix_buffer; + + QClover *qphix_clover = nullptr; + QClover *qphix_inv_clover = nullptr; + + QClover_inner *qphix_clover_inner = nullptr; + QClover_inner *qphix_inv_clover_inner = nullptr; + + QFullClover *qphix_inv_fullclover[2] = {nullptr, nullptr}; + + QFullClover_inner *qphix_inv_fullclover_inner[2] = {nullptr, nullptr}; + + q_spinor_handles.push_back(makeFourSpinorHandle(geom)); + qphix_in[0] = q_spinor_handles.back().get(); + + for (int shift = 0; shift < num_shifts; shift++) { + q_spinor_handles.push_back(makeFourSpinorHandle(geom)); + qphix_out[shift] = q_spinor_handles.back().get(); + } + + q_spinor_handles.push_back(makeFourSpinorHandle(geom)); + qphix_buffer = q_spinor_handles.back().get(); + + QPhiX::EvenOddLinearOperator *FermionMatrixQPhiX = nullptr; + QPhiX::EvenOddLinearOperator + *InnerFermionMatrixQPhiX = nullptr; + if ((fabs(g_mu) > DBL_EPSILON) && g_c_sw > DBL_EPSILON) { // TWISTED-MASS-CLOVER + qphix_clover = (QClover *)geom.allocCBClov(); + for (int fl : {0, 1}) { + qphix_inv_fullclover[fl] = (QFullClover *)geom.allocCBFullClov(); + } + reorder_clover_to_QPhiX(geom, qphix_clover, cb_odd, false); + reorder_clover_to_QPhiX(geom, qphix_inv_fullclover, cb_even, true); + + QPhiX::masterPrintf("# Creating QPhiX Twisted Clover Fermion Matrix...\n"); + FermionMatrixQPhiX = new QPhiX::EvenOddTMCloverOperator( + u_packed, qphix_clover, qphix_inv_fullclover, &geom, t_boundary, coeff_s, coeff_t, + use_tbc, tbc_phases, -0.5 * (g_mu3 + g_mu) / g_kappa); + if (solver_is_mixed(solver_flag)) { + qphix_clover_inner = (QClover_inner *)geom_inner.allocCBClov(); + for (int fl : {0, 1}) { + qphix_inv_fullclover_inner[fl] = (QFullClover_inner *)geom_inner.allocCBFullClov(); + } + reorder_clover_to_QPhiX(geom_inner, qphix_clover_inner, cb_odd, 
false); + reorder_clover_to_QPhiX(geom_inner, qphix_inv_fullclover_inner, cb_even, true); + InnerFermionMatrixQPhiX = + new QPhiX::EvenOddTMCloverOperator( + u_packed_inner, qphix_clover_inner, qphix_inv_fullclover_inner, &geom_inner, + t_boundary, coeff_s, coeff_t, use_tbc, tbc_phases, -0.5 * (g_mu3 + g_mu) / g_kappa); + } + QPhiX::masterPrintf("# ...done.\n"); + } else if (fabs(g_mu) > DBL_EPSILON) { // TWISTED-MASS + const double TwistedMass = -g_mu / (2.0 * g_kappa); + QPhiX::masterPrintf("# Creating QPhiX Twisted Mass Wilson Fermion Matrix...\n"); + FermionMatrixQPhiX = new QPhiX::EvenOddTMWilsonOperator( + mass, TwistedMass, u_packed, &geom, t_boundary, coeff_s, coeff_t, use_tbc, tbc_phases); + QPhiX::masterPrintf("# ...done.\n"); + if (solver_is_mixed(solver_flag)) { + InnerFermionMatrixQPhiX = + new QPhiX::EvenOddTMWilsonOperator( + mass, TwistedMass, u_packed_inner, &geom_inner, t_boundary, coeff_s, coeff_t, + use_tbc, tbc_phases); + } + } else if (g_c_sw > DBL_EPSILON) { // WILSON CLOVER + qphix_clover = (QClover *)geom.allocCBClov(); + qphix_inv_clover = (QClover *)geom.allocCBClov(); + + reorder_clover_to_QPhiX(geom, qphix_clover, cb_odd, false); + reorder_clover_to_QPhiX(geom, qphix_inv_clover, cb_even, true); + + QPhiX::masterPrintf("# Creating QPhiX Wilson Clover Fermion Matrix...\n"); + FermionMatrixQPhiX = new QPhiX::EvenOddCloverOperator( + u_packed, qphix_clover, qphix_inv_clover, &geom, t_boundary, coeff_s, coeff_t, use_tbc, + tbc_phases, -0.5 * g_mu3 / g_kappa); + if (solver_is_mixed(solver_flag)) { + qphix_clover_inner = (QClover_inner *)geom_inner.allocCBClov(); + qphix_inv_clover_inner = (QClover_inner *)geom_inner.allocCBClov(); + reorder_clover_to_QPhiX(geom_inner, qphix_clover_inner, cb_odd, false); + reorder_clover_to_QPhiX(geom_inner, qphix_inv_clover_inner, cb_even, true); + InnerFermionMatrixQPhiX = + new QPhiX::EvenOddCloverOperator( + u_packed_inner, qphix_clover_inner, qphix_inv_clover_inner, &geom_inner, t_boundary, + coeff_s, 
coeff_t, use_tbc, tbc_phases, -0.5 * g_mu3 / g_kappa); + } + QPhiX::masterPrintf("# ...done.\n"); + + } else { // WILSON + QPhiX::masterPrintf("# Creating QPhiX Wilson Fermion Matrix...\n"); + FermionMatrixQPhiX = new QPhiX::EvenOddWilsonOperator( + mass, u_packed, &geom, t_boundary, coeff_s, coeff_t, use_tbc, tbc_phases); + if (solver_is_mixed(solver_flag)) { + InnerFermionMatrixQPhiX = + new QPhiX::EvenOddWilsonOperator( + mass, u_packed_inner, &geom_inner, t_boundary, coeff_s, coeff_t, use_tbc, + tbc_phases); + } + QPhiX::masterPrintf("# ...done.\n"); + } + + // Create a Linear Solver Object + QPhiX::AbstractSolver *SolverQPhiX = nullptr; + QPhiX::AbstractSolver *InnerSolverQPhiX = nullptr; + QPhiX::AbstractMultiSolver *MultiSolverQPhiX = nullptr; + if (solver_flag == DUMMYHERMTEST) { + QPhiX::masterPrintf("# QPHIX: Creating dummy solver for hermiticity test...\n"); + SolverQPhiX = + new QPhiX::InvDummyHermTest >( + *FermionMatrixQPhiX, max_iter); + } else if (solver_flag == CG) { + QPhiX::masterPrintf("# QPHIX: Creating CG solver...\n"); + SolverQPhiX = new QPhiX::InvCG(*FermionMatrixQPhiX, max_iter); + } else if (solver_flag == BICGSTAB) { + QPhiX::masterPrintf("# QPHIX: Creating BiCGStab solver...\n"); + SolverQPhiX = new QPhiX::InvBiCGStab(*FermionMatrixQPhiX, max_iter); + } else if (solver_flag == MIXEDCG) { + // TODO: probably need to adjust inner solver iterations here... 
+ QPhiX::masterPrintf("# QPHIX: Creating mixed-precision CG solver...\n"); + InnerSolverQPhiX = new QPhiX::InvCG( + *InnerFermionMatrixQPhiX, max_iter); + const bool MMdag = true; + SolverQPhiX = new QPhiX::InvRichardsonMultiPrec( + *FermionMatrixQPhiX, *InnerSolverQPhiX, solver_params.mcg_delta, max_iter); + } else if (solver_flag == MIXEDBICGSTAB) { + QPhiX::masterPrintf("# QPHIX: Creating mixed-precision BICGCGSTAB solver...\n"); + InnerSolverQPhiX = new QPhiX::InvBiCGStab( + *InnerFermionMatrixQPhiX, max_iter); + const bool MMdag = false; + SolverQPhiX = new QPhiX::InvRichardsonMultiPrec( + *FermionMatrixQPhiX, *InnerSolverQPhiX, solver_params.mcg_delta, max_iter); + } else if (solver_flag == CGMMS) { + QPhiX::masterPrintf("# QPHIX: Creating multi-shift CG solver ...\n"); + MultiSolverQPhiX = + new QPhiX::MInvCG(*FermionMatrixQPhiX, max_iter, num_shifts); + } else { + QPhiX::masterPrintf(" Solver not yet supported by QPhiX!\n"); + QPhiX::masterPrintf(" Aborting...\n"); + abort(); + } + QPhiX::masterPrintf("# ...done.\n"); + + // reorder_eo_spinor_to_QPhiX(geom, reinterpret_cast(tmlqcd_odd_in[0][0]), + // qphix_in[0], cb_odd); + reorder_eo_spinor_to_QPhiX(geom, tmlqcd_odd_in[0][0], qphix_in[0], cb_odd); + QPhiX::masterPrintf("# Calling the solver...\n"); + + // Set the right precision for the QPhiX solver + // we get target_precision externally and and is given such, that it's either + // already relative or absolute + // Most QPhiX solvers allow setting absolute or relative residual + // by passing an appropriate flag, but this is not true for the multi-shift solver. + // As a result, we follow that solver and call ALL solvers with + // QPhiX::RELATIVE, which gives results consistent with tmLQCD in all cases. 
+ double rhs_norm2 = 1.0; + QPhiX::norm2Spinor(rhs_norm2, qphix_in[0], geom, n_blas_simt); + const double RsdTarget = sqrt(target_precision / rhs_norm2); + + // Calling the solver + start_time = gettime(); + if (solver_flag == DUMMYHERMTEST) { + random_spinor_field_eo(tmlqcd_odd_out[0][0], 0, RN_GAUSS); + reorder_eo_spinor_to_QPhiX(geom, tmlqcd_odd_out[0][0], qphix_buffer, cb_odd); + for (int isign : {-1, 1}) { + (*SolverQPhiX)(qphix_buffer, qphix_in[0], RsdTarget, niters, rsd_final, site_flops, mv_apps, + isign, verbose, cb_odd, QPhiX::RELATIVE); + } + QPhiX::copySpinor(qphix_out[0], qphix_buffer, geom, n_blas_simt); + } else if (solver_flag == CG || solver_flag == MIXEDCG || solver_flag == RGMIXEDCG) { + // USING CG: + // We are solving + // M M^dagger qphix_buffer = qphix_in_prepared + // here, that is, isign = -1 for the QPhiX CG solver. + (*SolverQPhiX)(qphix_buffer, qphix_in[0], RsdTarget, niters, rsd_final, site_flops, mv_apps, + -1, verbose, cb_odd, QPhiX::RELATIVE); + // After that. 
if required by the solution type, multiply with M^dagger: + // qphix_out[1] = M^dagger ( M^dagger^-1 M^-1 ) qphix_in_prepared + if (solver_params.solution_type == TM_SOLUTION_M) { + (*FermionMatrixQPhiX)(qphix_out[0], qphix_buffer, /* conjugate */ -1); + mv_apps++; + } else { + QPhiX::copySpinor(qphix_out[0], qphix_buffer, geom, n_blas_simt); + } + } else if (solver_flag == CGMMS) { + // TODO: handle the residuals properly + if (g_debug_level > 2) QPhiX::masterPrintf("# QPHIX CGMMS: shifts: \n"); + for (int shift = 0; shift < num_shifts; shift++) { + RsdTargetArr[shift] = RsdTarget; + RsdFinalArr[shift] = -1.0; + shifts[shift] = + solver_params.shifts[shift] * solver_params.shifts[shift] / (4 * g_kappa * g_kappa); + if (g_debug_level > 2) + QPhiX::masterPrintf("# QPHIX CGMMS: shift[%d] = %.6e\n", shift, shifts[shift]); + } + if (g_debug_level > 2) QPhiX::masterPrintf("\n"); + (*MultiSolverQPhiX)(qphix_out.data(), qphix_in[0], num_shifts, shifts.data(), + RsdTargetArr.data(), niters, RsdFinalArr.data(), site_flops, mv_apps, -1, + verbose); + rsd_final = RsdFinalArr[0]; + } else if (solver_flag == BICGSTAB || solver_flag == MIXEDBICGSTAB) { + (*SolverQPhiX)(qphix_buffer, qphix_in[0], RsdTarget, niters, rsd_final, site_flops, mv_apps, + 1, verbose, cb_odd, QPhiX::RELATIVE); + // for M^dagger^-1 M^-1 solution type, need to call BiCGstab twice + if (solver_params.solution_type == TM_SOLUTION_M_MDAG) { + (*SolverQPhiX)(qphix_out[0], qphix_buffer, RsdTarget, niters2, rsd_final, site_flops, + mv_apps2, -1, verbose, cb_odd, QPhiX::RELATIVE); + } else { + QPhiX::copySpinor(qphix_out[0], qphix_buffer, geom, n_blas_simt); + } + } + end_time = gettime(); + + for (int shift = 0; shift < num_shifts; shift++) { + reorder_eo_spinor_from_QPhiX(geom, tmlqcd_odd_out[shift][0], qphix_out[shift], cb_odd, + rescale); + } + + QPhiX::masterPrintf("# QPHIX: ...done.\n"); + QPhiX::masterPrintf("# QPHIX: Cleaning up\n"); + delete (FermionMatrixQPhiX); + delete (InnerFermionMatrixQPhiX); + 
delete (SolverQPhiX); + delete (InnerSolverQPhiX); + delete (MultiSolverQPhiX); + // on KNL, it seems that munmap is problematic, so we check for nullptr + if (qphix_clover) geom.free(qphix_clover); + if (qphix_inv_clover) geom.free(qphix_inv_clover); + if (qphix_clover_inner) geom_inner.free(qphix_clover_inner); + if (qphix_inv_clover_inner) geom_inner.free(qphix_inv_clover_inner); + for (int fl : {0, 1}) { + if (qphix_inv_fullclover[fl]) geom.free(qphix_inv_fullclover[fl]); + if (qphix_inv_fullclover_inner[fl]) geom_inner.free(qphix_inv_fullclover_inner[fl]); + } + QPhiX::masterPrintf("# QPHIX: ...done.\n\n"); + + } else if (num_flavour == 2) { + // for explicit template arguments + constexpr int nf = 2; + + QSpinor *qphix_in[2]; + std::vector qphix_out; + qphix_out.resize(num_shifts); + for (int shift = 0; shift < num_shifts; shift++) { + qphix_out[shift] = new QSpinor *[2]; + for (int fl : {0, 1}) { + q_spinor_handles.push_back(makeFourSpinorHandle(geom)); + qphix_out[shift][fl] = q_spinor_handles.back().get(); + } + } + + QSpinor *qphix_buffer[2]; + for (int fl : {0, 1}) { + q_spinor_handles.push_back(makeFourSpinorHandle(geom)); + qphix_in[fl] = q_spinor_handles.back().get(); + q_spinor_handles.push_back(makeFourSpinorHandle(geom)); + qphix_buffer[fl] = q_spinor_handles.back().get(); + } + + QClover *qphix_clover = nullptr; + QClover_inner *qphix_clover_inner = nullptr; + + QClover *qphix_invclov_odiag = nullptr; + QClover_inner *qphix_invclov_odiag_inner = nullptr; + + QFullClover *qphix_inv_fullclover[2] = {nullptr, nullptr}; + QFullClover_inner *qphix_inv_fullclover_inner[2] = {nullptr, nullptr}; + + QPhiX::TwoFlavEvenOddLinearOperator *TwoFlavFermionMatrixQPhiX = nullptr; + QPhiX::TwoFlavEvenOddLinearOperator + *InnerTwoFlavFermionMatrixQPhiX = nullptr; + + if (g_c_sw > DBL_EPSILON) { // DBCLOVER + qphix_clover = (QClover *)geom.allocCBClov(); + qphix_invclov_odiag = (QClover *)geom.allocCBClov(); + if (solver_is_mixed(solver_flag)) { + qphix_clover_inner 
= (QClover_inner *)geom_inner.allocCBClov(); + qphix_invclov_odiag_inner = (QClover_inner *)geom_inner.allocCBClov(); + } + + for (int fl : {0, 1}) { + qphix_inv_fullclover[fl] = (QFullClover *)geom.allocCBFullClov(); + if (solver_is_mixed(solver_flag)) { + qphix_inv_fullclover_inner[fl] = (QFullClover_inner *)geom_inner.allocCBFullClov(); + } + } + + pack_nd_clover(geom, geom_inner, qphix_inv_fullclover, qphix_invclov_odiag, qphix_clover, + qphix_inv_fullclover_inner, qphix_invclov_odiag_inner, qphix_clover_inner, + cb_odd, solver_is_mixed(solver_flag)); + + QPhiX::masterPrintf( + "# QPHIX: Creating two-flavour QPhiX Wilson Twisted Clover Fermion Matrix...\n"); + TwoFlavFermionMatrixQPhiX = new QPhiX::EvenOddNDTMCloverReuseOperator( + -0.5 * g_mubar / g_kappa, 0.5 * g_epsbar / g_kappa, u_packed, qphix_clover, + qphix_invclov_odiag, qphix_inv_fullclover, &geom, t_boundary, coeff_s, coeff_t, use_tbc, + tbc_phases); + if (solver_is_mixed(solver_flag)) { + InnerTwoFlavFermionMatrixQPhiX = + new QPhiX::EvenOddNDTMCloverReuseOperator( + -0.5 * g_mubar / g_kappa, 0.5 * g_epsbar / g_kappa, u_packed_inner, + qphix_clover_inner, qphix_invclov_odiag_inner, qphix_inv_fullclover_inner, + &geom_inner, t_boundary, coeff_s, coeff_t, use_tbc, tbc_phases); + } + } else { // DBTMWILSON + QPhiX::masterPrintf( + "# QPHIX: Creating two-flavour QPhiX Wilson Twisted Mass Fermion Matrix...\n"); + TwoFlavFermionMatrixQPhiX = new QPhiX::EvenOddNDTMWilsonReuseOperator( + mass, -0.5 * g_mubar / g_kappa, 0.5 * g_epsbar / g_kappa, u_packed, &geom, t_boundary, + coeff_s, coeff_t, use_tbc, tbc_phases); + if (solver_is_mixed(solver_flag)) { + InnerTwoFlavFermionMatrixQPhiX = + new QPhiX::EvenOddNDTMWilsonReuseOperator( + mass, -0.5 * g_mubar / g_kappa, 0.5 * g_epsbar / g_kappa, u_packed_inner, + &geom_inner, t_boundary, coeff_s, coeff_t, use_tbc, tbc_phases); + } + } + + // + QPhiX::AbstractSolver *TwoFlavSolverQPhiX = nullptr; + QPhiX::AbstractSolver *InnerTwoFlavSolverQPhiX = + nullptr; + 
QPhiX::AbstractMultiSolver *TwoFlavMultiSolverQPhiX = nullptr; + if (solver_flag == DUMMYHERMTEST) { + QPhiX::masterPrintf("# QPHIX: Creating dummy solver for hermiticity test...\n"); + TwoFlavSolverQPhiX = new QPhiX::InvDummyHermTest< + FT, V, S, compress, typename QPhiX::TwoFlavEvenOddLinearOperator >( + *TwoFlavFermionMatrixQPhiX, max_iter); + } else if (solver_flag == CG) { + QPhiX::masterPrintf("# QPHIX: Creating CG solver...\n"); + TwoFlavSolverQPhiX = + new QPhiX::InvCG >( + *TwoFlavFermionMatrixQPhiX, max_iter); + } else if (solver_flag == BICGSTAB) { + QPhiX::masterPrintf("# QPHIX: Creating BiCGstab solver...\n"); + TwoFlavSolverQPhiX = + new QPhiX::InvBiCGStab >( + *TwoFlavFermionMatrixQPhiX, max_iter); + } else if (solver_flag == MIXEDCG) { + QPhiX::masterPrintf("# QPHIX: Creating mixed-precision CG solver...\n"); + InnerTwoFlavSolverQPhiX = + new QPhiX::InvCG >( + *InnerTwoFlavFermionMatrixQPhiX, max_iter); + const bool MMdag = true; + TwoFlavSolverQPhiX = new QPhiX::InvRichardsonMultiPrec< + FT, V, S, compress, FT_inner, V_inner, S_inner, compress_inner, MMdag, + typename QPhiX::TwoFlavEvenOddLinearOperator >( + *TwoFlavFermionMatrixQPhiX, *InnerTwoFlavSolverQPhiX, solver_params.mcg_delta, max_iter); + } else if (solver_flag == CGMMSND) { + QPhiX::masterPrintf("# QPHIX: Creating multi-shift CG solver...\n"); + TwoFlavMultiSolverQPhiX = + new QPhiX::MInvCG >( + *TwoFlavFermionMatrixQPhiX, max_iter, num_shifts); + } else { + QPhiX::masterPrintf(" Solver not yet supported by QPhiX!\n"); + QPhiX::masterPrintf(" Aborting...\n"); + abort(); + } + QPhiX::masterPrintf("# QPHIX: ...done.\n"); + + for (int fl : {0, 1}) { + // reorder_eo_spinor_to_QPhiX(geom, reinterpret_cast(tmlqcd_odd_in[0][fl]), + // qphix_in[fl], cb_odd); + reorder_eo_spinor_to_QPhiX(geom, tmlqcd_odd_in[0][fl], qphix_in[fl], cb_odd); + } + + QPhiX::masterPrintf("# QPHIX: Calling the solver...\n"); + + // Set the right precision for the QPhiX solver + // we get target_precision externally and 
and is given such, that it's either + // already relative or absolute + // Most QPhiX solvers allow setting absolute or relative residual + // by passing an appropriate flag, but this is not true for the multi-shift solver. + // As a result, we follow that solver and call ALL solvers with + // QPhiX::RELATIVE, which gives results consistent with tmLQCD in all cases. + double rhs_norm2 = 1.0; + QPhiX::norm2Spinor(rhs_norm2, qphix_in, geom, n_blas_simt); + const double RsdTarget = sqrt(target_precision / rhs_norm2); + + // Calling the solver + start_time = gettime(); + if (solver_flag == DUMMYHERMTEST) { + for (int fl : {0, 1}) { + random_spinor_field_eo(tmlqcd_odd_out[0][fl], 0, RN_GAUSS); + reorder_eo_spinor_to_QPhiX(geom, tmlqcd_odd_out[0][fl], qphix_buffer[fl], cb_odd); + } + for (int isign : {-1, 1}) { + (*TwoFlavSolverQPhiX)(qphix_buffer, qphix_in, RsdTarget, niters, rsd_final, site_flops, + mv_apps, isign, verbose, cb_odd, QPhiX::RELATIVE); + } + QPhiX::copySpinor(qphix_out[0], qphix_buffer, geom, n_blas_simt); + } else if (solver_flag == CG || solver_flag == MIXEDCG) { + // USING CG: + // We are solving + // M M^dagger qphix_buffer = qphix_in_prepared + // here, that is, isign = -1 for the QPhiX CG solver. + (*TwoFlavSolverQPhiX)(qphix_buffer, qphix_in, RsdTarget, niters, rsd_final, site_flops, + mv_apps, -1, verbose, cb_odd, QPhiX::RELATIVE); + // After that. 
if required by the solution type, multiply with M^dagger: + // qphix_out[1] = M^dagger M^dagger^-1 M^-1 qphix_in_prepared + if (solver_params.solution_type == TM_SOLUTION_M) { + (*TwoFlavFermionMatrixQPhiX)(qphix_out[0], qphix_buffer, /* conjugate */ -1); + mv_apps++; + } else { + QPhiX::copySpinor(qphix_out[0], qphix_buffer, geom, n_blas_simt); + } + } else if (solver_flag == BICGSTAB || solver_flag == MIXEDBICGSTAB) { + (*TwoFlavSolverQPhiX)(qphix_buffer, qphix_in, RsdTarget, niters, rsd_final, site_flops, + mv_apps, 1, verbose, cb_odd, QPhiX::RELATIVE); + // for M^dagger^-1 M^-1 solution type, need to call BiCGstab twice + if (solver_params.solution_type == TM_SOLUTION_M_MDAG) { + (*TwoFlavSolverQPhiX)(qphix_out[0], qphix_buffer, RsdTarget, niters2, rsd_final, site_flops, + mv_apps2, -1, verbose, cb_odd, QPhiX::RELATIVE); + } else { + QPhiX::copySpinor(qphix_out[0], qphix_buffer, geom, n_blas_simt); + } + } else if (solver_flag == CGMMSND) { + // TODO: handle the residuals properly + if (g_debug_level > 2) QPhiX::masterPrintf("# QPHIX CGMMSND: shifts: \n"); + // tmLQCD weights the operator with 1/maxev in the RHMC relative to the shifts + // we will do this externally on the inverse (in monomial_solve) and thus need to weight + // the shifts by maxev^2 + const double maxev_sq = (1.0 / phmc_invmaxev) * (1.0 / phmc_invmaxev); + for (int shift = 0; shift < num_shifts; shift++) { + RsdTargetArr[shift] = RsdTarget; + RsdFinalArr[shift] = -1.0; + shifts[shift] = maxev_sq * solver_params.shifts[shift] * solver_params.shifts[shift] / + (4 * g_kappa * g_kappa); + if (g_debug_level > 2) QPhiX::masterPrintf("# [%d] = %lf\n", shift, shifts[shift]); + } + if (g_debug_level > 2) QPhiX::masterPrintf("\n"); + (*TwoFlavMultiSolverQPhiX)(qphix_out.data(), qphix_in, num_shifts, shifts.data(), + RsdTargetArr.data(), niters, RsdFinalArr.data(), site_flops, + mv_apps, -1, verbose); + rsd_final = RsdFinalArr[0]; + } + end_time = gettime(); + + for (int shift = 0; shift < num_shifts; 
shift++) { + for (int fl : {0, 1}) { + reorder_eo_spinor_from_QPhiX(geom, tmlqcd_odd_out[shift][fl], qphix_out[shift][fl], cb_odd, + rescale); + } + } + + delete TwoFlavFermionMatrixQPhiX; + delete InnerTwoFlavFermionMatrixQPhiX; + delete InnerTwoFlavSolverQPhiX; + delete TwoFlavMultiSolverQPhiX; + delete TwoFlavSolverQPhiX; + for (int shift = 0; shift < num_shifts; shift++) { + delete[] qphix_out[shift]; + } + + if (qphix_clover) geom.free(qphix_clover); + if (qphix_invclov_odiag) geom.free(qphix_invclov_odiag); + if (qphix_clover_inner) geom_inner.free(qphix_clover_inner); + if (qphix_invclov_odiag_inner) geom_inner.free(qphix_invclov_odiag_inner); + for (int fl : {0, 1}) { + if (qphix_inv_fullclover[fl]) geom.free(qphix_inv_fullclover[fl]); + if (qphix_inv_fullclover_inner[fl]) geom_inner.free(qphix_inv_fullclover_inner[fl]); + } + + } else { // if(num_flavour) + // complain, this number of flavours is not valid + } // if(num_flavour) + + for (int cb : {0, 1}) { + if (u_packed[cb]) geom.free(u_packed[cb]); + if (u_packed_inner[cb]) geom_inner.free(u_packed_inner[cb]); + } + + // FIXME: This should be called properly somewhere else + _endQphix(); + + QPhiX::masterPrintf("# ...done.\n\n"); + + uint64_t num_cb_sites = lattSize[0] / 2 * lattSize[1] * lattSize[2] * lattSize[3]; + // FIXME: this needs to be adjusted depending on the operator used + uint64_t op_flops_per_site = 1320; + uint64_t total_flops = + (site_flops + site_flops2 + (2 * num_flavour * op_flops_per_site) * (mv_apps + mv_apps2)) * + num_cb_sites; + QPhiX::masterPrintf("# QPHIX: Solver Time = %g sec\n", (end_time - start_time)); + QPhiX::masterPrintf("# QPHIX: Performance in GFLOPS = %g\n\n", + 1.0e-9 * total_flops / (end_time - start_time)); + + if (solver_is_mixed(solver_flag)) { + // the mixed solver reports the outer iterations, we would like to get + // some better total + niters = mv_apps / 2; + if (solver_flag == MIXEDBICGSTAB && solver_params.solution_type == TM_SOLUTION_M_MDAG) { + niters2 = 
mv_apps2 / 2; + } + } + // solver did not converge in maximum number of iterations + // FIXME: non-convergence does not work correctly yet + if ((niters + niters2) > max_iter) { + niters = -1; + niters2 = 0; + } + return (niters + niters2); +} + +// Due to github issue #404, the helper functions to apply the full QPhiX operator +// are currently disabled because they conflict with the new interfaces in QPhiX +// itself. If required, these should be rewritten to use these interfaces +// rather than the base classes in qphix_base_classes.hpp + +// Template wrapper for the Dslash operator call-able from C code +// void Mfull_qphix(spinor *Even_out, spinor *Odd_out, const spinor *Even_in, const spinor *Odd_in, +// const op_type_t op_type) { +// tmlqcd::checkQphixInputParameters(qphix_input); +// // FIXME: two-row gauge compression and double precision hard-coded +// _initQphix(0, nullptr, qphix_input, 12, QPHIX_DOUBLE_PREC); +// +// if (qphix_precision == QPHIX_DOUBLE_PREC) { +// if (QPHIX_SOALEN > VECLEN_DP) { +// QPhiX::masterPrintf("SOALEN=%d is greater than the double prec VECLEN=%d\n", QPHIX_SOALEN, +// VECLEN_DP); +// abort(); +// } +// QPhiX::masterPrintf("TESTING IN DOUBLE PRECISION \n"); +// if (compress12) { +// Mfull_helper(Even_out, Odd_out, Even_in, Odd_in, +// op_type); +// } else { +// Mfull_helper(Even_out, Odd_out, Even_in, Odd_in, +// op_type); +// } +// } else if (qphix_precision == QPHIX_FLOAT_PREC) { +// if (QPHIX_SOALEN > VECLEN_SP) { +// QPhiX::masterPrintf("SOALEN=%d is greater than the single prec VECLEN=%d\n", QPHIX_SOALEN, +// VECLEN_SP); +// abort(); +// } +// QPhiX::masterPrintf("TESTING IN SINGLE PRECISION \n"); +// if (compress12) { +// Mfull_helper(Even_out, Odd_out, Even_in, Odd_in, +// op_type); +// } else { +// Mfull_helper(Even_out, Odd_out, Even_in, Odd_in, +// op_type); +// } +// } +// #if (defined(QPHIX_MIC_SOURCE) || defined(QPHIX_AVX512_SOURCE)) +// else if (qphix_precision == QPHIX_HALF_PREC) { +// if (QPHIX_SOALEN > VECLEN_HP) 
{ +// QPhiX::masterPrintf("SOALEN=%d is greater than the half prec VECLEN=%d\n", QPHIX_SOALEN, +// VECLEN_HP); +// abort(); +// } +// QPhiX::masterPrintf("TESTING IN HALF PRECISION \n"); +// if (compress12) { +// Mfull_helper(Even_out, Odd_out, Even_in, Odd_in, +// op_type); +// } else { +// Mfull_helper(Even_out, Odd_out, Even_in, +// Odd_in, +// op_type); +// } +// } +// #endif +//} + +// we have a unified interface for n-flavour inversions, but we need to provide wrappers +// which can be called by the tmLQCD solver drivers for one and two-flavour inversions +int invert_eo_qphix_oneflavour(spinor *Odd_out_1f, spinor *Odd_in_1f, const int max_iter, + const double precision, const int solver_flag, const int rel_prec, + const solver_params_t solver_params, const SloppyPrecision sloppy, + const CompressionType compression) { + const int num_flavour = 1; + const int num_shifts = 1; + std::vector > Odd_out; + std::vector > Odd_in; + + Odd_out.resize(num_shifts); + Odd_out[0].resize(num_flavour); + Odd_in.resize(1); + Odd_in[0].resize(num_flavour); + + Odd_in[0][0] = Odd_in_1f; + Odd_out[0][0] = Odd_out_1f; + + return invert_eo_qphix_nflavour_mshift(Odd_out, Odd_in, precision, max_iter, solver_flag, + rel_prec, solver_params, sloppy, compression, num_flavour); +} + +int invert_eo_qphix_oneflavour_mshift(spinor **Odd_out_1f, spinor *Odd_in_1f, const int max_iter, + const double precision, const int solver_flag, + const int rel_prec, const solver_params_t solver_params, + const SloppyPrecision sloppy, + const CompressionType compression) { + // even though the default is set to 1, guard against zeroes + const int num_shifts = solver_params.no_shifts == 0 ? 
1 : solver_params.no_shifts; + const int num_flavour = 1; + std::vector > Odd_out; + std::vector > Odd_in; + + Odd_out.resize(num_shifts); + Odd_in.resize(1); + Odd_in[0].resize(num_flavour); + + Odd_in[0][0] = Odd_in_1f; + for (int shift = 0; shift < num_shifts; shift++) { + Odd_out[shift].resize(num_flavour); + Odd_out[shift][0] = Odd_out_1f[shift]; + } + + return invert_eo_qphix_nflavour_mshift(Odd_out, Odd_in, precision, max_iter, solver_flag, + rel_prec, solver_params, sloppy, compression, num_flavour); +} + +// Template wrapper for QPhiX solvers callable from C code, return number of iterations +int invert_eo_qphix_twoflavour(spinor *Odd_out_s, spinor *Odd_out_c, spinor *Odd_in_s, + spinor *Odd_in_c, const int max_iter, const double precision, + const int solver_flag, const int rel_prec, + const solver_params_t solver_params, const SloppyPrecision sloppy, + const CompressionType compression) { + const int num_flavour = 2; + const int num_shifts = 1; + std::vector > Odd_out; + std::vector > Odd_in; + + Odd_out.resize(num_shifts); + Odd_out[0].resize(num_flavour); + Odd_in.resize(1); + Odd_in[0].resize(num_flavour); + + Odd_in[0][0] = Odd_in_s; + Odd_in[0][1] = Odd_in_c; + + Odd_out[0][0] = Odd_out_s; + Odd_out[0][1] = Odd_out_c; + + return invert_eo_qphix_nflavour_mshift(Odd_out, Odd_in, precision, max_iter, solver_flag, + rel_prec, solver_params, sloppy, compression, num_flavour); +} + +int invert_eo_qphix_twoflavour_mshift(spinor **Odd_out_s, spinor **Odd_out_c, spinor *Odd_in_s, + spinor *Odd_in_c, const int max_iter, const double precision, + const int solver_flag, const int rel_prec, + const solver_params_t solver_params, + const SloppyPrecision sloppy, + const CompressionType compression) { + // even though the default is set to 1, guard against zeroes + const int num_shifts = solver_params.no_shifts == 0 ? 
1 : solver_params.no_shifts; + const int num_flavour = 2; + std::vector > Odd_out; + std::vector > Odd_in; + + Odd_out.resize(num_shifts); + Odd_in.resize(1); + Odd_in[0].resize(num_flavour); + + Odd_in[0][0] = Odd_in_s; + Odd_in[0][1] = Odd_in_c; + + for (int shift = 0; shift < num_shifts; shift++) { + Odd_out[shift].resize(num_flavour); + Odd_out[shift][0] = Odd_out_s[shift]; + Odd_out[shift][1] = Odd_out_c[shift]; + } + + return invert_eo_qphix_nflavour_mshift(Odd_out, Odd_in, precision, max_iter, solver_flag, + rel_prec, solver_params, sloppy, compression, num_flavour); +} + +// Template wrapper for QPhiX solvers callable from C code, return number of iterations +// the interface is prepared for multi-rhs solves, hence the double vector for the input +int invert_eo_qphix_nflavour_mshift(std::vector > &Odd_out, + std::vector > &Odd_in, + const double precision, const int max_iter, + const int solver_flag, const int rel_prec, + solver_params_t solver_params, const SloppyPrecision sloppy, + const CompressionType compression, const int num_flavour) { + tmlqcd::checkQphixInputParameters(qphix_input); + double target_precision = precision; + double src_norm = 0.0; + for (int f = 0; f < num_flavour; ++f) { + src_norm += square_norm(Odd_in[0][f], VOLUME / 2, 1); + } + // we use "precision_lambda" to determine if a system can be solved in half or float + // precision (when a fixed-precision solver is used) + double precision_lambda = target_precision / src_norm; + if (rel_prec == 1) { + QPhiX::masterPrintf("# QPHIX: Using relative precision\n"); + target_precision = precision * src_norm; + precision_lambda = precision; + } + QPhiX::masterPrintf("# QPHIX: precision_lambda: %g, target_precision: %g\n\n", precision_lambda, + target_precision); + + // mixed solvers require inner and outer precisions, which we specify explicitly here + if (solver_is_mixed(solver_flag)) { +#if (defined(QPHIX_MIC_SOURCE) || defined(QPHIX_AVX512_SOURCE)) + if (sloppy == SLOPPY_HALF) { + if 
(QPHIX_SOALEN > VECLEN_DP || QPHIX_SOALEN > VECLEN_HP) { + QPhiX::masterPrintf( + "SOALEN=%d is greater than the half prec VECLEN=%d or the double prec VECLEN=%d\n", + QPHIX_SOALEN, VECLEN_HP, VECLEN_DP); + abort(); + } + QPhiX::masterPrintf("# INITIALIZING QPHIX MIXED SOLVER\n"); + QPhiX::masterPrintf("# USING DOUBLE-HALF PRECISION\n"); + _initQphix(0, nullptr, qphix_input, compression, QPHIX_DOUBLE_PREC, QPHIX_HALF_PREC); + if (compress12) { + return invert_eo_qphix_helper( + Odd_out, Odd_in, target_precision, max_iter, solver_flag, solver_params, num_flavour); + } else { + return invert_eo_qphix_helper( + Odd_out, Odd_in, target_precision, max_iter, solver_flag, solver_params, num_flavour); + } + } else +#else + if (sloppy == SLOPPY_HALF) { + QPhiX::masterPrintf("QPHIX interface: half precision not supported on this architecture!\n"); + abort(); + } else +#endif + if (sloppy == SLOPPY_SINGLE) { + if (QPHIX_SOALEN > VECLEN_DP || QPHIX_SOALEN > VECLEN_SP) { + QPhiX::masterPrintf( + "SOALEN=%d is greater than the single prec VECLEN=%d or the double prec VECLEN=%d\n", + QPHIX_SOALEN, VECLEN_SP, VECLEN_DP); + abort(); + } + QPhiX::masterPrintf("# INITIALIZING QPHIX MIXED SOLVER\n"); + QPhiX::masterPrintf("# USING DOUBLE-SINGLE PRECISION\n"); + _initQphix(0, nullptr, qphix_input, compression, QPHIX_DOUBLE_PREC, QPHIX_FLOAT_PREC); + if (compress12) { + return invert_eo_qphix_helper( + Odd_out, Odd_in, target_precision, max_iter, solver_flag, solver_params, num_flavour); + } else { + return invert_eo_qphix_helper( + Odd_out, Odd_in, target_precision, max_iter, solver_flag, solver_params, num_flavour); + } + } else { // if(sloppy) + if (QPHIX_SOALEN > VECLEN_DP) { + QPhiX::masterPrintf("SOALEN=%d is greater than the double prec VECLEN=%d\n", QPHIX_SOALEN, + VECLEN_DP); + abort(); + } + QPhiX::masterPrintf("# INITIALIZING QPHIX MIXED SOLVER\n"); + QPhiX::masterPrintf("# USING DOUBLE-DOUBLE PRECISION\n"); + _initQphix(0, nullptr, qphix_input, compression, 
QPHIX_DOUBLE_PREC, QPHIX_DOUBLE_PREC); + if (compress12) { + return invert_eo_qphix_helper( + Odd_out, Odd_in, target_precision, max_iter, solver_flag, solver_params, num_flavour); + } else { + return invert_eo_qphix_helper( + Odd_out, Odd_in, target_precision, max_iter, solver_flag, solver_params, num_flavour); + } + } // if( sloppy ) + } else { // if( solver_is_mixed ) +#if (defined(QPHIX_MIC_SOURCE) || defined(QPHIX_AVX512_SOURCE)) + if (sloppy == SLOPPY_HALF || precision_lambda >= rsdTarget::value) { + if (QPHIX_SOALEN > VECLEN_HP) { + QPhiX::masterPrintf("SOALEN=%d is greater than the half prec VECLEN=%d\n", QPHIX_SOALEN, + VECLEN_HP); + abort(); + } + QPhiX::masterPrintf("# INITIALIZING QPHIX SOLVER\n"); + QPhiX::masterPrintf("# USING HALF PRECISION\n"); + _initQphix(0, nullptr, qphix_input, compression, QPHIX_HALF_PREC); + + if (compress12) { + return invert_eo_qphix_helper( + Odd_out, Odd_in, target_precision, max_iter, solver_flag, solver_params, num_flavour); + } else { + return invert_eo_qphix_helper( + Odd_out, Odd_in, target_precision, max_iter, solver_flag, solver_params, num_flavour); + } + } else +#else + if (sloppy == SLOPPY_HALF) { + QPhiX::masterPrintf("QPHIX interface: half precision not supported on this architecture!\n"); + abort(); + } else +#endif + if (sloppy == SLOPPY_SINGLE || precision_lambda >= rsdTarget::value) { + if (QPHIX_SOALEN > VECLEN_SP) { + QPhiX::masterPrintf("SOALEN=%d is greater than the single prec VECLEN=%d\n", QPHIX_SOALEN, + VECLEN_SP); + abort(); + } + QPhiX::masterPrintf("# INITIALIZING QPHIX SOLVER\n"); + QPhiX::masterPrintf("# USING SINGLE PRECISION\n"); + _initQphix(0, nullptr, qphix_input, compression, QPHIX_FLOAT_PREC); + + if (compress12) { + return invert_eo_qphix_helper( + Odd_out, Odd_in, target_precision, max_iter, solver_flag, solver_params, num_flavour); + } else { + return invert_eo_qphix_helper( + Odd_out, Odd_in, target_precision, max_iter, solver_flag, solver_params, num_flavour); + } + } else { + if 
(QPHIX_SOALEN > VECLEN_DP) { + QPhiX::masterPrintf("SOALEN=%d is greater than the double prec VECLEN=%d\n", QPHIX_SOALEN, + VECLEN_DP); + abort(); + } + QPhiX::masterPrintf("# INITIALIZING QPHIX SOLVER\n"); + QPhiX::masterPrintf("# USING DOUBLE PRECISION\n"); + _initQphix(0, nullptr, qphix_input, compression, QPHIX_DOUBLE_PREC); + + if (compress12) { + return invert_eo_qphix_helper( + Odd_out, Odd_in, target_precision, max_iter, solver_flag, solver_params, num_flavour); + } else { + return invert_eo_qphix_helper( + Odd_out, Odd_in, target_precision, max_iter, solver_flag, solver_params, num_flavour); + } + } // if( sloppy || target_precision ) + } // if ( solver_flag == *MIXEDCG ) + return -1; +} + +void tmlqcd::checkQphixInputParameters(const tm_QPhiXParams_t ¶ms) { + if (params.MinCt == 0) { + QPhiX::masterPrintf("QPHIX Error: MinCt cannot be 0! Minimal value: 1. Aborting.\n"); + abort(); + } + if (params.By == 0 || params.Bz == 0) { + QPhiX::masterPrintf("QPHIX Error: By and Bz may not be 0! Minimal value: 1. Aborting.\n"); + abort(); + } + if (params.NCores * params.Sy * params.Sz != omp_num_threads) { + QPhiX::masterPrintf("QPHIX Error: NCores * Sy * Sz != ompnumthreads ! 
Aborting.\n"); + abort(); + } +} + +void tmlqcd::printQphixDiagnostics(int VECLEN, int SOALEN, bool compress, int VECLEN_inner, + int SOALEN_inner, bool compress_inner) { + QPhiX::masterPrintf("# QphiX: VECLEN=%d SOALEN=%d VECLEN_inner=%d, SOALEN_inner=%d\n", VECLEN, + SOALEN, VECLEN_inner, SOALEN_inner); + + QPhiX::masterPrintf("# QphiX: Declared QMP Topology (xyzt):"); + for (int mu = 0; mu < 4; mu++) QPhiX::masterPrintf(" %d", qmp_geom[mu]); + QPhiX::masterPrintf("\n"); + + QPhiX::masterPrintf("# QphiX: Mapping of dimensions QMP -> tmLQCD (xyzt):"); + for (int mu = 0; mu < 4; mu++) QPhiX::masterPrintf(" %d->%d", mu, qmp_tm_map[mu]); + QPhiX::masterPrintf("\n"); + + QPhiX::masterPrintf("# QphiX: Global Lattice Size (xyzt) = "); + for (int mu = 0; mu < 4; mu++) { + QPhiX::masterPrintf(" %d", lattSize[mu]); + } + QPhiX::masterPrintf("\n"); + QPhiX::masterPrintf("# QphiX: Local Lattice Size (xyzt) = "); + for (int mu = 0; mu < 4; mu++) { + QPhiX::masterPrintf(" %d", subLattSize[mu]); + } + QPhiX::masterPrintf("\n"); + QPhiX::masterPrintf("# QphiX: Block Sizes: By= %d Bz=%d\n", By, Bz); + QPhiX::masterPrintf("# QphiX: Cores = %d\n", NCores); + QPhiX::masterPrintf("# QphiX: SMT Grid: Sy=%d Sz=%d\n", Sy, Sz); + QPhiX::masterPrintf("# QphiX: Pad Factors: PadXY=%d PadXYZ=%d\n", PadXY, PadXYZ); + QPhiX::masterPrintf("# QphiX: Threads_per_core = %d\n", N_simt); + QPhiX::masterPrintf("# QphiX: MinCt = %d\n", MinCt); + if (compress) { + QPhiX::masterPrintf("# QphiX: Using two-row gauge compression (compress12)\n"); + } + if (compress_inner) { + QPhiX::masterPrintf("# QphiX: Inner solver using two-row gauge compression (compress12)\n"); + } +} + +void testSpinorPackers(spinor *Even_out, spinor *Odd_out, const spinor *const Even_in, + const spinor *const Odd_in) { + tmlqcd::checkQphixInputParameters(qphix_input); + // FIXME: two-row gauge compression and double precision hard-coded + _initQphix(0, nullptr, qphix_input, 12, QPHIX_DOUBLE_PREC); + + QPhiX::Geometry 
geom(subLattSize, By, Bz, NCores, Sy, Sz, + PadXY, PadXYZ, MinCt); + + auto qphix_cb_even = QPhiX::makeFourSpinorHandle(geom); + auto qphix_cb_odd = QPhiX::makeFourSpinorHandle(geom); + + spinor **tmp; + init_solver_field(&tmp, VOLUME / 2, 2); + + // reorder_eo_spinor_to_QPhiX(geom, reinterpret_cast(Even_in), + // qphix_cb_even.get(), cb_even); + // reorder_eo_spinor_to_QPhiX(geom, reinterpret_cast(Odd_in), + // qphix_cb_odd.get(), cb_odd); + reorder_eo_spinor_to_QPhiX(geom, Even_in, qphix_cb_even.get(), cb_even); + reorder_eo_spinor_to_QPhiX(geom, Odd_in, qphix_cb_odd.get(), cb_odd); + + reorder_eo_spinor_from_QPhiX(geom, Even_out, qphix_cb_even.get(), cb_even, 1.0); + reorder_eo_spinor_from_QPhiX(geom, Odd_out, qphix_cb_odd.get(), cb_odd, 1.0); + + diff(tmp[0], Even_out, Even_in, VOLUME / 2); + diff(tmp[1], Odd_out, Odd_in, VOLUME / 2); + double l2norm = square_norm(tmp[0], VOLUME / 2, 1) + square_norm(tmp[1], VOLUME / 2, 1); + QPhiX::masterPrintf("QPHIX eo spinor packer back and forth difference L2 norm: %lf\n", l2norm); + finalize_solver(tmp, 2); +} diff --git a/src/lib/qphix/qphix_interface.hpp b/src/lib/qphix/qphix_interface.hpp new file mode 100644 index 000000000..b487eda66 --- /dev/null +++ b/src/lib/qphix/qphix_interface.hpp @@ -0,0 +1,51 @@ +/*********************************************************************** + * + * Copyright (C) 2017 Bartosz Kostrzewa + * + * This file is part of tmLQCD. + * + * tmLQCD is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * tmLQCD is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with tmLQCD. If not, see <http://www.gnu.org/licenses/>. + * + ***********************************************************************/ + +#pragma once + +#include "global.h" +#include "qphix_types.h" + +#ifdef __cplusplus /* If this is a C++ compiler, use C linkage */ +extern "C" { +#endif + +#include "misc_types.h" +#include "operator_types.h" +#include "solver/matrix_mult_typedef.h" +#include "solver/solver_params.h" +#include "su3.h" + +#ifdef __cplusplus +} +#endif + +#include <vector> + +int invert_eo_qphix_nflavour_mshift(std::vector< std::vector< spinor* > > &Odd_out, + std::vector< std::vector< spinor* > > &Odd_in, + const double precision, + const int max_iter, + const int solver_flag, + const int rel_prec, + solver_params_t solver_params, + const SloppyPrecision sloppy, const CompressionType compression, + const int num_flavour); \ No newline at end of file diff --git a/src/lib/qphix/qphix_interface_utils.hpp b/src/lib/qphix/qphix_interface_utils.hpp new file mode 100644 index 000000000..56d8afe56 --- /dev/null +++ b/src/lib/qphix/qphix_interface_utils.hpp @@ -0,0 +1,33 @@ +/*********************************************************************** + * + * Copyright (C) 2015 Mario Schroeck + * 2016 Peter Labus + * 2017 Peter Labus, Martin Ueding, Bartosz Kostrzewa + * + * This file is part of tmLQCD. + * + * tmLQCD is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * tmLQCD is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with tmLQCD. If not, see <http://www.gnu.org/licenses/>. 
+ * + ***********************************************************************/ + +#pragma once + +#include "qphix_types.h" + +namespace tmlqcd { + +void checkQphixInputParameters(const tm_QPhiXParams_t &params); +void printQphixDiagnostics(int VECLEN, int SOALEN, bool compress, int VECLEN_inner, int SOALEN_inner, bool compress_inner); + +} // namespace tmlqcd diff --git a/qphix_interface.h b/src/lib/qphix_interface.h similarity index 100% rename from qphix_interface.h rename to src/lib/qphix_interface.h diff --git a/qphix_types.h b/src/lib/qphix_types.h similarity index 100% rename from qphix_types.h rename to src/lib/qphix_types.h diff --git a/qphix_veclen.h b/src/lib/qphix_veclen.h similarity index 100% rename from qphix_veclen.h rename to src/lib/qphix_veclen.h diff --git a/quda_dummy_types.h b/src/lib/quda_dummy_types.h similarity index 100% rename from quda_dummy_types.h rename to src/lib/quda_dummy_types.h diff --git a/src/lib/quda_gauge_paths.inc b/src/lib/quda_gauge_paths.inc new file mode 100644 index 000000000..d2c898e6c --- /dev/null +++ b/src/lib/quda_gauge_paths.inc @@ -0,0 +1,158 @@ +/*********************************************************************** + * + * Copyright (C) 2021 Bartosz Kostrzewa, Ferenc Pittler, Simone Bacchio + * + * This file is part of tmLQCD. + * + * tmLQCD is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * tmLQCD is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with tmLQCD. If not, see <http://www.gnu.org/licenses/>. 
+ * + * + ***********************************************************************/ + +const int plaq_rect_length[24] = { + 3, 3, 3, 3, 3, 3, + 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, + }; + +const int plaq_rect_path[4][24][5] = { + { {1, 7, 6 }, + {6, 7, 1 }, + {2, 7, 5 }, + {5, 7, 2 }, + {3, 7, 4 }, + {4, 7, 3 }, + {1, 1, 7, 6, 6 }, + {6, 6, 7, 1, 1 }, + {2, 2, 7, 5, 5 }, + {5, 5, 7, 2, 2 }, + {3, 3, 7, 4, 4 }, + {4, 4, 7, 3, 3 }, + {0, 1, 7, 7, 6 }, + {6, 7, 7, 1, 0 }, + {0, 2, 7, 7, 5 }, + {5, 7, 7, 2, 0 }, + {0, 3, 7, 7, 4 }, + {4, 7, 7, 3, 0 }, + {0, 4, 7, 7, 3 }, + {3, 7, 7, 4, 0 }, + {0, 5, 7, 7, 2 }, + {2, 7, 7, 5, 0 }, + {0, 6, 7, 7, 1 }, + {1, 7, 7, 6, 0 } }, + { { 2, 6, 5 }, + { 5, 6, 2 }, + { 3, 6, 4 }, + { 4, 6, 3 }, + { 0, 6, 7 }, + { 7, 6, 0 }, + { 1, 2, 6, 6, 5 }, + { 2, 6, 6, 5, 1 }, + { 5, 6, 6, 2, 1 }, + { 1, 5, 6, 6, 2 }, + { 1, 3, 6, 6, 4 }, + { 3, 6, 6, 4, 1 }, + { 4, 6, 6, 3, 1 }, + { 1, 4, 6, 6, 3 }, + { 1, 0, 6, 6, 7 }, + { 0, 6, 6, 7, 1 }, + { 7, 6, 6, 0, 1 }, + { 1, 7, 6, 6, 0 }, + { 5, 5, 6, 2, 2 }, + { 2, 2, 6, 5, 5 }, + { 4, 4, 6, 3, 3 }, + { 3, 3, 6, 4, 4 }, + { 7, 7, 6, 0, 0 }, + { 0, 0, 6, 7, 7 } }, + { {3, 5, 4}, + {4, 5, 3}, + {0, 5, 7}, + {7, 5, 0}, + {1, 5, 6}, + {6, 5, 1}, + {2, 3, 5, 5, 4}, + {3, 5, 5, 4, 2}, + {4, 5, 5, 3, 2}, + {2, 4, 5, 5, 3}, + {2, 0, 5, 5, 7}, + {0, 5, 5, 7, 2}, + {7, 5, 5, 0, 2}, + {2, 7, 5, 5, 0}, + {2, 1, 5, 5, 6}, + {1, 5, 5, 6, 2}, + {6, 5, 5, 1, 2}, + {2, 6, 5, 5, 1}, + {4, 4, 5, 3, 3}, + {3, 3, 5, 4, 4}, + {7, 7, 5, 0, 0}, + {0, 0, 5, 7, 7}, + {6, 6, 5, 1, 1}, + {1, 1, 5, 6, 6} }, + { { 0, 4, 7 }, + { 7, 4, 0 }, + { 1, 4, 6 }, + { 6, 4, 1 }, + { 2, 4, 5 }, + { 5, 4, 2 }, + { 3, 0, 4, 4, 7 }, + { 0, 4, 4, 7, 3 }, + { 7, 4, 4, 0, 3 }, + { 3, 7, 4, 4, 0 }, + { 3, 1, 4, 4, 6 }, + { 1, 4, 4, 6, 3 }, + { 6, 4, 4, 1, 3 }, + { 3, 6, 4, 4, 1 }, + { 3, 2, 4, 4, 5 }, + { 2, 4, 4, 5, 3 }, + { 5, 4, 4, 2, 3 }, + { 3, 5, 4, 4, 2 }, + { 7, 7, 4, 0, 0 }, + { 0, 0, 4, 7, 7 }, + { 6, 6, 4, 1, 1 }, 
+ { 1, 1, 4, 6, 6 }, + { 5, 5, 4, 2, 2 }, + { 2, 2, 4, 5, 5 } } + }; + +const int plaq_length[] = { + 3, 3, 3, 3, 3, 3 }; + +const int plaq_path[4][6][3] = { + { { 1, 7, 6 }, + { 6, 7, 1 }, + { 2, 7, 5 }, + { 5, 7, 2 }, + { 3, 7, 4 }, + { 4, 7, 3 } }, + { { 2, 6, 5 }, + { 5, 6, 2 }, + { 3, 6, 4 }, + { 4, 6, 3 }, + { 0, 6, 7 }, + { 7, 6, 0 } }, + { { 3, 5, 4}, + { 4, 5, 3}, + { 0, 5, 7}, + { 7, 5, 0}, + { 1, 5, 6}, + { 6, 5, 1} }, + { { 0, 4, 7 }, + { 7, 4, 0 }, + { 1, 4, 6 }, + { 6, 4, 1 }, + { 2, 4, 5 }, + { 5, 4, 2 } } + }; + diff --git a/quda_interface.c b/src/lib/quda_interface.c similarity index 100% rename from quda_interface.c rename to src/lib/quda_interface.c diff --git a/quda_interface.h b/src/lib/quda_interface.h similarity index 100% rename from quda_interface.h rename to src/lib/quda_interface.h diff --git a/quda_types.h b/src/lib/quda_types.h similarity index 100% rename from quda_types.h rename to src/lib/quda_types.h diff --git a/ranlxd.c b/src/lib/ranlxd.c similarity index 100% rename from ranlxd.c rename to src/lib/ranlxd.c diff --git a/ranlxd.h b/src/lib/ranlxd.h similarity index 100% rename from ranlxd.h rename to src/lib/ranlxd.h diff --git a/ranlxs.c b/src/lib/ranlxs.c similarity index 100% rename from ranlxs.c rename to src/lib/ranlxs.c diff --git a/ranlxs.h b/src/lib/ranlxs.h similarity index 100% rename from ranlxs.h rename to src/lib/ranlxs.h diff --git a/rational/Makefile.in b/src/lib/rational/Makefile.in similarity index 100% rename from rational/Makefile.in rename to src/lib/rational/Makefile.in diff --git a/rational/elliptic.c b/src/lib/rational/elliptic.c similarity index 100% rename from rational/elliptic.c rename to src/lib/rational/elliptic.c diff --git a/rational/elliptic.h b/src/lib/rational/elliptic.h similarity index 100% rename from rational/elliptic.h rename to src/lib/rational/elliptic.h diff --git a/rational/rational.c b/src/lib/rational/rational.c similarity index 100% rename from rational/rational.c rename to 
src/lib/rational/rational.c diff --git a/rational/rational.h b/src/lib/rational/rational.h similarity index 100% rename from rational/rational.h rename to src/lib/rational/rational.h diff --git a/rational/zolotarev.c b/src/lib/rational/zolotarev.c similarity index 100% rename from rational/zolotarev.c rename to src/lib/rational/zolotarev.c diff --git a/rational/zolotarev.h b/src/lib/rational/zolotarev.h similarity index 100% rename from rational/zolotarev.h rename to src/lib/rational/zolotarev.h diff --git a/read_input.h b/src/lib/read_input.h similarity index 100% rename from read_input.h rename to src/lib/read_input.h diff --git a/read_input.l b/src/lib/read_input.l similarity index 100% rename from read_input.l rename to src/lib/read_input.l diff --git a/reweighting_factor.c b/src/lib/reweighting_factor.c similarity index 100% rename from reweighting_factor.c rename to src/lib/reweighting_factor.c diff --git a/reweighting_factor.h b/src/lib/reweighting_factor.h similarity index 100% rename from reweighting_factor.h rename to src/lib/reweighting_factor.h diff --git a/reweighting_factor_nd.c b/src/lib/reweighting_factor_nd.c similarity index 100% rename from reweighting_factor_nd.c rename to src/lib/reweighting_factor_nd.c diff --git a/reweighting_factor_nd.h b/src/lib/reweighting_factor_nd.h similarity index 100% rename from reweighting_factor_nd.h rename to src/lib/reweighting_factor_nd.h diff --git a/rnd_gauge_trafo.c b/src/lib/rnd_gauge_trafo.c similarity index 100% rename from rnd_gauge_trafo.c rename to src/lib/rnd_gauge_trafo.c diff --git a/rnd_gauge_trafo.h b/src/lib/rnd_gauge_trafo.h similarity index 100% rename from rnd_gauge_trafo.h rename to src/lib/rnd_gauge_trafo.h diff --git a/sighandler.c b/src/lib/sighandler.c similarity index 100% rename from sighandler.c rename to src/lib/sighandler.c diff --git a/sighandler.h b/src/lib/sighandler.h similarity index 100% rename from sighandler.h rename to src/lib/sighandler.h diff --git a/smearing/Makefile.in 
b/src/lib/smearing/Makefile.in similarity index 100% rename from smearing/Makefile.in rename to src/lib/smearing/Makefile.in diff --git a/smearing/ape.h b/src/lib/smearing/ape.h similarity index 100% rename from smearing/ape.h rename to src/lib/smearing/ape.h diff --git a/smearing/ape.ih b/src/lib/smearing/ape.ih similarity index 100% rename from smearing/ape.ih rename to src/lib/smearing/ape.ih diff --git a/smearing/ape_ape_smear.c b/src/lib/smearing/ape_ape_smear.c similarity index 100% rename from smearing/ape_ape_smear.c rename to src/lib/smearing/ape_ape_smear.c diff --git a/smearing/hex.h b/src/lib/smearing/hex.h similarity index 100% rename from smearing/hex.h rename to src/lib/smearing/hex.h diff --git a/smearing/hex.ih b/src/lib/smearing/hex.ih similarity index 100% rename from smearing/hex.ih rename to src/lib/smearing/hex.ih diff --git a/smearing/hex_hex_smear.c b/src/lib/smearing/hex_hex_smear.c similarity index 100% rename from smearing/hex_hex_smear.c rename to src/lib/smearing/hex_hex_smear.c diff --git a/smearing/hex_stout_exclude_none.c b/src/lib/smearing/hex_stout_exclude_none.c similarity index 100% rename from smearing/hex_stout_exclude_none.c rename to src/lib/smearing/hex_stout_exclude_none.c diff --git a/smearing/hex_stout_exclude_one.c b/src/lib/smearing/hex_stout_exclude_one.c similarity index 100% rename from smearing/hex_stout_exclude_one.c rename to src/lib/smearing/hex_stout_exclude_one.c diff --git a/smearing/hex_stout_exclude_two.c b/src/lib/smearing/hex_stout_exclude_two.c similarity index 100% rename from smearing/hex_stout_exclude_two.c rename to src/lib/smearing/hex_stout_exclude_two.c diff --git a/smearing/hyp.h b/src/lib/smearing/hyp.h similarity index 100% rename from smearing/hyp.h rename to src/lib/smearing/hyp.h diff --git a/smearing/hyp.ih b/src/lib/smearing/hyp.ih similarity index 100% rename from smearing/hyp.ih rename to src/lib/smearing/hyp.ih diff --git a/smearing/hyp_APE_project_exclude_none.c 
b/src/lib/smearing/hyp_APE_project_exclude_none.c similarity index 100% rename from smearing/hyp_APE_project_exclude_none.c rename to src/lib/smearing/hyp_APE_project_exclude_none.c diff --git a/smearing/hyp_APE_project_exclude_one.c b/src/lib/smearing/hyp_APE_project_exclude_one.c similarity index 100% rename from smearing/hyp_APE_project_exclude_one.c rename to src/lib/smearing/hyp_APE_project_exclude_one.c diff --git a/smearing/hyp_APE_project_exclude_two.c b/src/lib/smearing/hyp_APE_project_exclude_two.c similarity index 100% rename from smearing/hyp_APE_project_exclude_two.c rename to src/lib/smearing/hyp_APE_project_exclude_two.c diff --git a/smearing/hyp_hyp_smear.c b/src/lib/smearing/hyp_hyp_smear.c similarity index 100% rename from smearing/hyp_hyp_smear.c rename to src/lib/smearing/hyp_hyp_smear.c diff --git a/smearing/hyp_hyp_staples_exclude_none.c b/src/lib/smearing/hyp_hyp_staples_exclude_none.c similarity index 100% rename from smearing/hyp_hyp_staples_exclude_none.c rename to src/lib/smearing/hyp_hyp_staples_exclude_none.c diff --git a/smearing/hyp_hyp_staples_exclude_one.c b/src/lib/smearing/hyp_hyp_staples_exclude_one.c similarity index 100% rename from smearing/hyp_hyp_staples_exclude_one.c rename to src/lib/smearing/hyp_hyp_staples_exclude_one.c diff --git a/smearing/hyp_hyp_staples_exclude_two.c b/src/lib/smearing/hyp_hyp_staples_exclude_two.c similarity index 100% rename from smearing/hyp_hyp_staples_exclude_two.c rename to src/lib/smearing/hyp_hyp_staples_exclude_two.c diff --git a/smearing/stout.h b/src/lib/smearing/stout.h similarity index 100% rename from smearing/stout.h rename to src/lib/smearing/stout.h diff --git a/smearing/stout.ih b/src/lib/smearing/stout.ih similarity index 100% rename from smearing/stout.ih rename to src/lib/smearing/stout.ih diff --git a/smearing/stout_stout_smear.c b/src/lib/smearing/stout_stout_smear.c similarity index 100% rename from smearing/stout_stout_smear.c rename to src/lib/smearing/stout_stout_smear.c 
diff --git a/smearing/uils_print_config_to_screen.c b/src/lib/smearing/uils_print_config_to_screen.c similarity index 100% rename from smearing/uils_print_config_to_screen.c rename to src/lib/smearing/uils_print_config_to_screen.c diff --git a/smearing/utils.h b/src/lib/smearing/utils.h similarity index 100% rename from smearing/utils.h rename to src/lib/smearing/utils.h diff --git a/smearing/utils.ih b/src/lib/smearing/utils.ih similarity index 100% rename from smearing/utils.ih rename to src/lib/smearing/utils.ih diff --git a/smearing/utils_generic_staples.c b/src/lib/smearing/utils_generic_staples.c similarity index 100% rename from smearing/utils_generic_staples.c rename to src/lib/smearing/utils_generic_staples.c diff --git a/smearing/utils_print_config_to_screen.c b/src/lib/smearing/utils_print_config_to_screen.c similarity index 100% rename from smearing/utils_print_config_to_screen.c rename to src/lib/smearing/utils_print_config_to_screen.c diff --git a/smearing/utils_print_su3.c b/src/lib/smearing/utils_print_su3.c similarity index 100% rename from smearing/utils_print_su3.c rename to src/lib/smearing/utils_print_su3.c diff --git a/smearing/utils_project_antiherm.c b/src/lib/smearing/utils_project_antiherm.c similarity index 100% rename from smearing/utils_project_antiherm.c rename to src/lib/smearing/utils_project_antiherm.c diff --git a/smearing/utils_project_herm.c b/src/lib/smearing/utils_project_herm.c similarity index 100% rename from smearing/utils_project_herm.c rename to src/lib/smearing/utils_project_herm.c diff --git a/smearing/utils_reunitarize.c b/src/lib/smearing/utils_reunitarize.c similarity index 100% rename from smearing/utils_reunitarize.c rename to src/lib/smearing/utils_reunitarize.c diff --git a/smearing/utils_reunitarize_MILC.c b/src/lib/smearing/utils_reunitarize_MILC.c similarity index 88% rename from smearing/utils_reunitarize_MILC.c rename to src/lib/smearing/utils_reunitarize_MILC.c index 757a797df..b5efa2936 100644 --- 
a/smearing/utils_reunitarize_MILC.c +++ b/src/lib/smearing/utils_reunitarize_MILC.c @@ -1,4 +1,5 @@ #include "utils.ih" +#include /* No reunitarization code seems to be available, so I've adapted (stolen) this routine from the * MILC code (who stole it elsewhere, I think ;]) -- AD. */ @@ -35,12 +36,11 @@ void reunitarize(su3 *omega) { bj2 = omega->c02; omega->c20 = bj1 * omega->c12; - omega->c20 -= bj2 *omega - ->c11 + omega->c20 -= bj2 *omega->c11; - omega->c21 = bj2 * omega->c10; + omega->c21 = bj2 * omega->c10; omega->c21 -= bj0 * omega->c12; omega->c22 = bj0 * omega->c11; - omega->c22 -= bj1r * omega->c10; + omega->c22 -= bj1 * omega->c10; } diff --git a/solver/M_plus_block_psi_body.c b/src/lib/solver/M_plus_block_psi_body.c similarity index 100% rename from solver/M_plus_block_psi_body.c rename to src/lib/solver/M_plus_block_psi_body.c diff --git a/solver/Makefile.in b/src/lib/solver/Makefile.in similarity index 100% rename from solver/Makefile.in rename to src/lib/solver/Makefile.in diff --git a/solver/Msap.c b/src/lib/solver/Msap.c similarity index 100% rename from solver/Msap.c rename to src/lib/solver/Msap.c diff --git a/solver/Msap.h b/src/lib/solver/Msap.h similarity index 100% rename from solver/Msap.h rename to src/lib/solver/Msap.h diff --git a/solver/bicg_complex.c b/src/lib/solver/bicg_complex.c similarity index 100% rename from solver/bicg_complex.c rename to src/lib/solver/bicg_complex.c diff --git a/solver/bicg_complex.h b/src/lib/solver/bicg_complex.h similarity index 100% rename from solver/bicg_complex.h rename to src/lib/solver/bicg_complex.h diff --git a/solver/bicgstab2.c b/src/lib/solver/bicgstab2.c similarity index 100% rename from solver/bicgstab2.c rename to src/lib/solver/bicgstab2.c diff --git a/solver/bicgstab2.h b/src/lib/solver/bicgstab2.h similarity index 100% rename from solver/bicgstab2.h rename to src/lib/solver/bicgstab2.h diff --git a/solver/bicgstab_complex.c b/src/lib/solver/bicgstab_complex.c similarity index 100% rename 
from solver/bicgstab_complex.c rename to src/lib/solver/bicgstab_complex.c diff --git a/solver/bicgstab_complex.h b/src/lib/solver/bicgstab_complex.h similarity index 100% rename from solver/bicgstab_complex.h rename to src/lib/solver/bicgstab_complex.h diff --git a/solver/bicgstab_complex_bi.c b/src/lib/solver/bicgstab_complex_bi.c similarity index 100% rename from solver/bicgstab_complex_bi.c rename to src/lib/solver/bicgstab_complex_bi.c diff --git a/solver/bicgstab_complex_bi.h b/src/lib/solver/bicgstab_complex_bi.h similarity index 100% rename from solver/bicgstab_complex_bi.h rename to src/lib/solver/bicgstab_complex_bi.h diff --git a/solver/bicgstabell.c b/src/lib/solver/bicgstabell.c similarity index 100% rename from solver/bicgstabell.c rename to src/lib/solver/bicgstabell.c diff --git a/solver/bicgstabell.h b/src/lib/solver/bicgstabell.h similarity index 100% rename from solver/bicgstabell.h rename to src/lib/solver/bicgstabell.h diff --git a/solver/cg_her.c b/src/lib/solver/cg_her.c similarity index 100% rename from solver/cg_her.c rename to src/lib/solver/cg_her.c diff --git a/solver/cg_her.h b/src/lib/solver/cg_her.h similarity index 100% rename from solver/cg_her.h rename to src/lib/solver/cg_her.h diff --git a/solver/cg_her_bi.c b/src/lib/solver/cg_her_bi.c similarity index 100% rename from solver/cg_her_bi.c rename to src/lib/solver/cg_her_bi.c diff --git a/solver/cg_her_bi.h b/src/lib/solver/cg_her_bi.h similarity index 100% rename from solver/cg_her_bi.h rename to src/lib/solver/cg_her_bi.h diff --git a/solver/cg_her_nd.c b/src/lib/solver/cg_her_nd.c similarity index 100% rename from solver/cg_her_nd.c rename to src/lib/solver/cg_her_nd.c diff --git a/solver/cg_her_nd.h b/src/lib/solver/cg_her_nd.h similarity index 100% rename from solver/cg_her_nd.h rename to src/lib/solver/cg_her_nd.h diff --git a/solver/cg_mms_tm.c b/src/lib/solver/cg_mms_tm.c similarity index 100% rename from solver/cg_mms_tm.c rename to src/lib/solver/cg_mms_tm.c diff --git 
a/solver/cg_mms_tm.h b/src/lib/solver/cg_mms_tm.h similarity index 100% rename from solver/cg_mms_tm.h rename to src/lib/solver/cg_mms_tm.h diff --git a/solver/cg_mms_tm_nd.c b/src/lib/solver/cg_mms_tm_nd.c similarity index 100% rename from solver/cg_mms_tm_nd.c rename to src/lib/solver/cg_mms_tm_nd.c diff --git a/solver/cg_mms_tm_nd.h b/src/lib/solver/cg_mms_tm_nd.h similarity index 100% rename from solver/cg_mms_tm_nd.h rename to src/lib/solver/cg_mms_tm_nd.h diff --git a/solver/cgne4complex.c b/src/lib/solver/cgne4complex.c similarity index 100% rename from solver/cgne4complex.c rename to src/lib/solver/cgne4complex.c diff --git a/solver/cgne4complex.h b/src/lib/solver/cgne4complex.h similarity index 100% rename from solver/cgne4complex.h rename to src/lib/solver/cgne4complex.h diff --git a/solver/cgs_real.c b/src/lib/solver/cgs_real.c similarity index 100% rename from solver/cgs_real.c rename to src/lib/solver/cgs_real.c diff --git a/solver/cgs_real.h b/src/lib/solver/cgs_real.h similarity index 100% rename from solver/cgs_real.h rename to src/lib/solver/cgs_real.h diff --git a/solver/chrono_guess.c b/src/lib/solver/chrono_guess.c similarity index 100% rename from solver/chrono_guess.c rename to src/lib/solver/chrono_guess.c diff --git a/solver/chrono_guess.h b/src/lib/solver/chrono_guess.h similarity index 100% rename from solver/chrono_guess.h rename to src/lib/solver/chrono_guess.h diff --git a/solver/cr.c b/src/lib/solver/cr.c similarity index 100% rename from solver/cr.c rename to src/lib/solver/cr.c diff --git a/solver/cr.h b/src/lib/solver/cr.h similarity index 100% rename from solver/cr.h rename to src/lib/solver/cr.h diff --git a/solver/dfl_projector.c b/src/lib/solver/dfl_projector.c similarity index 100% rename from solver/dfl_projector.c rename to src/lib/solver/dfl_projector.c diff --git a/solver/dfl_projector.h b/src/lib/solver/dfl_projector.h similarity index 100% rename from solver/dfl_projector.h rename to src/lib/solver/dfl_projector.h diff 
--git a/solver/diagonalise_general_matrix.c b/src/lib/solver/diagonalise_general_matrix.c similarity index 100% rename from solver/diagonalise_general_matrix.c rename to src/lib/solver/diagonalise_general_matrix.c diff --git a/solver/diagonalise_general_matrix.h b/src/lib/solver/diagonalise_general_matrix.h similarity index 100% rename from solver/diagonalise_general_matrix.h rename to src/lib/solver/diagonalise_general_matrix.h diff --git a/solver/dirac_operator_eigenvectors.c b/src/lib/solver/dirac_operator_eigenvectors.c similarity index 100% rename from solver/dirac_operator_eigenvectors.c rename to src/lib/solver/dirac_operator_eigenvectors.c diff --git a/solver/dirac_operator_eigenvectors.h b/src/lib/solver/dirac_operator_eigenvectors.h similarity index 100% rename from solver/dirac_operator_eigenvectors.h rename to src/lib/solver/dirac_operator_eigenvectors.h diff --git a/solver/eigcg.c b/src/lib/solver/eigcg.c similarity index 100% rename from solver/eigcg.c rename to src/lib/solver/eigcg.c diff --git a/solver/eigcg.h b/src/lib/solver/eigcg.h similarity index 100% rename from solver/eigcg.h rename to src/lib/solver/eigcg.h diff --git a/solver/eigenvalues.c b/src/lib/solver/eigenvalues.c similarity index 100% rename from solver/eigenvalues.c rename to src/lib/solver/eigenvalues.c diff --git a/solver/eigenvalues.h b/src/lib/solver/eigenvalues.h similarity index 100% rename from solver/eigenvalues.h rename to src/lib/solver/eigenvalues.h diff --git a/solver/eigenvalues_bi.c b/src/lib/solver/eigenvalues_bi.c similarity index 100% rename from solver/eigenvalues_bi.c rename to src/lib/solver/eigenvalues_bi.c diff --git a/solver/eigenvalues_bi.h b/src/lib/solver/eigenvalues_bi.h similarity index 100% rename from solver/eigenvalues_bi.h rename to src/lib/solver/eigenvalues_bi.h diff --git a/solver/fgmres.c b/src/lib/solver/fgmres.c similarity index 100% rename from solver/fgmres.c rename to src/lib/solver/fgmres.c diff --git a/solver/fgmres.h 
b/src/lib/solver/fgmres.h similarity index 100% rename from solver/fgmres.h rename to src/lib/solver/fgmres.h diff --git a/solver/fgmres4complex.c b/src/lib/solver/fgmres4complex.c similarity index 100% rename from solver/fgmres4complex.c rename to src/lib/solver/fgmres4complex.c diff --git a/solver/fgmres4complex.h b/src/lib/solver/fgmres4complex.h similarity index 100% rename from solver/fgmres4complex.h rename to src/lib/solver/fgmres4complex.h diff --git a/solver/fgmres4complex_body.c b/src/lib/solver/fgmres4complex_body.c similarity index 100% rename from solver/fgmres4complex_body.c rename to src/lib/solver/fgmres4complex_body.c diff --git a/solver/gcr.c b/src/lib/solver/gcr.c similarity index 100% rename from solver/gcr.c rename to src/lib/solver/gcr.c diff --git a/solver/gcr.h b/src/lib/solver/gcr.h similarity index 100% rename from solver/gcr.h rename to src/lib/solver/gcr.h diff --git a/solver/gcr4complex.c b/src/lib/solver/gcr4complex.c similarity index 100% rename from solver/gcr4complex.c rename to src/lib/solver/gcr4complex.c diff --git a/solver/gcr4complex.h b/src/lib/solver/gcr4complex.h similarity index 100% rename from solver/gcr4complex.h rename to src/lib/solver/gcr4complex.h diff --git a/solver/gcr4complex_body.c b/src/lib/solver/gcr4complex_body.c similarity index 100% rename from solver/gcr4complex_body.c rename to src/lib/solver/gcr4complex_body.c diff --git a/solver/gcr4complex_body.h b/src/lib/solver/gcr4complex_body.h similarity index 100% rename from solver/gcr4complex_body.h rename to src/lib/solver/gcr4complex_body.h diff --git a/solver/generate_dfl_subspace.c b/src/lib/solver/generate_dfl_subspace.c similarity index 100% rename from solver/generate_dfl_subspace.c rename to src/lib/solver/generate_dfl_subspace.c diff --git a/solver/generate_dfl_subspace.h b/src/lib/solver/generate_dfl_subspace.h similarity index 100% rename from solver/generate_dfl_subspace.h rename to src/lib/solver/generate_dfl_subspace.h diff --git a/solver/gmres.c 
b/src/lib/solver/gmres.c similarity index 100% rename from solver/gmres.c rename to src/lib/solver/gmres.c diff --git a/solver/gmres.h b/src/lib/solver/gmres.h similarity index 100% rename from solver/gmres.h rename to src/lib/solver/gmres.h diff --git a/solver/gmres_dr.c b/src/lib/solver/gmres_dr.c similarity index 100% rename from solver/gmres_dr.c rename to src/lib/solver/gmres_dr.c diff --git a/solver/gmres_dr.h b/src/lib/solver/gmres_dr.h similarity index 100% rename from solver/gmres_dr.h rename to src/lib/solver/gmres_dr.h diff --git a/solver/gmres_precon.c b/src/lib/solver/gmres_precon.c similarity index 100% rename from solver/gmres_precon.c rename to src/lib/solver/gmres_precon.c diff --git a/solver/gmres_precon.h b/src/lib/solver/gmres_precon.h similarity index 100% rename from solver/gmres_precon.h rename to src/lib/solver/gmres_precon.h diff --git a/solver/gram-schmidt.c b/src/lib/solver/gram-schmidt.c similarity index 100% rename from solver/gram-schmidt.c rename to src/lib/solver/gram-schmidt.c diff --git a/solver/gram-schmidt.h b/src/lib/solver/gram-schmidt.h similarity index 100% rename from solver/gram-schmidt.h rename to src/lib/solver/gram-schmidt.h diff --git a/solver/incr_eigcg.c b/src/lib/solver/incr_eigcg.c similarity index 100% rename from solver/incr_eigcg.c rename to src/lib/solver/incr_eigcg.c diff --git a/solver/incr_eigcg.h b/src/lib/solver/incr_eigcg.h similarity index 100% rename from solver/incr_eigcg.h rename to src/lib/solver/incr_eigcg.h diff --git a/solver/index_jd.c b/src/lib/solver/index_jd.c similarity index 100% rename from solver/index_jd.c rename to src/lib/solver/index_jd.c diff --git a/solver/index_jd.h b/src/lib/solver/index_jd.h similarity index 100% rename from solver/index_jd.h rename to src/lib/solver/index_jd.h diff --git a/solver/init_guess.c b/src/lib/solver/init_guess.c similarity index 100% rename from solver/init_guess.c rename to src/lib/solver/init_guess.c diff --git a/solver/init_guess.h 
b/src/lib/solver/init_guess.h similarity index 100% rename from solver/init_guess.h rename to src/lib/solver/init_guess.h diff --git a/solver/jdher.c b/src/lib/solver/jdher.c similarity index 100% rename from solver/jdher.c rename to src/lib/solver/jdher.c diff --git a/solver/jdher.h b/src/lib/solver/jdher.h similarity index 100% rename from solver/jdher.h rename to src/lib/solver/jdher.h diff --git a/solver/jdher_bi.c b/src/lib/solver/jdher_bi.c similarity index 100% rename from solver/jdher_bi.c rename to src/lib/solver/jdher_bi.c diff --git a/solver/jdher_bi.h b/src/lib/solver/jdher_bi.h similarity index 100% rename from solver/jdher_bi.h rename to src/lib/solver/jdher_bi.h diff --git a/solver/little_mg_precon_body.c b/src/lib/solver/little_mg_precon_body.c similarity index 100% rename from solver/little_mg_precon_body.c rename to src/lib/solver/little_mg_precon_body.c diff --git a/solver/little_project_eo_body.c b/src/lib/solver/little_project_eo_body.c similarity index 100% rename from solver/little_project_eo_body.c rename to src/lib/solver/little_project_eo_body.c diff --git a/solver/lu_solve.c b/src/lib/solver/lu_solve.c similarity index 100% rename from solver/lu_solve.c rename to src/lib/solver/lu_solve.c diff --git a/solver/lu_solve.h b/src/lib/solver/lu_solve.h similarity index 100% rename from solver/lu_solve.h rename to src/lib/solver/lu_solve.h diff --git a/solver/matrix_mult_typedef.h b/src/lib/solver/matrix_mult_typedef.h similarity index 100% rename from solver/matrix_mult_typedef.h rename to src/lib/solver/matrix_mult_typedef.h diff --git a/solver/matrix_mult_typedef_bi.h b/src/lib/solver/matrix_mult_typedef_bi.h similarity index 100% rename from solver/matrix_mult_typedef_bi.h rename to src/lib/solver/matrix_mult_typedef_bi.h diff --git a/solver/matrix_mult_typedef_nd.h b/src/lib/solver/matrix_mult_typedef_nd.h similarity index 100% rename from solver/matrix_mult_typedef_nd.h rename to src/lib/solver/matrix_mult_typedef_nd.h diff --git 
a/solver/mcr.c b/src/lib/solver/mcr.c similarity index 100% rename from solver/mcr.c rename to src/lib/solver/mcr.c diff --git a/solver/mcr.h b/src/lib/solver/mcr.h similarity index 100% rename from solver/mcr.h rename to src/lib/solver/mcr.h diff --git a/solver/mcr4complex.c b/src/lib/solver/mcr4complex.c similarity index 100% rename from solver/mcr4complex.c rename to src/lib/solver/mcr4complex.c diff --git a/solver/mcr4complex.h b/src/lib/solver/mcr4complex.h similarity index 100% rename from solver/mcr4complex.h rename to src/lib/solver/mcr4complex.h diff --git a/solver/mixed_cg_her.c b/src/lib/solver/mixed_cg_her.c similarity index 100% rename from solver/mixed_cg_her.c rename to src/lib/solver/mixed_cg_her.c diff --git a/solver/mixed_cg_her.h b/src/lib/solver/mixed_cg_her.h similarity index 100% rename from solver/mixed_cg_her.h rename to src/lib/solver/mixed_cg_her.h diff --git a/solver/mixed_cg_mms_tm_nd.c b/src/lib/solver/mixed_cg_mms_tm_nd.c similarity index 100% rename from solver/mixed_cg_mms_tm_nd.c rename to src/lib/solver/mixed_cg_mms_tm_nd.c diff --git a/solver/mixed_cg_mms_tm_nd.h b/src/lib/solver/mixed_cg_mms_tm_nd.h similarity index 100% rename from solver/mixed_cg_mms_tm_nd.h rename to src/lib/solver/mixed_cg_mms_tm_nd.h diff --git a/solver/monomial_solve.c b/src/lib/solver/monomial_solve.c similarity index 100% rename from solver/monomial_solve.c rename to src/lib/solver/monomial_solve.c diff --git a/solver/monomial_solve.h b/src/lib/solver/monomial_solve.h similarity index 100% rename from solver/monomial_solve.h rename to src/lib/solver/monomial_solve.h diff --git a/solver/mr.c b/src/lib/solver/mr.c similarity index 100% rename from solver/mr.c rename to src/lib/solver/mr.c diff --git a/solver/mr.h b/src/lib/solver/mr.h similarity index 100% rename from solver/mr.h rename to src/lib/solver/mr.h diff --git a/solver/mr4complex.c b/src/lib/solver/mr4complex.c similarity index 100% rename from solver/mr4complex.c rename to 
src/lib/solver/mr4complex.c diff --git a/solver/mr4complex.h b/src/lib/solver/mr4complex.h similarity index 100% rename from solver/mr4complex.h rename to src/lib/solver/mr4complex.h diff --git a/solver/mrblk_body.c b/src/lib/solver/mrblk_body.c similarity index 100% rename from solver/mrblk_body.c rename to src/lib/solver/mrblk_body.c diff --git a/solver/ortho.c b/src/lib/solver/ortho.c similarity index 100% rename from solver/ortho.c rename to src/lib/solver/ortho.c diff --git a/solver/ortho.h b/src/lib/solver/ortho.h similarity index 100% rename from solver/ortho.h rename to src/lib/solver/ortho.h diff --git a/solver/pcg_her.c b/src/lib/solver/pcg_her.c similarity index 100% rename from solver/pcg_her.c rename to src/lib/solver/pcg_her.c diff --git a/solver/pcg_her.h b/src/lib/solver/pcg_her.h similarity index 100% rename from solver/pcg_her.h rename to src/lib/solver/pcg_her.h diff --git a/solver/poly_precon.c b/src/lib/solver/poly_precon.c similarity index 100% rename from solver/poly_precon.c rename to src/lib/solver/poly_precon.c diff --git a/solver/poly_precon.h b/src/lib/solver/poly_precon.h similarity index 100% rename from solver/poly_precon.h rename to src/lib/solver/poly_precon.h diff --git a/solver/quicksort.c b/src/lib/solver/quicksort.c similarity index 100% rename from solver/quicksort.c rename to src/lib/solver/quicksort.c diff --git a/solver/quicksort.h b/src/lib/solver/quicksort.h similarity index 100% rename from solver/quicksort.h rename to src/lib/solver/quicksort.h diff --git a/solver/restart_X.c b/src/lib/solver/restart_X.c similarity index 100% rename from solver/restart_X.c rename to src/lib/solver/restart_X.c diff --git a/solver/restart_X.h b/src/lib/solver/restart_X.h similarity index 100% rename from solver/restart_X.h rename to src/lib/solver/restart_X.h diff --git a/solver/rg_mixed_cg_her.c b/src/lib/solver/rg_mixed_cg_her.c similarity index 100% rename from solver/rg_mixed_cg_her.c rename to src/lib/solver/rg_mixed_cg_her.c diff 
--git a/solver/rg_mixed_cg_her.h b/src/lib/solver/rg_mixed_cg_her.h similarity index 100% rename from solver/rg_mixed_cg_her.h rename to src/lib/solver/rg_mixed_cg_her.h diff --git a/solver/rg_mixed_cg_her_nd.c b/src/lib/solver/rg_mixed_cg_her_nd.c similarity index 100% rename from solver/rg_mixed_cg_her_nd.c rename to src/lib/solver/rg_mixed_cg_her_nd.c diff --git a/solver/rg_mixed_cg_her_nd.h b/src/lib/solver/rg_mixed_cg_her_nd.h similarity index 100% rename from solver/rg_mixed_cg_her_nd.h rename to src/lib/solver/rg_mixed_cg_her_nd.h diff --git a/solver/rg_mixed_cg_typedef.h b/src/lib/solver/rg_mixed_cg_typedef.h similarity index 100% rename from solver/rg_mixed_cg_typedef.h rename to src/lib/solver/rg_mixed_cg_typedef.h diff --git a/solver/solver.h b/src/lib/solver/solver.h similarity index 100% rename from solver/solver.h rename to src/lib/solver/solver.h diff --git a/solver/solver_field.c b/src/lib/solver/solver_field.c similarity index 100% rename from solver/solver_field.c rename to src/lib/solver/solver_field.c diff --git a/solver/solver_field.h b/src/lib/solver/solver_field.h similarity index 100% rename from solver/solver_field.h rename to src/lib/solver/solver_field.h diff --git a/solver/solver_params.h b/src/lib/solver/solver_params.h similarity index 100% rename from solver/solver_params.h rename to src/lib/solver/solver_params.h diff --git a/solver/solver_types.c b/src/lib/solver/solver_types.c similarity index 100% rename from solver/solver_types.c rename to src/lib/solver/solver_types.c diff --git a/solver/solver_types.h b/src/lib/solver/solver_types.h similarity index 100% rename from solver/solver_types.h rename to src/lib/solver/solver_types.h diff --git a/solver/sub_low_ev.c b/src/lib/solver/sub_low_ev.c similarity index 100% rename from solver/sub_low_ev.c rename to src/lib/solver/sub_low_ev.c diff --git a/solver/sub_low_ev.h b/src/lib/solver/sub_low_ev.h similarity index 100% rename from solver/sub_low_ev.h rename to 
src/lib/solver/sub_low_ev.h diff --git a/solver/sumr.c b/src/lib/solver/sumr.c similarity index 100% rename from solver/sumr.c rename to src/lib/solver/sumr.c diff --git a/solver/sumr.h b/src/lib/solver/sumr.h similarity index 100% rename from solver/sumr.h rename to src/lib/solver/sumr.h diff --git a/source_generation.c b/src/lib/source_generation.c similarity index 100% rename from source_generation.c rename to src/lib/source_generation.c diff --git a/source_generation.h b/src/lib/source_generation.h similarity index 100% rename from source_generation.h rename to src/lib/source_generation.h diff --git a/spinor_fft.c b/src/lib/spinor_fft.c similarity index 100% rename from spinor_fft.c rename to src/lib/spinor_fft.c diff --git a/spinor_fft.h b/src/lib/spinor_fft.h similarity index 100% rename from spinor_fft.h rename to src/lib/spinor_fft.h diff --git a/start.c b/src/lib/start.c similarity index 100% rename from start.c rename to src/lib/start.c diff --git a/start.h b/src/lib/start.h similarity index 100% rename from start.h rename to src/lib/start.h diff --git a/struct_accessors.h b/src/lib/struct_accessors.h similarity index 100% rename from struct_accessors.h rename to src/lib/struct_accessors.h diff --git a/su3.h b/src/lib/su3.h similarity index 100% rename from su3.h rename to src/lib/su3.h diff --git a/su3adj.h b/src/lib/su3adj.h similarity index 100% rename from su3adj.h rename to src/lib/su3adj.h diff --git a/su3spinor.h b/src/lib/su3spinor.h similarity index 100% rename from su3spinor.h rename to src/lib/su3spinor.h diff --git a/tensors.h b/src/lib/tensors.h similarity index 100% rename from tensors.h rename to src/lib/tensors.h diff --git a/test/Makefile b/src/lib/test/Makefile similarity index 100% rename from test/Makefile rename to src/lib/test/Makefile diff --git a/test/check_geometry.c b/src/lib/test/check_geometry.c similarity index 100% rename from test/check_geometry.c rename to src/lib/test/check_geometry.c diff --git a/test/check_geometry.h 
b/src/lib/test/check_geometry.h similarity index 100% rename from test/check_geometry.h rename to src/lib/test/check_geometry.h diff --git a/test/check_nan.c b/src/lib/test/check_nan.c similarity index 100% rename from test/check_nan.c rename to src/lib/test/check_nan.c diff --git a/test/check_nan.h b/src/lib/test/check_nan.h similarity index 100% rename from test/check_nan.h rename to src/lib/test/check_nan.h diff --git a/test/check_overlap.c b/src/lib/test/check_overlap.c similarity index 100% rename from test/check_overlap.c rename to src/lib/test/check_overlap.c diff --git a/test/check_xchange.c b/src/lib/test/check_xchange.c similarity index 100% rename from test/check_xchange.c rename to src/lib/test/check_xchange.c diff --git a/test/hopping_test.README b/src/lib/test/hopping_test.README similarity index 100% rename from test/hopping_test.README rename to src/lib/test/hopping_test.README diff --git a/test/hopping_test.input.compare b/src/lib/test/hopping_test.input.compare similarity index 100% rename from test/hopping_test.input.compare rename to src/lib/test/hopping_test.input.compare diff --git a/test/hopping_test.input.new b/src/lib/test/hopping_test.input.new similarity index 100% rename from test/hopping_test.input.new rename to src/lib/test/hopping_test.input.new diff --git a/test/hopping_test.input.start b/src/lib/test/hopping_test.input.start similarity index 100% rename from test/hopping_test.input.start rename to src/lib/test/hopping_test.input.start diff --git a/test/hopping_test_generate_script b/src/lib/test/hopping_test_generate_script similarity index 100% rename from test/hopping_test_generate_script rename to src/lib/test/hopping_test_generate_script diff --git a/test/hopping_test_qscript b/src/lib/test/hopping_test_qscript similarity index 100% rename from test/hopping_test_qscript rename to src/lib/test/hopping_test_qscript diff --git a/test/measure_rectangles.debug.c b/src/lib/test/measure_rectangles.debug.c similarity index 100% rename 
from test/measure_rectangles.debug.c rename to src/lib/test/measure_rectangles.debug.c diff --git a/test/overlaptests.c b/src/lib/test/overlaptests.c similarity index 100% rename from test/overlaptests.c rename to src/lib/test/overlaptests.c diff --git a/test/overlaptests.h b/src/lib/test/overlaptests.h similarity index 100% rename from test/overlaptests.h rename to src/lib/test/overlaptests.h diff --git a/test/qdran64.h b/src/lib/test/qdran64.h similarity index 100% rename from test/qdran64.h rename to src/lib/test/qdran64.h diff --git a/tm_debug_printf.c b/src/lib/tm_debug_printf.c similarity index 100% rename from tm_debug_printf.c rename to src/lib/tm_debug_printf.c diff --git a/tm_debug_printf.h b/src/lib/tm_debug_printf.h similarity index 100% rename from tm_debug_printf.h rename to src/lib/tm_debug_printf.h diff --git a/update_backward_gauge.c b/src/lib/update_backward_gauge.c similarity index 100% rename from update_backward_gauge.c rename to src/lib/update_backward_gauge.c diff --git a/update_backward_gauge.h b/src/lib/update_backward_gauge.h similarity index 100% rename from update_backward_gauge.h rename to src/lib/update_backward_gauge.h diff --git a/update_gauge.c b/src/lib/update_gauge.c similarity index 100% rename from update_gauge.c rename to src/lib/update_gauge.c diff --git a/update_gauge.h b/src/lib/update_gauge.h similarity index 100% rename from update_gauge.h rename to src/lib/update_gauge.h diff --git a/update_momenta.c b/src/lib/update_momenta.c similarity index 100% rename from update_momenta.c rename to src/lib/update_momenta.c diff --git a/update_momenta.h b/src/lib/update_momenta.h similarity index 100% rename from update_momenta.h rename to src/lib/update_momenta.h diff --git a/update_momenta_fg.c b/src/lib/update_momenta_fg.c similarity index 100% rename from update_momenta_fg.c rename to src/lib/update_momenta_fg.c diff --git a/update_momenta_fg.h b/src/lib/update_momenta_fg.h similarity index 100% rename from update_momenta_fg.h 
rename to src/lib/update_momenta_fg.h diff --git a/update_tm.c b/src/lib/update_tm.c similarity index 100% rename from update_tm.c rename to src/lib/update_tm.c diff --git a/update_tm.h b/src/lib/update_tm.h similarity index 100% rename from update_tm.h rename to src/lib/update_tm.h diff --git a/util/io.c b/src/lib/util/io.c similarity index 100% rename from util/io.c rename to src/lib/util/io.c diff --git a/util/io.h b/src/lib/util/io.h similarity index 100% rename from util/io.h rename to src/lib/util/io.h diff --git a/util/laguer/Makefile b/src/lib/util/laguer/Makefile similarity index 100% rename from util/laguer/Makefile rename to src/lib/util/laguer/Makefile diff --git a/util/laguer/chebyRoot.C b/src/lib/util/laguer/chebyRoot.C similarity index 100% rename from util/laguer/chebyRoot.C rename to src/lib/util/laguer/chebyRoot.C diff --git a/util/laguer/chebyRoot.H b/src/lib/util/laguer/chebyRoot.H similarity index 100% rename from util/laguer/chebyRoot.H rename to src/lib/util/laguer/chebyRoot.H diff --git a/util/laguer/laguer.c b/src/lib/util/laguer/laguer.c similarity index 100% rename from util/laguer/laguer.c rename to src/lib/util/laguer/laguer.c diff --git a/util/laguer/quadroptRoot.C b/src/lib/util/laguer/quadroptRoot.C similarity index 100% rename from util/laguer/quadroptRoot.C rename to src/lib/util/laguer/quadroptRoot.C diff --git a/util/oox/Makefile b/src/lib/util/oox/Makefile similarity index 100% rename from util/oox/Makefile rename to src/lib/util/oox/Makefile diff --git a/util/oox/oox.c b/src/lib/util/oox/oox.c similarity index 100% rename from util/oox/oox.c rename to src/lib/util/oox/oox.c diff --git a/util/oox/oox_gawrapper.cxx b/src/lib/util/oox/oox_gawrapper.cxx similarity index 100% rename from util/oox/oox_gawrapper.cxx rename to src/lib/util/oox/oox_gawrapper.cxx diff --git a/util/oox/oox_gawrapper.h b/src/lib/util/oox/oox_gawrapper.h similarity index 100% rename from util/oox/oox_gawrapper.h rename to src/lib/util/oox/oox_gawrapper.h 
diff --git a/util/swapendian.c b/src/lib/util/swapendian.c similarity index 100% rename from util/swapendian.c rename to src/lib/util/swapendian.c diff --git a/util/tmlqcd-indent b/src/lib/util/tmlqcd-indent similarity index 100% rename from util/tmlqcd-indent rename to src/lib/util/tmlqcd-indent diff --git a/wrapper/Makefile.in b/src/lib/wrapper/Makefile.in similarity index 100% rename from wrapper/Makefile.in rename to src/lib/wrapper/Makefile.in diff --git a/wrapper/lib_wrapper.c b/src/lib/wrapper/lib_wrapper.c similarity index 100% rename from wrapper/lib_wrapper.c rename to src/lib/wrapper/lib_wrapper.c diff --git a/xchange/Makefile.in b/src/lib/xchange/Makefile.in similarity index 100% rename from xchange/Makefile.in rename to src/lib/xchange/Makefile.in diff --git a/xchange/little_field_gather.c b/src/lib/xchange/little_field_gather.c similarity index 100% rename from xchange/little_field_gather.c rename to src/lib/xchange/little_field_gather.c diff --git a/xchange/little_field_gather.h b/src/lib/xchange/little_field_gather.h similarity index 100% rename from xchange/little_field_gather.h rename to src/lib/xchange/little_field_gather.h diff --git a/xchange/little_field_gather_body.c b/src/lib/xchange/little_field_gather_body.c similarity index 100% rename from xchange/little_field_gather_body.c rename to src/lib/xchange/little_field_gather_body.c diff --git a/xchange/xchange.h b/src/lib/xchange/xchange.h similarity index 100% rename from xchange/xchange.h rename to src/lib/xchange/xchange.h diff --git a/xchange/xchange_2fields.c b/src/lib/xchange/xchange_2fields.c similarity index 100% rename from xchange/xchange_2fields.c rename to src/lib/xchange/xchange_2fields.c diff --git a/xchange/xchange_2fields.h b/src/lib/xchange/xchange_2fields.h similarity index 100% rename from xchange/xchange_2fields.h rename to src/lib/xchange/xchange_2fields.h diff --git a/xchange/xchange_deri.c b/src/lib/xchange/xchange_deri.c similarity index 100% rename from 
xchange/xchange_deri.c rename to src/lib/xchange/xchange_deri.c diff --git a/xchange/xchange_deri.h b/src/lib/xchange/xchange_deri.h similarity index 100% rename from xchange/xchange_deri.h rename to src/lib/xchange/xchange_deri.h diff --git a/xchange/xchange_field.c b/src/lib/xchange/xchange_field.c similarity index 100% rename from xchange/xchange_field.c rename to src/lib/xchange/xchange_field.c diff --git a/xchange/xchange_field.h b/src/lib/xchange/xchange_field.h similarity index 100% rename from xchange/xchange_field.h rename to src/lib/xchange/xchange_field.h diff --git a/xchange/xchange_gauge.c b/src/lib/xchange/xchange_gauge.c similarity index 100% rename from xchange/xchange_gauge.c rename to src/lib/xchange/xchange_gauge.c diff --git a/xchange/xchange_gauge.h b/src/lib/xchange/xchange_gauge.h similarity index 100% rename from xchange/xchange_gauge.h rename to src/lib/xchange/xchange_gauge.h diff --git a/xchange/xchange_halffield.c b/src/lib/xchange/xchange_halffield.c similarity index 100% rename from xchange/xchange_halffield.c rename to src/lib/xchange/xchange_halffield.c diff --git a/xchange/xchange_halffield.h b/src/lib/xchange/xchange_halffield.h similarity index 100% rename from xchange/xchange_halffield.h rename to src/lib/xchange/xchange_halffield.h diff --git a/xchange/xchange_lexicfield.c b/src/lib/xchange/xchange_lexicfield.c similarity index 100% rename from xchange/xchange_lexicfield.c rename to src/lib/xchange/xchange_lexicfield.c diff --git a/xchange/xchange_lexicfield.h b/src/lib/xchange/xchange_lexicfield.h similarity index 100% rename from xchange/xchange_lexicfield.h rename to src/lib/xchange/xchange_lexicfield.h From e1d95e310b297bda1f50b50a5264ab5c22a0b813 Mon Sep 17 00:00:00 2001 From: Mathieu Taillefumier Date: Mon, 9 Feb 2026 09:11:11 +0100 Subject: [PATCH 02/19] Use TM_BLA for #ifdef flags - Moved git hash string to a C file - Removed the CRAY keyword - Moved test files into a separate directory - Namespace all #ifdef variables
TM_XXX - Moved profile directory to the root - updated the url info --- CMakeLists.txt | 160 +- Makefile.global | 64 - Makefile.in | 167 -- Makefile.tests | 64 - cmake/FindDDAlphaAMG.cmake | 29 + cmake/{git_hash.h.in => git_hash.c.in} | 2 +- cmake/tmlqcd_config_internal.h.in | 58 +- cmake_includes.txt | 425 ---- config.guess | 1701 ------------- config.sub | 1855 -------------- configure.in | 737 ------ .../lib/profiling => profiling}/hmc/Readme.md | 0 .../hmc/example_profile.pdf | Bin .../profiling => profiling}/hmc/profile.Rmd | 0 {src/lib/profiling => profiling}/hmc/timing.R | 0 .../hmc_mk2/.gitignore | 0 .../profiling => profiling}/hmc_mk2/README.md | 0 .../hmc_mk2/logs/example_log.out | 4 +- .../hmc_mk2/make_profile.R | 0 .../hmc_mk2/profile.Rmd | 0 qphix_base_classes.hpp | 771 ------ qphix_interface.cpp | 2192 ----------------- qphix_interface.hpp | 51 - qphix_interface_utils.hpp | 33 - src/bin/LapH_ev.c | 20 +- src/bin/benchmark.c | 46 +- src/bin/deriv_mg_tune.c | 12 +- src/bin/hmc_tm.c | 16 +- src/bin/invert.c | 14 +- src/bin/offline_measurement.c | 10 +- src/bin/{ => tests}/check_locallity.c | 12 +- src/bin/{ => tests}/hopping_test.c | 40 +- src/bin/{ => tests}/qphix_test_Dslash.c | 12 +- src/bin/{ => tests}/scalar_prod_r_test.c | 0 src/bin/{ => tests}/test_eigenvalues.c | 12 +- src/bin/{ => tests}/test_lemon.c | 4 +- src/lib/CMakeLists.txt | 20 +- src/lib/DDalphaAMG_interface.c | 60 +- src/lib/DDalphaAMG_interface.h | 2 +- .../utils_generic_exchange.blocking.inc | 12 +- src/lib/buffers/utils_generic_exchange.c | 12 +- .../utils_generic_exchange.nonblocking.inc | 16 +- src/lib/deriv_Sb.c | 22 +- src/lib/deriv_Sb_D_psi.c | 4 +- .../lib/fixed_volume.h.in | 0 src/lib/geometry_eo.c | 148 +- src/lib/get_rectangle_staples.c | 4 +- src/lib/get_staples.c | 12 +- src/lib/gettime.c | 4 +- src/lib/git_hash.h | 6 + src/lib/global.h | 6 +- src/lib/init/init_dirac_halfspinor.c | 48 +- src/lib/init/init_gauge_field.c | 8 +- src/lib/init/init_geometry_indices.c | 4 +- 
src/lib/init/init_parallel.h | 4 +- src/lib/init/init_spinor_field.c | 16 +- src/lib/invert_clover_eo.c | 6 +- src/lib/invert_doublet_eo.c | 6 +- src/lib/invert_eo.c | 4 +- src/lib/io/gauge_read.c | 4 +- src/lib/io/gauge_read_binary.c | 6 +- src/lib/io/gauge_write_binary.c | 6 +- src/lib/io/selector.h | 8 +- src/lib/io/spinor_read_binary.c | 12 +- src/lib/io/spinor_write_binary.c | 12 +- src/lib/io/spinor_write_propagator_type.c | 8 +- src/lib/io/spinor_write_source_format.c | 8 +- src/lib/io/utils_construct_reader.c | 14 +- src/lib/io/utils_construct_writer.c | 8 +- src/lib/io/utils_destruct_reader.c | 6 +- src/lib/io/utils_destruct_writer.c | 6 +- src/lib/io/utils_kill_with_error.c | 4 +- src/lib/io/utils_write_first_message.c | 24 +- src/lib/io/utils_write_header.c | 8 +- src/lib/io/utils_write_message.c | 8 +- src/lib/linalg/blas.h | 4 +- src/lib/linalg/lapack.h | 2 +- src/lib/little_D.c | 8 +- src/lib/meas/polyakov_loop.c | 10 +- src/lib/measure_gauge_action.c | 2 - src/lib/mpi_init.c | 74 +- src/lib/mpi_init.h | 4 +- src/lib/operator.c | 2 +- src/lib/operator/D_psi_body.c | 2 +- src/lib/operator/Hopping_Matrix.c | 14 +- src/lib/operator/Hopping_Matrix_32.c | 10 +- src/lib/operator/Hopping_Matrix_nocom.c | 4 +- src/lib/operator/halfspinor_body.c | 4 +- src/lib/operator/hopping_bg_dbl.c | 20 +- src/lib/operator/hopping_body_dbl.c | 20 +- src/lib/operator/hopping_sgl.c | 18 +- src/lib/operator/tm_sub_Hopping_Matrix.c | 8 +- src/lib/operator/tm_times_Hopping_Matrix.c | 10 +- src/lib/overrelaxation.c | 2 +- src/lib/parallel_io.h | 4 +- src/lib/read_input.l | 44 +- src/lib/solver/cg_her.c | 2 +- src/lib/solver/cg_her_nd.c | 2 +- src/lib/solver/cr.c | 2 +- src/lib/solver/diagonalise_general_matrix.c | 2 +- src/lib/solver/dirac_operator_eigenvectors.c | 20 +- src/lib/solver/dirac_operator_eigenvectors.h | 6 +- src/lib/solver/eigenvalues.c | 2 +- src/lib/solver/fgmres.c | 2 +- src/lib/solver/fgmres4complex_body.c | 2 +- src/lib/solver/gmres_dr.c | 2 +- 
src/lib/solver/gram-schmidt.c | 6 +- src/lib/solver/mcr.c | 2 +- src/lib/solver/monomial_solve.c | 10 +- src/lib/solver/solver_field.c | 8 +- src/lib/spinor_fft.c | 10 +- src/lib/test/Makefile | 88 - src/lib/test/check_geometry.c | 30 +- src/lib/test/check_overlap.c | 18 +- src/lib/test/check_xchange.c | 68 +- src/lib/test/measure_rectangles.debug.c | 4 +- src/lib/update_backward_gauge.c | 2 +- src/lib/update_gauge.c | 8 +- src/lib/update_momenta_fg.c | 8 +- src/lib/update_tm.c | 8 +- src/lib/util/io.c | 2 +- src/lib/util/laguer/Makefile | 9 - src/lib/util/oox/Makefile | 46 - src/lib/wrapper/lib_wrapper.c | 6 +- src/lib/xchange/xchange_2fields.c | 16 +- src/lib/xchange/xchange_2fields.h | 2 +- src/lib/xchange/xchange_deri.c | 24 +- src/lib/xchange/xchange_field.c | 70 +- src/lib/xchange/xchange_gauge.c | 36 +- src/lib/xchange/xchange_halffield.c | 68 +- src/lib/xchange/xchange_lexicfield.c | 80 +- 131 files changed, 903 insertions(+), 9111 deletions(-) delete mode 100644 Makefile.global delete mode 100644 Makefile.in delete mode 100644 Makefile.tests create mode 100644 cmake/FindDDAlphaAMG.cmake rename cmake/{git_hash.h.in => git_hash.c.in} (62%) delete mode 100644 cmake_includes.txt delete mode 100644 config.guess delete mode 100644 config.sub delete mode 100644 configure.in rename {src/lib/profiling => profiling}/hmc/Readme.md (100%) rename {src/lib/profiling => profiling}/hmc/example_profile.pdf (100%) rename {src/lib/profiling => profiling}/hmc/profile.Rmd (100%) rename {src/lib/profiling => profiling}/hmc/timing.R (100%) rename {src/lib/profiling => profiling}/hmc_mk2/.gitignore (100%) rename {src/lib/profiling => profiling}/hmc_mk2/README.md (100%) rename {src/lib/profiling => profiling}/hmc_mk2/logs/example_log.out (99%) rename {src/lib/profiling => profiling}/hmc_mk2/make_profile.R (100%) rename {src/lib/profiling => profiling}/hmc_mk2/profile.Rmd (100%) delete mode 100644 qphix_base_classes.hpp delete mode 100644 qphix_interface.cpp delete mode 100644 
qphix_interface.hpp delete mode 100644 qphix_interface_utils.hpp rename src/bin/{ => tests}/check_locallity.c (98%) rename src/bin/{ => tests}/hopping_test.c (94%) rename src/bin/{ => tests}/qphix_test_Dslash.c (99%) rename src/bin/{ => tests}/scalar_prod_r_test.c (100%) rename src/bin/{ => tests}/test_eigenvalues.c (98%) rename src/bin/{ => tests}/test_lemon.c (99%) rename fixed_volume.h.in => src/lib/fixed_volume.h.in (100%) create mode 100644 src/lib/git_hash.h delete mode 100644 src/lib/test/Makefile delete mode 100644 src/lib/util/laguer/Makefile delete mode 100644 src/lib/util/oox/Makefile diff --git a/CMakeLists.txt b/CMakeLists.txt index 9dc9f71f2..39adba1c5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.24) project( tmlqcd DESCRIPTION "tmlQCD" - HOMEPAGE_URL "http://www.itkp.uni-bonn.de/~urbach/software.html" + HOMEPAGE_URL "https://github.com/etmc/tmLQCD" VERSION "6.0.0" LANGUAGES C CXX) @@ -80,26 +80,22 @@ option(TM_USE_FFTW "Enable fftw support" OFF) option(TM_USE_MPI "Enable MPI support" OFF) option(TM_USE_CUDA "Enable QUDA support" OFF) option(TM_USE_HIP "Enable HIP support" OFF) -option(TM_USE_DDALPHAAMG "Enable DDalphaAMG support" OFF) -option(TM_USE_OPENMP "Enable openMP" ON) -option(TM_FIXED_VOLUME "fix volume at compile time" OFF) -set( - TM_ENABLE_ALIGNMENT - "auto" - CACHE STRING "Automatically or expliclty align arrays to byte number. auto, none, 16, 32, 64") - -set_property( - CACHE TM_ENABLE_ALIGNMENT - PROPERTY STRINGS - "auto" - "none" - "16" - "32" - "64") +option(TM_USE_DDalphaAMG "Enable DDalphaAMG support" OFF) +option(TM_USE_OMP "Enable openMP" ON) +option(TM_FIXEDVOLUME "fix volume at compile time" OFF) +set(TM_ENABLE_ALIGNMENT + "auto" + CACHE + STRING + "Automatically or expliclty align arrays to byte number. 
auto, none, 16, 32, 64" +) + +set_property(CACHE TM_ENABLE_ALIGNMENT PROPERTY STRINGS "auto" "none" "16" "32" + "64") option(TM_BGL_DRAM "use BGL dram window (BGL only!)" ON) option(TM_USE_OPTIMIZATION "enable optimisation" ON) -option(TM_USE_GAUGE_COPY "Enable use of a copy of the gauge field" ON) +option(TM_USE_GAUGECOPY "Enable use of a copy of the gauge field" ON) option(TM_USE_HALFSPINOR "Use a Dirac Op. with halfspinor exchange" ON) option(TM_USE_TSPLITPAR "Enable timeslice-splitted communications" ON) option(TM_USE_QPHIX "enable QPhiX" OFF) @@ -110,8 +106,8 @@ option(TM_ENABLE_WARNINGS "Enable all warnings" ON) # MPI dependent options cmake_dependent_option( - TM_PERSISTENT_MPI "Use persistent MPI calls for halfspinor [default=no]" - OFF "TM_USE_MPI" OFF) + TM_PERSISTENT_MPI "Use persistent MPI calls for halfspinor [default=no]" OFF + "TM_USE_MPI" OFF) cmake_dependent_option( TM_NONBLOCKING_MPI "Use non-blocking MPI calls for spinor and gaug" ON "TM_USE_MPI" OFF) @@ -126,15 +122,15 @@ cmake_dependent_option(TM_USE_CUDA_HIP "Enable CUDA support in HIP" OFF "TM_USE_HIP" OFF) # clime and lemon depend on MPI -cmake_dependent_option(TM_USE_LEMON "Use the lemon io library" OFF - "TM_USE_MPI" ON) +cmake_dependent_option(TM_USE_LEMON "Use the lemon io library" OFF "TM_USE_MPI" + ON) # GPU dependent options cmake_dependent_option(TM_USE_QUDA_EXPERIMENTAL "Enable QUDA support" ON "TM_USE_QUDA" OFF) cmake_dependent_option( - TM_QUDA_FERMIONIC_FORCES "Enable support for fermionic forces using QUDA" - ON "TM_USE_QUDA" OFF) + TM_QUDA_FERMIONIC_FORCES "Enable support for fermionic forces using QUDA" ON + "TM_USE_QUDA" OFF) cmake_dependent_option(TM_USE_NVHPC "Enable Nvidia HPC toolkit" OFF "TM_USE_CUDA" OFF) @@ -143,7 +139,7 @@ cmake_dependent_option(TM_USE_NVHPC "Enable Nvidia HPC toolkit" OFF find_package(BLAS REQUIRED) # find_package(LAPACK REQUIRED) -set(HAVE_LAPACK ON) +set(TM_LAPACK ON) find_package(FLEX REQUIRED) # do we need bison ? 
find_package(BISON REQUIRED) @@ -154,77 +150,51 @@ set(PACKAGE_TARNAME "tmlqcd") set(PACKAGE_BUGREPORT "curbach@gmx.de") set(PACKAGE_STRING "${PROJECT_DESCRIPTION} ${PROJECT_VERSION}") -unset(TM_USE_MPI) -unset(TM_USE_OMP) -unset(HAVE_LIBLEMON) -unset(HAVE_LIBLIME) -unset(FIXEDVOLUME) -unset(_PERSISTENT) -unset(_NON_BLOCKING) -unset(HAVE_LIBQUDA) -unset(TM_USE_QUDA) -unset(TM_QUDA_EXPERIMENTAL) -unset(TM_QUDA_FERMIONIC_FORCES) -unset(DDalphaAMG) -unset(TM_USE_QPHIX) -unset(QPHIX_SOALEN) -unset(_NEW_GEOMETRY) -unset(_NON_BLOCKING) -unset(_USE_SHMEM) -unset(_USE_HALFSPINOR) set(ALIGN " ") set(ALIGN_BASE "0") set(ALIGN_BASE32 "0") set(ALIGN32 " ") message("${TM_ENABLE_ALIGNMENT}") -if (${TM_ENABLE_ALIGNMENT} STREQUAL "auto") +if(${TM_ENABLE_ALIGNMENT} STREQUAL "auto") set(ALIGN_BASE "0x00") set(ALIGN " ") set(ALIGN_BASE32 "0x00") set(ALIGN32 " ") -elseif (TM_ENABLE_ALIGNMENT EQUAL 16) +elseif(TM_ENABLE_ALIGNMENT EQUAL 16) set(ALIGN_BASE "0x0F") set(ALIGN "__attribute__ ((aligned (16)))") set(ALIGN_BASE32 "0x0F") set(ALIGN32 "__attribute__ ((aligned (16)))") -elseif (TM_ENABLE_ALIGNMENT EQUAL 32) +elseif(TM_ENABLE_ALIGNMENT EQUAL 32) set(ALIGN_BASE "0x2F") set(ALIGN "__attribute__ ((aligned (32)))") set(ALIGN_BASE32 "0x2F") set(ALIGN32 "__attribute__ ((aligned (32)))") -elseif (TM_ENABLE_ALIGNMENT EQUAL 64) +elseif(TM_ENABLE_ALIGNMENT EQUAL 64) set(ALIGN_BASE "0x3F") set(ALIGN "__attribute__ ((aligned (64)))") set(ALIGN_BASE32 "0x3F") set(ALIGN32 "__attribute__ ((aligned (64)))") else() - message(FATAL_ERROR "Unusable value for array alignment. Allowed values are: auto, none, 16, 32, 64") -endif() - -if(TM_USE_HALFSPINOR) - set(_USE_HALFSPINOR ON) -endif() - -if(TM_FIXED_VOLUME) - set(FIXEDVOLUME ON) -endif() - -if(TM_PERSISTENT_MPI) - set(_PERSISTENT ON) + message( + FATAL_ERROR + "Unusable value for array alignment. 
Allowed values are: auto, none, 16, 32, 64" + ) endif() if(TM_USE_MPI) find_package(MPI REQUIRED) - set(TM_USE_MPI ON) if(TM_NONBLOCKING_MPI) - set(_NON_BLOCKING ON) + set(TM_NONBLOCKING ON) + endif() + if(TM_PERSISTENT_MPI) + set(TM_PERSISTENT ON) endif() endif() -if(TM_USE_OPENMP) +if(TM_USE_OMP) find_package(OpenMP REQUIRED COMPONENTS C CXX) - set(TM_USE_OMP ON) endif() if(TM_USE_HDF5) @@ -233,24 +203,23 @@ endif() if(TM_USE_LEMON) find_package(Clemon REQUIRED) - set(HAVE_LIBLEMON ON) endif() find_package(CLime REQUIRED) -set(HAVE_LIBLIME ON) +set(TM_USE_LIME ON) if(TM_USE_QUDA) find_package(QUDA REQUIRED config) - set(HAVE_LIBQUDA ON) if(TM_USE_QUDA_EXPERIMENTAL) set(TM_QUDA_EXPERIMENTAL ON) endif() if(TM_QUDA_FERMIONIC_FORCES) set(TM_QUDA_FERMIONIC_FORCES ON) endif() - if(TM_USE_CUDA OR TM_USE_HIP) - set(TM_USE_QUDA ON) - endif() +endif() + +if(TM_USE_SHMEM) + message(INFO "SHMEM needs to be included") endif() if(TM_USE_CUDA AND TM_USE_HIP) @@ -268,7 +237,6 @@ if(TM_USE_CUDA OR QUDA_TARGET_CUDA) endif() endif() -message("QUDA_TARGET: ${QUDA_TARGET_CUDA}") if(TM_USE_HIP OR QUDA_TARGET_HIP) enable_language(hip) @@ -285,20 +253,15 @@ if(TM_USE_HIP OR QUDA_TARGET_HIP) endif() endif() -if(TM_USE_SHMEM) - set(_USE_SHMEM ON) -endif() - if(TM_USE_QPIHX) find_package(QPhiX REQUIRED) if(NOT TARGET tmlqcd::qphix) add_library(tmlqcd::qphix INTERFACE IMPORTED) set_target_properties(tmlqcd::qphix PROPERTIES INTERFACE_LINK_LIBRARIES - "${QPHIX_LIBRARIES}") + "${QPHIX_LIBRARIES}") set_target_properties(tmlqcd::qphix PROPERTIES INTERFACE_INCLUDE_DIRECTORIES - "${QPHIX_INCLUDE_DIRS}") + "${QPHIX_INCLUDE_DIRS}") endif() - set(TM_USE_QPHIX ON) endif() # check for fftw3 (rely on pkgconfig). 
@@ -309,57 +272,60 @@ if(TM_USE_FFTW) endif() endif() +if(TM_USE_DDalphaAMG) + find_package(DDAlphaAMG REQUIRED) +endif() + # gprofiler -if (TM_USE_GPROF) +if(TM_USE_GPROF) set(PROFILE_FLAGS "-pg;-g") - if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "powerpc|powerpc64") + if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|powerpc64)$") list(APPEND PROFILE_FLAGS "-qfullpath") endif() add_compile_options($:$PROFILE_FLAGS>) endif() -if (TM_ENABLE_WARNINGS) - add_compile_options( - $<$:-Wall> - $<$:-Wall>) +if(TM_ENABLE_WARNINGS) + add_compile_options($<$:-Wall> + $<$:-Wall>) endif() # check for the presence of clock_gettime in libc or librt -check_symbol_exists(clock_gettime "time.h" HAVE_CLOCK_GETTIME) -check_library_exists(rt clock_gettime "" HAVE_CLOCK_GETTIME_IN_RT) -check_function_exists(fseeko HAVE_FSEEKO) +check_symbol_exists(clock_gettime "time.h" TM_CLOCK_GETTIME) +check_library_exists(rt clock_gettime "" TM_CLOCK_GETTIME_IN_RT) +check_function_exists(fseeko TM_FSEEKO) # set the parallelization if(TM_USE_MPI) if(TM_MPI_DIMENSION EQUAL "1") # T parallelisation - set(PARALLELT ON) + set(TM_PARALLELT ON) elseif(TM_MPI_DIMENSION EQUAL "2") # XT parallelisation - set(PARALLELXT ON) + set(TM_PARALLELXT ON) elseif(TM_MPI_DIMENSION EQUAL "3") - set(PARALLELXYT ON) + set(TM_PARALLELXYT ON) # XYZ parallelisation elseif(TM_MPI_DIMENSION EQUAL "4") # timeslice-splitted communications - set(PARALLELXYZT ON) + set(TM_PARALLELXYZT ON) - elseif(TM_MPI_DIMENSION EQUAL "X") + elseif(TM_MPI_DIMENSION STREQUAL "X") - set(PARALLELX ON) + set(TM_PARALLELX ON) - elseif(TM_MPI_DIMENSION EQUAL "XY") + elseif(TM_MPI_DIMENSION STREQUAL "XY") - set(PARALLELXY ON) + set(TM_PARALLELXY ON) - elseif(TM_MPI_DIMENSION EQUAL "XYZ") + elseif(TM_MPI_DIMENSION STREQUAL "XYZ") - set(PARALLELXYZ ON) + set(TM_PARALLELXYZ ON) else() - set(PARALLELXYZT ON) + set(TM_PARALLELXYZT ON) endif() endif() # keep the autotool config.h header.
configure_file("${PROJECT_SOURCE_DIR}/cmake/tmlqcd_config_internal.h.in" "${PROJECT_BINARY_DIR}/tmlqcd_config_internal.h" @ONLY) -configure_file("${PROJECT_SOURCE_DIR}/fixed_volume.h.in" +configure_file("${PROJECT_SOURCE_DIR}/src/lib/fixed_volume.h.in" "${PROJECT_BINARY_DIR}/fixed_volume.h" @ONLY) # check if git command exists find_program(GIT_EXE NAMES git) @@ -385,6 +351,6 @@ else() ) endif() -configure_file(cmake/git_hash.h.in git_hash.h @ONLY) +configure_file(cmake/git_hash.c.in git_hash.c @ONLY) add_subdirectory(src/lib) add_subdirectory(src/bin) diff --git a/Makefile.global b/Makefile.global deleted file mode 100644 index dc1eefcf1..000000000 --- a/Makefile.global +++ /dev/null @@ -1,64 +0,0 @@ -# This Makefile is included from the other Makefiles -# It contains some overall targets... - -# refresh Makefile and other stuff - - - -PROGRAMS_WITH_GIT_HASH := hmc_tm invert offline_measurement test_Dslash deriv_mg_tune - -.SUFFIXES: - -Makefile: ${top_srcdir}/Makefile.global $(srcdir)/Makefile.in $(abs_top_builddir)/config.status - cd $(abs_top_builddir) \ - && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status - -$(abs_top_builddir)/config.status: $(top_srcdir)/configure - ( cd ${abs_top_builddir} && $(SHELL) ./config.status --recheck ) - -$(abs_top_builddir)/include/tmlqcd_config_internal.h: $(top_srcdir)/include/tmlqcd_config_internal.h.in $(abs_top_builddir)/config.status $(top_srcdir)/configure - ( cd ${abs_top_builddir} && $(SHELL) ./config.status --header=include/tmlqcd_config_internal.h ) - -# rebuild configure if configure.in changes but ignore errors -# on many machines some of the macros fail to be recognized -# but the resulting configure still works -$(top_srcdir)/configure: $(top_srcdir)/configure.in - -( cd $(top_srcdir) && $(AUTOCONF) ) - -#dep rules - -# PROGRAMS_WITH_GIT_HASH require git_hash.h which is dynamically built by a phony make target -# to prevent too frequent building of git_hash (slowing down the build) -# we filter 
the list of all objects and treat these separately -$(addsuffix .d, $(filter-out ${PROGRAMS_WITH_GIT_HASH},${ALLOBJ})): %.d: ${srcdir}/%.c Makefile - @ $(CCDEP) ${DEPFLAGS} ${DEFS} ${INCLUDES} $< > $@ -$(addsuffix .d, $(filter-out ${PROGRAMS_WITH_GIT_HASH},${CXXMODULES})): %.d: ${srcdir}/%.cpp Makefile - @ $(CXXDEP) ${CXXDEPFLAGS} ${DEFS} ${INCLUDES} $< > $@ - -# dirty hack to prevent make from entering an infinite loop because a phony target is given as a real -# dependency (make will build invert.d and hmc_tm.d indefinitely) -# when git_hash.h does not exist (as checked using wildcard) it is given as a dependency of invert.d and hmc_tm.d -# once it exists, this is no longer the case -# while this does break updating of git_hash.h while the dependencies are built, this is quite -# irrelevant because it will be rebuilt during the compilation of either invert or hmc_tm -ifneq (git_hash.h, $(findstring git_hash.h,$(wildcard $(top_srcdir)/git_has*.h))) -$(addsuffix .d, $(filter ${PROGRAMS_WITH_GIT_HASH},${ALLOBJ})): %.d: ${srcdir}/%.c ${top_srcdir}/git_hash.h Makefile - @ $(CCDEP) ${DEPFLAGS} ${DEFS} ${INCLUDES} $< > $@ -else -$(addsuffix .d, $(filter ${PROGRAMS_WITH_GIT_HASH},${ALLOBJ})): %.d: ${srcdir}/%.c Makefile - @ $(CCDEP) ${DEPFLAGS} ${DEFS} ${INCLUDES} $< > $@ -endif - -${top_builddir}/fixed_volume.h: ${top_srcdir}/fixed_volume.h.in ${top_builddir}/config.status - cd ${abs_top_builddir} && CONFIG_FILES=fixed_volume.h CONFIG_HEADERS= $(SHELL) ${top_builddir}/config.status - -all-recursive all-debug-recursive all-profile-recursive clean-recursive distclean-recursive compile-clean-recursive: Makefile - @set fnord ${MAKEFLAGS}; amf=$$2; \ - dot_seen=no; \ - target=`echo $@ | sed s/-recursive//`; \ - list='$(SUBDIRS)'; for subdir in $$list; do \ - echo "Making $$target in $$subdir"; \ - local_target="$$target"; \ - ( cd $$subdir && $(MAKE) $$local_target ) \ - || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \ - done; test -z "$$fail"; diff --git 
a/Makefile.in b/Makefile.in deleted file mode 100644 index 51437ff05..000000000 --- a/Makefile.in +++ /dev/null @@ -1,167 +0,0 @@ -srcdir = @srcdir@ -top_srcdir = @top_srcdir@ -abs_top_srcdir = @abs_top_srcdir@ -top_builddir = . -abs_top_builddir = @abs_top_builddir@ -builddir = @builddir@ -prefix = @prefix@ -exec_prefix = @exec_prefix@ -bindir = @bindir@ -program_transform_name = @program_transform_name@ -subdir = . - -AR = @AR@ -RANLIB = @RANLIB@ -CC = @CC@ -CXX = @CXX@ -CCDEP = @CCDEP@ -CXXDEP = @CXXDEP@ -CFLAGS = @CFLAGS@ -CXXFLAGS = @CXXFLAGS@ -LDFLAGS = @LDFLAGS@ -DEPFLAGS = @DEPFLAGS@ -CXXDEPFLAGS = @CXXDEPFLAGS@ -CPPFLAGS = @CPPFLAGS@ -CCLD = @CCLD@ -LEX = @LEX@ -AUTOCONF = @AUTOCONF@ -LIBS = @LIBS@ -SHELL = @SHELL@ -OPTARGS = @OPTARGS@ -SOPTARGS = @SOPTARGS@ -DEFS = @DEFS@ -USESUBDIRS = @USESUBDIRS@ -NVCC = @NVCC@ -GPUMPICOMPILER = @GPUMPICOMPILER@ - -INCLUDES = @INCLUDES@ -LINK = $(CCLD) -o $@ ${LDFLAGS} - -COMPILE = ${CC} ${DEFS} ${INCLUDES} -o $@ ${CFLAGS} -CXXCOMPILE = ${CXX} ${DEFS} ${INCLUDES} -o $@ ${CXXFLAGS} ${LDFLAGS} - -SMODULES = - -MODULES = read_input gamma measure_gauge_action start \ - expo matrix_utils get_staples update_backward_gauge \ - measure_rectangles get_rectangle_staples \ - test/check_geometry test/check_xchange \ - test/overlaptests \ - invert_eo invert_doublet_eo update_gauge \ - getopt sighandler reweighting_factor \ - source_generation boundary update_tm ranlxd \ - mpi_init deriv_Sb deriv_Sb_D_psi ranlxs \ - geometry_eo invert_overlap aligned_malloc \ - prepare_source chebyshev_polynomial_nd Ptilde_nd \ - reweighting_factor_nd rnd_gauge_trafo \ - update_momenta update_momenta_fg integrator phmc \ - little_D block operator \ - spinor_fft \ - fatal_error invert_clover_eo gettime \ - tm_debug_printf compare_derivative \ - @QUDA_INTERFACE@ @DDalphaAMG_INTERFACE@ - -CXXMODULES = @QPHIX_INTERFACE@ - -NOOPTMOD = test/check_xchange test/check_geometry - -PROGRAMS = hmc_tm benchmark invert gen_sources \ - check_locallity test_lemon 
hopping_test \ - offline_measurement deriv_mg_tune @QPHIX_PROGRAMS@ - -ALLOBJ = ${MODULES} ${PROGRAMS} ${SMODULES} -SUBDIRS = ${USESUBDIRS} - -# delete the default suffix rules -.SUFFIXES: - -# need to build modules before subdirs! -all: Makefile dep $(SUBDIRS) hmc_tm invert benchmark offline_measurement deriv_mg_tune @QPHIX_PROGRAMS@ - -$(SUBDIRS): - $(MAKE) --directory=$@ - -# run the GIT-VERSION-GEN script to generate version information in git_hash.h -# making sure that we run in the correct directory -${top_srcdir}/git_hash.h: - @ ( cd @srcdir@ && sh GIT-VERSION-GEN ) - --include $(addsuffix .d,$(ALLOBJ)) --include $(addsuffix .d,$(CXXMODULES)) - -include ${top_srcdir}/Makefile.global - -# follow https://www.owlfolio.org/possibly-useful/flex-input-scanner-rules-are-too-complicated/ -# and pass the -Ca option such that more than 32k "NFA" states are allowed -# our ruleset is so complicated that this has become necessary! -${top_srcdir}/read_input.c: ${top_srcdir}/read_input.l -ifneq (,$(findstring lex,${LEX})) - ${LEX} -Ca -Ptmlqcd -i -t ${top_srcdir}/read_input.l > ${top_srcdir}/read_input.c -else - $(error Unable to find (f)lex, read_input.c not built. Please install (f)lex!) 
-endif - -libhmc.a: ${addsuffix .o, ${MODULES} ${SMODULES}} Makefile - @rm -f libhmc.a - @${AR} cru libhmc.a ${addsuffix .o, ${MODULES} ${SMODULES}} - @$(RANLIB) libhmc.a - @cp libhmc.a ${top_builddir}/lib/libhmc.a - -$(addsuffix .o,$(filter-out ${NOOPTMOD},${MODULES})): %.o: ${srcdir}/%.c %.d Makefile $(abs_top_builddir)/include/tmlqcd_config_internal.h - ${COMPILE} ${OPTARGS} -c $< - -#here we don't need optimisation -$(addsuffix .o,$(filter ${NOOPTMOD},${MODULES})): %.o: ${srcdir}/%.c %.d Makefile $(abs_top_builddir)/include/tmlqcd_config_internal.h - ${COMPILE} -c $< - -${addsuffix .o, ${SMODULES}}: %.o: ${srcdir}/%.c %.d Makefile $(abs_top_builddir)/include/tmlqcd_config_internal.h - ${COMPILE} ${SOPTARGS} -c $< - -# C++ modules -$(addsuffix .o,${CXXMODULES}): %.o: ${srcdir}/%.cpp %.d Makefile $(abs_top_builddir)/include/tmlqcd_config_internal.h - ${CXXCOMPILE} -c $< - -${addsuffix .o, ${PROGRAMS}}: %.o: ${srcdir}/%.c %.d Makefile $(abs_top_builddir)/include/tmlqcd_config_internal.h ${top_srcdir}/git_hash.h - ${COMPILE} ${OPTARGS} -c $< - -${PROGRAMS}: %: %.o libhmc.a $(SUBDIRS) $(addsuffix .o,${CXXMODULES}) - ${LINK} $@.o $(addsuffix .o,${CXXMODULES}) $(GPUOBJECTS) $(GPUOBJECTS_C) $(LIBS) ${LDFLAGS} - - -# The rules for unit tests are kept in a separate file for tidyness -include ${top_srcdir}/Makefile.tests - -dep: $(addsuffix .d,$(ALLOBJ)) $(addsuffix .d,$(CXXMODULES)) - @ echo "...dependency files built" - -install: Makefile - @mkdir -p $(bindir); \ - for p in hmc_tm invert benchmark offline_measurement deriv_mg_tune; do \ - progname=`echo $$p | sed '$(program_transform_name)'`; \ - echo "Installing $$p as $$progname in $(bindir)..."; \ - cp $$p $(bindir)/$$progname; \ - done; \ - echo "done"; - -uninstall: Makefile - for p in hmc_tm invert benchmark offline_measurement deriv_mg_tune; do \ - progname=`echo $$p | sed '$(program_transform_name)'`; \ - echo "Un-Installing $$progname in $(bindir)..."; \ - rm $(bindir)/$$progname; \ - done; \ - echo "done"; - 
-compile-clean: compile-clean-recursive Makefile - rm -f *.o *.d test/*.o test/*.d tests/*.o tests/*.d - -clean: clean-recursive Makefile - rm -f benchmark hmc_tm invert offline_measurement test_Dslash deriv_mg_tune @QPHIX_PROGRAMS@ *.o *.d test/*.o test/*.d tests/*.o tests/*.d - -distclean: distclean-recursive Makefile - rm -f benchmark hmc_tm invert offline_measurement *.o *.d *~ Makefile config.log config.status fixed_volume.h - rm -f include/tmlqcd_config_internal.h - -.PHONY: all ${SUBDIRS} ${top_srcdir}/git_hash.h clean compile-clean distclean dep install \ - all-recursive all-debug-recursive all-profile-recursive \ - clean-recursive distclean-recursive \ - compile-clean-recursive diff --git a/Makefile.tests b/Makefile.tests deleted file mode 100644 index a9a393ac6..000000000 --- a/Makefile.tests +++ /dev/null @@ -1,64 +0,0 @@ -TESTS = tests/test_sample tests/test_su3 tests/test_buffers tests/test_qpx tests/test_linalg tests/test_clover tests/test_rat - -TEMP = $(patsubst %.c,%,$(wildcard $(top_srcdir)/tests/*.c)) -TESTMODULES = $(patsubst $(top_srcdir)/%,%,$(TEMP)) - -TESTFLAGS = -L$(top_builddir)/cu/ -lcu - -$(addsuffix .o,$(TESTMODULES)): %.o : $(top_srcdir)/%.c - ${COMPILE} -c $(OPTARGS) ${DEFS} $< - -# The linking stage needs to be differentiated because different tests rely on -# different modules from the codebase -# Each test itself consists of a number of modules that need to be linked. 
- -# when used as a prerequisite, the wildcard with "tests/test_sample*.c" replaced by "$@*.c" is not evaluated -# correctly, even though it works perfectly in an echo statement, it results in make -# trying to compile all objects in top_srcdir -# we therefore evaluate the wildcard into a variable - -TEST_SAMPLE_OBJECTS:=$(patsubst $(top_srcdir)/%.c,%.o,$(wildcard $(top_srcdir)/tests/test_sample*.c)) -TEST_SAMPLE_FLAGS:= -TEST_SAMPLE_LIBS:=$(top_builddir)/cu/libcu.a -tests/test_sample: $(TEST_SAMPLE_OBJECTS) $(TEST_SAMPLE_LIBS) - ${LINK} $(TEST_SAMPLE_OBJECTS) $(TESTFLAGS) $(TEST_SAMPLE_FLAGS) - -TEST_SU3_OBJECTS:=$(patsubst $(top_srcdir)/%.c,%.o,$(wildcard $(top_srcdir)/tests/test_su3*.c)) expo.o -TEST_SU3_FLAGS:=-lm -TEST_SU3_LIBS:=$(top_builddir)/cu/libcu.a -tests/test_su3: $(TEST_SU3_OBJECTS) $(TEST_SU3_LIBS) - ${LINK} $(TEST_SU3_OBJECTS) $(TESTFLAGS) $(TEST_SU3_FLAGS) - -TEST_QPX_OBJECTS:=$(patsubst $(top_srcdir)/%.c,%.o,$(wildcard $(top_srcdir)/tests/test_qpx*.c)) -TEST_QPX_FLAGS:=-lm -TEST_QPX_LIBS:=$(top_builddir)/cu/libcu.a -tests/test_qpx: $(TEST_QPX_OBJECTS) $(TEST_QPX_LIBS) - ${LINK} $(TEST_QPX_OBJECTS) $(TESTFLAGS) $(TEST_QPX_FLAGS) - -TEST_LINALG_OBJECTS:=$(patsubst $(top_srcdir)/%.c,%.o,$(wildcard $(top_srcdir)/tests/test_linalg*.c)) -TEST_LINALG_FLAGS:=-lm -TEST_LINALG_LIBS:=$(top_builddir)/cu/libcu.a $(top_builddir)/linalg/liblinalg.a -tests/test_linalg: $(TEST_LINALG_OBJECTS) $(TEST_LINALG_LIBS) - ${LINK} $(TEST_LINALG_OBJECTS) $(TEST_LINALG_LIBS) $(TESTFLAGS) $(TEST_LINALG_FLAGS) - -TEST_BUFFERS_OBJECTS:=$(patsubst $(top_srcdir)/%.c,%.o,$(wildcard $(top_srcdir)/tests/test_buffers*.c)) fatal_error.o -TEST_BUFFERS_FLAGS:=-lbuffers -L$(top_builddir)/buffers/ -TEST_BUFFERS_LIBS:=$(top_builddir)/cu/libcu.a $(top_builddir)/buffers/libbuffers.a -tests/test_buffers: $(TEST_BUFFERS_OBJECTS) $(TEST_BUFFERS_LIBS) - ${LINK} $(TEST_BUFFERS_OBJECTS) $(TESTFLAGS) $(TEST_BUFFERS_FLAGS) - -TEST_CLOVER_OBJECTS:=$(patsubst $(top_srcdir)/%.c,%.o,$(wildcard 
$(top_srcdir)/tests/test_clover*.c)) operator/clover_leaf.o -TEST_CLOVER_FLAGS:=-lm -lhmc -llinalg -TEST_CLOVER_LIBS:=$(top_builddir)/cu/libcu.a -tests/test_clover: $(TEST_CLOVER_OBJECTS) $(TEST_CLOVER_LIBS) - ${LINK} $(TEST_CLOVER_OBJECTS) $(TESTFLAGS) $(TEST_CLOVER_FLAGS) - -TEST_RAT_OBJECTS:=$(patsubst $(top_srcdir)/%.c,%.o,$(wildcard $(top_srcdir)/tests/test_rat*.c)) -TEST_RAT_FLAGS:=-lm -lrational -TEST_RAT_LIBS:=$(top_builddir)/cu/libcu.a -tests/test_rat: $(TEST_RAT_OBJECTS) $(TEST_RAT_LIBS) - ${LINK} $(TEST_RAT_OBJECTS) $(TESTFLAGS) $(TEST_RAT_FLAGS) - - -tests: ${TESTS} - diff --git a/cmake/FindDDAlphaAMG.cmake b/cmake/FindDDAlphaAMG.cmake new file mode 100644 index 000000000..f42c943cc --- /dev/null +++ b/cmake/FindDDAlphaAMG.cmake @@ -0,0 +1,29 @@ +include(FindPackageHandleStandardArgs) + +find_library( + TMLQCD_DDALPHAAMG_LIBRARIES + NAMES DDalphaAMG DDalphaAMG_devel + PATH_SUFFIXES "lib" "lib64") + +find_path( + TMLQCD_DDALPHAAMG_INCLUDE_DIRS + NAMES DDalphaAMG.h + PATH_SUFFIXES "include" "include/${_package_name}" "${_package_name}") + +find_package_handle_standard_args( + DDAlphaAMG DEFAULT_MSG TMLQCD_DDALPHAAMG_LIBRARIES + TMLQCD_DDALPHAAMG_INCLUDE_DIRS) + +if(NOT TARGET tmlqcd::DDalphaAMG) + add_library(tmlqcd::DDalphaAMG INTERFACE IMPORTED) + set_target_properties( + tmlqcd::DDalphaAMG PROPERTIES INTERFACE_LINK_LIBRARIES + "${TMLQCD_DDALPHAAMG_LIBRARIES}") + set_target_properties( + tmlqcd::DDalphaAMG PROPERTIES INTERFACE_INCLUDE_DIRECTORIES + "${TMLQCD_DDALPHAAMG_INCLUDE_DIRS}") +endif() + +set(TMLQCD_DDALPHAAMG_FOUND ON) +mark_as_advanced(TMLQCD_DDALPHAAMG_FOUND TMLQCD_DDALPHAAMG_LIBRARIES + TMLQCD_DDALPHAAMG_INCLUDE_DIRS) diff --git a/cmake/git_hash.h.in b/cmake/git_hash.c.in similarity index 62% rename from cmake/git_hash.h.in rename to cmake/git_hash.c.in index 23f624742..912085abb 100644 --- a/cmake/git_hash.h.in +++ b/cmake/git_hash.c.in @@ -1,6 +1,6 @@ #ifndef _GIT_HASH_H #define _GIT_HASH_H -const char git_hash[] = "@TMLQCD_SHA@"; +const char
git_hash[] = "@TM_SHA@"; #endif /* _GIT_HASH_H */ diff --git a/cmake/tmlqcd_config_internal.h.in b/cmake/tmlqcd_config_internal.h.in index 5dd9c7096..2765a2b7c 100644 --- a/cmake/tmlqcd_config_internal.h.in +++ b/cmake/tmlqcd_config_internal.h.in @@ -3,20 +3,17 @@ * into static const variables, following the convention used by the USQCD build * systems, for example. */ -/* We are on a CRAY */ -#cmakedefine CRAY - /* lapack available */ -#cmakedefine HAVE_LAPACK +#cmakedefine TM_LAPACK /* Define to 1 if you have the `lime' library (-llime). */ -#cmakedefine HAVE_LIBLIME +#cmakedefine TM_USE_LIME /* Define to 1 if you have the `lemon' library (-llemon). */ -#cmakedefine HAVE_LIBLEMON +#cmakedefine TM_USE_LEMON /* 1 if clock_gettime is available for use in benchmark */ -#cmakedefine HAVE_CLOCK_GETTIME +#cmakedefine TM_CLOCK_GETTIME /* Compile with MPI support */ #cmakedefine TM_USE_MPI @@ -25,7 +22,7 @@ #cmakedefine TM_USE_OMP /* Compile with FFTW support */ -#cmakedefine HAVE_FFTW +#cmakedefine TM_USE_FFTW /* Fortran has not extra _ */ #cmakedefine NOF77_ @@ -45,31 +42,31 @@ #define PACKAGE_VERSION "@PROJECT_DESCRIPTION@ @PROJECT_VERSION@" /* X parallelisation */ -#cmakedefine PARALLELX +#cmakedefine TM_PARALLELX /* XY parallelisation */ -#cmakedefine PARALLELXY +#cmakedefine TM_PARALLELXY /* XYZ parallelisation */ -#cmakedefine PARALLELXYZ +#cmakedefine TM_PARALLELXYZ /* One dimensional parallelisation */ -#cmakedefine PARALLELT +#cmakedefine TM_PARALLELT /* Two dimensional parallelisation */ -#cmakedefine PARALLELXT +#cmakedefine TM_PARALLELXT /* Three dimensional parallelisation */ -#cmakedefine PARALLELXYT +#cmakedefine TM_PARALLELXYT /* Four dimensional parallelisation */ -#cmakedefine PARALLELXYZT +#cmakedefine TM_PARALLELXYZT /* Fixed volume at compiletime */ -#cmakedefine FIXEDVOLUME +#cmakedefine TM_FIXEDVOLUME /* Define to 1 if fseeko (and presumably ftello) exists and is declared. 
*/ -#cmakedefine HAVE_FSEEKO +#cmakedefine TM_FSEEKO /* Alignment for arrays -- necessary for SSE and automated vectorization */ #define ALIGN_BASE @ALIGN_BASE@ @@ -88,40 +85,37 @@ #cmakedefine YYTEXT_POINTER /* Number of bits in a file offset, on hosts where this is settable. */ #cmakedefine _FILE_OFFSET_BITS @TMLQCD_FILE_OFFSET_BITS@ /* Construct an extra copy of the gauge fields */ -#cmakedefine _GAUGE_COPY +#cmakedefine TM_USE_GAUGECOPY /* Define to 1 to make fseeko visible on some hosts (e.g. glibc 2.2). */ #cmakedefine _LARGEFILE_SOURCE /* Define for large files, on AIX-style hosts. */ #cmakedefine _LARGE_FILES /* Use even/odd geometry in the gauge fields */ -#cmakedefine _NEW_GEOMETRY +#cmakedefine TM_NEW_GEOMETRY /* x86 64 Bit architecture */ -#cmakedefine _x86_64 +#cmakedefine TM_x86_64 /* Define to 1 if Dirac operator with halfspinor should be used */ -#cmakedefine _USE_HALFSPINOR +#cmakedefine TM_USE_HALFSPINOR /* Define to 1 if shmem API should be used */ -#cmakedefine _USE_SHMEM +#cmakedefine TM_USE_SHMEM /* Define to 1 if KOJAK instrumentalisation should be done*/ -#cmakedefine _KOJAK_INST +#cmakedefine TM_KOJAK_INST /* Define to 1 if persistent MPI calls for halfspinor should be used */ -#cmakedefine _PERSISTENT +#cmakedefine TM_PERSISTENT /* Define to 1 if non-blocking MPI calls for spinor and gauge should be used */ -#cmakedefine _NON_BLOCKING - -/* Define to 1 if you have the `quda' library (-lquda).
*/ -#cmakedefine HAVE_LIBQUDA +#cmakedefine TM_NONBLOCKING /* Using QUDA GPU */ #cmakedefine TM_USE_QUDA @@ -133,7 +127,7 @@ #cmakedefine TM_QUDA_FERMIONIC_FORCES /* Using DDalphaAMG */ -#cmakedefine DDalphaAMG +#cmakedefine TM_USE_DDalphaAMG /* Using QPHIX */ #cmakedefine TM_USE_QPHIX diff --git a/cmake_includes.txt b/cmake_includes.txt deleted file mode 100644 index b8e105cc0..000000000 --- a/cmake_includes.txt +++ /dev/null @@ -1,425 +0,0 @@ -LIST(APPEND IO_SRC_C io_srcio/utils_write_inverter_info.c -io/gauge_read.c -io/utils_write_xlf.c -io/utils_construct_reader.c -io/params_construct_xlfInfo.c -io/utils_kill_with_error.c -io/DML_crc32.c -io/spinor_write_source_format.c -io/deri_write_stdout.c -io/spinor_write_propagator_format.c -io/utils_engineering.c -io/utils_parse_propagator_type.c -io/io_cm.c -io/utils_parse_ildgformat_xml.c -io/utils_read_message.c -io/utils_write_ildg_format.c -io/utils_destruct_writer.c -io/gauge_write.c -io/utils_write_message.c -io/params_construct_ildgFormat.c -io/spinor_read.c -io/utils_close_reader_record.c -io/spinor_read_binary.c -io/utils.c -io/spinor_write_stdout.c -io/spinor_write_info.c -io/utils_write_checksum.c -io/utils_write_header.c -io/eospinor_read.c -io/utils_write_first_message.c -io/params_construct_InverterInfo.c -io/utils_parse_checksum_xml.c -io/utils_construct_writer.c -io/sw_write_stdout.c -io/spinor_write_propagator_type.c -io/gauge_write_binary.c -io/spinor_write.c -io/utils_write_xlf_xml.c -io/params_construct_propagatorFormat.c -io/gauge_read_binary.c -io/dml.c -io/spinor_write_binary.c -io/utils_destruct_reader.c -io/utils_close_writer_record.c -io/eospinor_write.c -io/gauge_write_luscher_binary.c -io/params_construct_sourceFormat.c) - -list(APPEND INIT_SRC_C init/init_dirac_halfspinor.c - init/init_geometry_indices.c - init/init_openmp.c - init/init_gauge_field.c - init/init_parallel.c - init/init_chi_spinor_field.c - init/init_gauge_fg.c - init/init_spinor_field.c - init/init_global_states.c - 
init/init_bispinor_field.c - init/init_gauge_tmp.c - init/init_critical_globals.c - init/init_omp_accumulators.c - init/init_jacobi_field.c - init/init_stout_smear_vars.c - init/init_moment_field.c) - -list(APPEND SOLVER_SRC_C -solver/bicg_complex.c -solver/dfl_projector.c -solver/eigenvalues_Jacobi.c -solver/gcr.c -solver/gmres_precon.c -solver/chrono_guess.c -solver/gcr4complex.c -solver/jdher.c -solver/gcr4complex_body.c -solver/gmres_dr.c -solver/fgmres4complex_body.c -solver/cg_her_bi.c -solver/solver_field.c -solver/quicksort.c -solver/bicgstab2.c -solver/cgs_real.c -solver/M_plus_block_psi_body.c -solver/little_mg_precon_body.c -solver/cg_her_su3vect.c -solver/little_project_eo_body.c -solver/monomial_solve.c -solver/cr.c -solver/gram-schmidt.c -solver/solver_types.c -solver/mode_number.c -solver/cg_her.c -solver/jdher_bi.c -solver/mrblk_body.c -solver/eigcg.c -solver/jdher_su3vect.c -solver/poly_precon.c -solver/Msap.c -solver/fgmres.c -solver/dirac_operator_eigenvectors.c -solver/incr_eigcg.c -solver/index_jd.c -solver/sumr.c -solver/cgne4complex.c -solver/eigenvalues_bi.c -solver/gmres.c -solver/lu_solve.c -solver/diagonalise_general_matrix.c -solver/mcr.c -solver/bicgstabell.c -solver/rg_mixed_cg_her.c -solver/mixed_cg_her.c -solver/mixed_cg_mms_tm_nd.c -solver/rg_mixed_cg_her_nd.c -solver/spectral_proj.c -solver/restart_X.c -solver/generate_dfl_subspace.c -solver/eigenvalues.c -solver/mcr4complex.c -solver/mr4complex.c -solver/bicgstab_complex.c -solver/cg_mms_tm_nd.c -solver/mr.c -solver/cg_her_nd.c -solver/bicgstab_complex_bi.c -solver/sub_low_ev.c -solver/ortho.c -solver/pcg_her.c -solver/fgmres4complex.c -solver/cg_mms_tm.c -solver/init_guess.c) - -list(APPEND LINALG_SRC_C linalg/assign_mul_bra_add_mul_r.c - linalg/mul_r_gamma5.c - linalg/convert_eo_to_lexic.c - linalg/print_spinor.c - linalg/assign_add_mul_body.c - linalg/mul_diff_mul_r.c - linalg/square_norm_32.c - linalg/mul.c - linalg/mul_r.c - linalg/mul_gamma5.c - linalg/ratio.c - 
linalg/square_norm.c - linalg/mul_diff_mul.c - linalg/square_and_minmax.c - linalg/add.c - linalg/assign_add_mul_add_mul_r.c - linalg/comp_decomp.c - linalg/mul_add_mul.c - linalg/diff_32.c - linalg/assign_add_mul.c - linalg/addto_32.c - linalg/assign_mul_add_mul_add_mul_add_mul_r.c - linalg/assign_add_mul_r.c - linalg/diff.c - linalg/assign_mul_add_mul_r.c - linalg/scalar_prod_r.c - linalg/assign_to_32.c - linalg/assign_add_mul_add_mul.c - linalg/mul_diff_r.c - linalg/assign_mul_add_r_and_square.c - linalg/assign_mul_add_mul_r_32.c - linalg/assign_mul_add_mul.c - linalg/assign_mul_add_mul_add_mul_r.c - linalg/scalar_prod_r_32.c - linalg/assign_mul_add_r.c - linalg/assign_mul_add_r_32.c - linalg/scalar_prod_su3spinor.c - linalg/convert_even_to_lexic.c - linalg/mul_r_32.c - linalg/assign_add_mul_r_add_mul.c - linalg/convert_odd_to_lexic.c - linalg/diff_and_square_norm.c - linalg/scalar_prod_i.c - linalg/mul_add_mul_r.c - linalg/assign_diff_mul.c - linalg/assign_mul_bra_add_mul_ket_add_r.c - linalg/set_even_to_zero.c - linalg/assign_mul_add.c - linalg/square_and_prod_r.c - linalg/scalar_prod_body.c - linalg/assign_mul_bra_add_mul_ket_add.c - linalg/assign_add_mul_r_32.c - linalg/scalar_prod.c - linalg/mattimesvec.c - linalg/assign.c - linalg/print_spinor_similar_components.c) - -list(APPEND RATIONAL_SRC_C rational/zolotarev.c - rational/rational.c - rational/elliptic.c) - -list(APPEND OPERATOR_SRC_C operator/clover_invert.c - operator/hopping_body_dbl.c - operator/tm_operators_nd_32.c - operator/hopping_sse_dbl.c - operator/halfspinor_body.c - operator/Block_D_psi_body.c - operator/mul_one_pm_imu_sub_mul_body.c - operator/assign_mul_one_sw_pm_imu_site_lexic_body.c - operator/assign_mul_one_sw_pm_imu_inv_block_body.c - operator/clover_accumulate_deriv.c - operator/Hopping_Matrix.c - operator/hopping_bg_dbl.c - operator/tm_operators.c - operator/tm_times_Hopping_Matrix.c - operator/clovertm_operators_32.c - operator/hopping_sgl.c - operator/Dov_proj.c - 
operator/clover_deriv.c - operator/halfspinor_bg_dbl.c - operator/clover_det.c - operator/clover_leaf.c - operator/D_psi_body.c - operator/clovertm_operators.c - operator/hopping_sse_sgl.c - operator/halfspinor_sse_dbl.c - operator/Dov_psi.c - operator/tm_operators_nd.c - operator/tm_sub_Hopping_Matrix.c - operator/Hopping_Matrix_nocom.c - operator/clover_term.c - operator/halfspinor_bgq_dbl.c - operator/Hopping_Matrix_32_nocom.c - operator/D_psi.c - operator/tm_operators_32.c - operator/Hopping_Matrix_32.c - operator/halfspinor_body_32.c - operator/mul_one_pm_imu_inv_body.c) - -list(APPEND SMEARING_SRC_C smearing/hex_stout_exclude_two.c - smearing/hex_hex_smear.c - smearing/utils_print_su3.c - smearing/hyp_APE_project_exclude_none.c - smearing/hyp_hyp_staples_exclude_one.c - smearing/hyp_APE_project_exclude_one.c - smearing/hex_stout_exclude_one.c - smearing/hyp_hyp_staples_exclude_two.c - smearing/hex_stout_exclude_none.c - smearing/stout_stout_smear.c - smearing/hyp_hyp_smear.c - smearing/hyp_APE_project_exclude_two.c - smearing/utils_project_herm.c - smearing/utils_reunitarize.c - smearing/utils_generic_staples.c - smearing/hyp_hyp_staples_exclude_none.c - smearing/ape_ape_smear.c - smearing/uils_print_config_to_screen.c - smearing/utils_project_antiherm.c - smearing/utils_print_config_to_screen.c - smearing/utils_reunitarize_MILC.c) - -list(APPEND BUFFER_SRC_C - buffers/gauge_return_gauge_field.c - buffers/gauge_get_gauge_field.c - buffers/gauge_finalize_gauge_buffers.c - buffers/gauge_initialize_gauge_buffers.c - buffers/gauge.c - buffers/gauge_free_unused_gauge_buffers.c - buffers/gauge_get_gauge_field_array.c - buffers/utils_generic_exchange.c - buffers/gauge_allocate_gauge_buffers.c - buffers/gauge_return_gauge_field_array.c) - -list(APPEND MONOMIAL_SRC_C - monomial/detratio_monomial.c - monomial/sf_gauge_monomial.c - monomial/poly_monomial.c - monomial/cloverdetratio_monomial.c - monomial/ndrat_monomial.c - monomial/cloverdet_monomial.c - 
monomial/clover_trlog_monomial.c - monomial/cloverndpoly_monomial.c - monomial/monitor_forces.c - monomial/ndpoly_monomial.c - monomial/det_monomial.c - monomial/monomial.c - monomial/cloverdetratio_rwmonomial.c - monomial/gauge_monomial.c - monomial/clovernd_trlog_monomial.c - monomial/ratcor_monomial.c - monomial/nddetratio_monomial.c - monomial/rat_monomial.c - monomial/ndratcor_monomial.c - monomial/moment_energy.c) - -list(APPEND EXCHANGE_SRC_C xchange/xchange_lexicfield.c -xchange/xchange_2fields.c -xchange/xchange_gauge.c -xchange/xchange_halffield.c -xchange/xchange_jacobi.c -xchange/little_field_gather_body.c -xchange/little_field_gather.c -xchange/xchange_deri.c -xchange/xchange_field.c -xchange/xchange_field_tslice.c) - -list(APPEND MEAS_SRC_C -meas/pion_norm.c -meas/correlators.c -meas/polyakov_loop.c -meas/measurements.c -meas/oriented_plaquettes.c -meas/gradient_flow.c -meas/measure_clover_field_strength_observables.c) - -list(APPEND SF_SRC_C sf/sf_calc_action.c - sf/sf_get_rectangle_staples.c - sf/sf_get_staples.c - sf/sf_observables.c - sf/sf_utils.c - ) - -list(APPEND MAIN_SRC_C -measure_gauge_action.c -start.c -deriv_Sb.c -reweighting_factor_nd.c -ranlxs.c -source_generation.c -read_input.c -invert_doublet_eo.c -geometry_eo.c -getopt.c -offline_measurement.c -tm_debug_printf.c -chebyshev_polynomial_nd.c -invert_eo.c -little_D.c -get_rectangle_staples.c -gen_sources.c -rnd_gauge_trafo.c -test_lemon.c -LapH_ev.c -benchmark.c -measure_rectangles.c -check_locallity.c -invert.c -deriv_Sb_D_psi.c -deriv_mg_tune.c -mpi_init.c -update_momenta_fg.c -gamma.c -matrix_utils.c -reweighting_factor.c -update_tm.c -jacobi.c -invert_overlap.c -phmc.c -get_staples.c -clenshaw_coef.c -block.c -spinor_fft.c -boundary.c -little_D_body.c -X_psi.c -prepare_source.c -DDalphaAMG_interface.c -update_backward_gauge.c -invert_clover_eo.c -gettime.c -hmc_tm.c -update_momenta.c -sighandler.c -compare_derivative.c -ranlxd.c -DirectPut.c -aligned_malloc.c -fatal_error.c 
-operator.c -cu/cu.c -chebyshev_polynomial.c -qphix_test_Dslash.c -expo.c -overrelaxation.c -Ptilde_nd.c -update_gauge.c -hopping_test.c -integrator.c -P_M_eta.c) - -if (TMLQCD_USE_QPHIX) -list(APPEND MAIN_SRC_C qphix_interface.cpp) -endif() - -if (TMLQCD_USE_QUDA) -list(APPEND MAIN_SRC_C quda_interface.c) -endif() - -list(APPEND ALL_SRC ${MAIN_SRC_C} ${SF_SRC_C} ${XCHANGE_SRC_C} ${MONOMIAL_SRC_C} ${BUFFER_SRC_C} ${SMEARING_SRC_C} ${OPERATOR_SRC_C} ${RATIONAL_SRC_C} ${LINALG_SRC_C} ${IO_SRC_C} ${INIT_SRC_C} ${SOLVER_SRC_C}) - -include_directories(${CMAKE_CURRENT_BINARY_DIR}) - -# cmake 4.0 uses a different syntax for the option -flex_target(tmlqcd_input_read input_read.l input_read.c - $<$:COMPILE_FLAGS "-Ca -Ptmlqcd"> - $<$:OPTIONS "-Ca;-Ptmlqcd">) - -# create a target library with namespacing because cmake does not know name space at all -add_library(tmlqcd::hmc ALL_SRC ${FLEX_tmlqcd_input_read_OUTPUTS}) -set_target_properties(tmlqcd::hmc PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION 1) - -# define a library and add the dependencies -target_link_libraries(tmlqcd::hmc - $<$:rt> - $<$:tmlqcd::lime> - $<$:tmlqcd::lemon> - $<$:tmlqcd::qphix> - $<$:tmlqcd::fftw3> - $<$:MPI::MPI_C MPI::MPI_CXX> - $<$:quda::quda> - $<$:CUDA::cufft CUDA::cufftw CUDA::cublas CUDA::cudart CUDA::cuda_driver> - $<$:hip::hipfft roc::hipblas hip::host> - ${LAPACK_LIBRARIES} - ${BLAS_LIBRARIES} - $<$:OpenMP::OpenMP_C OpenMP::OpenMP_CXX> - m) - -target_compile_definitions(tmlqcd::hmc - $<$:${TMLQCD_GPU_PLATFORM_DFLAGS}> - ) - -target_include_directories(tmlqcd::hmc PUBLIC $ - PRIVATE "init io linalg meas monomial operator profiling rational sf smearing solver util xchange wrapper") diff --git a/config.guess b/config.guess deleted file mode 100644 index f7727026b..000000000 --- a/config.guess +++ /dev/null @@ -1,1701 +0,0 @@ -#! /bin/sh -# Attempt to guess a canonical system name. -# Copyright 1992-2021 Free Software Foundation, Inc. 
- -timestamp='2021-01-01' - -# This file is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, see . -# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that -# program. This Exception is an additional permission under section 7 -# of the GNU General Public License, version 3 ("GPLv3"). -# -# Originally written by Per Bothner; maintained since 2000 by Ben Elliston. -# -# You can get the latest version of this script from: -# https://git.savannah.gnu.org/cgit/config.git/plain/config.guess -# -# Please send patches to . - - -me=$(echo "$0" | sed -e 's,.*/,,') - -usage="\ -Usage: $0 [OPTION] - -Output the configuration name of the system \`$me' is run on. - -Options: - -h, --help print this help, then exit - -t, --time-stamp print date of last modification, then exit - -v, --version print version number, then exit - -Report bugs and patches to ." - -version="\ -GNU config.guess ($timestamp) - -Originally written by Per Bothner. -Copyright 1992-2021 Free Software Foundation, Inc. - -This is free software; see the source for copying conditions. There is NO -warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." - -help=" -Try \`$me --help' for more information." 
- -# Parse command line -while test $# -gt 0 ; do - case $1 in - --time-stamp | --time* | -t ) - echo "$timestamp" ; exit ;; - --version | -v ) - echo "$version" ; exit ;; - --help | --h* | -h ) - echo "$usage"; exit ;; - -- ) # Stop option processing - shift; break ;; - - ) # Use stdin as input. - break ;; - -* ) - echo "$me: invalid option $1$help" >&2 - exit 1 ;; - * ) - break ;; - esac -done - -if test $# != 0; then - echo "$me: too many arguments$help" >&2 - exit 1 -fi - -# CC_FOR_BUILD -- compiler used by this script. Note that the use of a -# compiler to aid in system detection is discouraged as it requires -# temporary files to be created and, as you can see below, it is a -# headache to deal with in a portable fashion. - -# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still -# use `HOST_CC' if defined, but it is deprecated. - -# Portable tmp directory creation inspired by the Autoconf team. - -tmp= -# shellcheck disable=SC2172 -trap 'test -z "$tmp" || rm -fr "$tmp"' 0 1 2 13 15 - -set_cc_for_build() { - # prevent multiple calls if $tmp is already set - test "$tmp" && return 0 - : "${TMPDIR=/tmp}" - # shellcheck disable=SC2039 - { tmp=$( (umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null) && test -n "$tmp" && test -d "$tmp" ; } || - { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir "$tmp" 2>/dev/null) ; } || - { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir "$tmp" 2>/dev/null) && echo "Warning: creating insecure temp directory" >&2 ; } || - { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } - dummy=$tmp/dummy - case ${CC_FOR_BUILD-},${HOST_CC-},${CC-} in - ,,) echo "int x;" > "$dummy.c" - for driver in cc gcc c89 c99 ; do - if ($driver -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then - CC_FOR_BUILD="$driver" - break - fi - done - if test x"$CC_FOR_BUILD" = x ; then - CC_FOR_BUILD=no_compiler_found - fi - ;; - ,,*) CC_FOR_BUILD=$CC ;; - ,*,*) CC_FOR_BUILD=$HOST_CC ;; - esac -} - -# This is 
needed to find uname on a Pyramid OSx when run in the BSD universe. -# (ghazi@noc.rutgers.edu 1994-08-24) -if test -f /.attbin/uname ; then - PATH=$PATH:/.attbin ; export PATH -fi - -UNAME_MACHINE=$( (uname -m) 2>/dev/null) || UNAME_MACHINE=unknown -UNAME_RELEASE=$( (uname -r) 2>/dev/null) || UNAME_RELEASE=unknown -UNAME_SYSTEM=$( (uname -s) 2>/dev/null) || UNAME_SYSTEM=unknown -UNAME_VERSION=$( (uname -v) 2>/dev/null) || UNAME_VERSION=unknown - -case "$UNAME_SYSTEM" in -Linux|GNU|GNU/*) - LIBC=unknown - - set_cc_for_build - cat <<-EOF > "$dummy.c" - #include - #if defined(__UCLIBC__) - LIBC=uclibc - #elif defined(__dietlibc__) - LIBC=dietlibc - #elif defined(__GLIBC__) - LIBC=gnu - #else - #include - /* First heuristic to detect musl libc. */ - #ifdef __DEFINED_va_list - LIBC=musl - #endif - #endif - EOF - eval "$($CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g')" - - # Second heuristic to detect musl libc. - if [ "$LIBC" = unknown ] && - command -v ldd >/dev/null && - ldd --version 2>&1 | grep -q ^musl; then - LIBC=musl - fi - - # If the system lacks a compiler, then just pick glibc. - # We could probably try harder. - if [ "$LIBC" = unknown ]; then - LIBC=gnu - fi - ;; -esac - -# Note: order is significant - the case branches are not exclusive. - -case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in - *:NetBSD:*:*) - # NetBSD (nbsd) targets should (where applicable) match one or - # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*, - # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently - # switched to ELF, *-*-netbsd* would select the old - # object file format. This provides both forward - # compatibility and a consistent mechanism for selecting the - # object file format. - # - # Note: NetBSD doesn't particularly care about the vendor - # portion of the name. We always set it to "unknown". 
- sysctl="sysctl -n hw.machine_arch" - UNAME_MACHINE_ARCH=$( (uname -p 2>/dev/null || \ - "/sbin/$sysctl" 2>/dev/null || \ - "/usr/sbin/$sysctl" 2>/dev/null || \ - echo unknown)) - case "$UNAME_MACHINE_ARCH" in - aarch64eb) machine=aarch64_be-unknown ;; - armeb) machine=armeb-unknown ;; - arm*) machine=arm-unknown ;; - sh3el) machine=shl-unknown ;; - sh3eb) machine=sh-unknown ;; - sh5el) machine=sh5le-unknown ;; - earmv*) - arch=$(echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,') - endian=$(echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p') - machine="${arch}${endian}"-unknown - ;; - *) machine="$UNAME_MACHINE_ARCH"-unknown ;; - esac - # The Operating System including object format, if it has switched - # to ELF recently (or will in the future) and ABI. - case "$UNAME_MACHINE_ARCH" in - earm*) - os=netbsdelf - ;; - arm*|i386|m68k|ns32k|sh3*|sparc|vax) - set_cc_for_build - if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ - | grep -q __ELF__ - then - # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). - # Return netbsd for either. FIX? - os=netbsd - else - os=netbsdelf - fi - ;; - *) - os=netbsd - ;; - esac - # Determine ABI tags. - case "$UNAME_MACHINE_ARCH" in - earm*) - expr='s/^earmv[0-9]/-eabi/;s/eb$//' - abi=$(echo "$UNAME_MACHINE_ARCH" | sed -e "$expr") - ;; - esac - # The OS release - # Debian GNU/NetBSD machines have a different userland, and - # thus, need a distinct triplet. However, they do not need - # kernel version information, so it can be replaced with a - # suitable tag, in the style of linux-gnu. - case "$UNAME_VERSION" in - Debian*) - release='-gnu' - ;; - *) - release=$(echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. -f1,2) - ;; - esac - # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: - # contains redundant information, the shorter form: - # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. 
- echo "$machine-${os}${release}${abi-}" - exit ;; - *:Bitrig:*:*) - UNAME_MACHINE_ARCH=$(arch | sed 's/Bitrig.//') - echo "$UNAME_MACHINE_ARCH"-unknown-bitrig"$UNAME_RELEASE" - exit ;; - *:OpenBSD:*:*) - UNAME_MACHINE_ARCH=$(arch | sed 's/OpenBSD.//') - echo "$UNAME_MACHINE_ARCH"-unknown-openbsd"$UNAME_RELEASE" - exit ;; - *:LibertyBSD:*:*) - UNAME_MACHINE_ARCH=$(arch | sed 's/^.*BSD\.//') - echo "$UNAME_MACHINE_ARCH"-unknown-libertybsd"$UNAME_RELEASE" - exit ;; - *:MidnightBSD:*:*) - echo "$UNAME_MACHINE"-unknown-midnightbsd"$UNAME_RELEASE" - exit ;; - *:ekkoBSD:*:*) - echo "$UNAME_MACHINE"-unknown-ekkobsd"$UNAME_RELEASE" - exit ;; - *:SolidBSD:*:*) - echo "$UNAME_MACHINE"-unknown-solidbsd"$UNAME_RELEASE" - exit ;; - *:OS108:*:*) - echo "$UNAME_MACHINE"-unknown-os108_"$UNAME_RELEASE" - exit ;; - macppc:MirBSD:*:*) - echo powerpc-unknown-mirbsd"$UNAME_RELEASE" - exit ;; - *:MirBSD:*:*) - echo "$UNAME_MACHINE"-unknown-mirbsd"$UNAME_RELEASE" - exit ;; - *:Sortix:*:*) - echo "$UNAME_MACHINE"-unknown-sortix - exit ;; - *:Twizzler:*:*) - echo "$UNAME_MACHINE"-unknown-twizzler - exit ;; - *:Redox:*:*) - echo "$UNAME_MACHINE"-unknown-redox - exit ;; - mips:OSF1:*.*) - echo mips-dec-osf1 - exit ;; - alpha:OSF1:*:*) - case $UNAME_RELEASE in - *4.0) - UNAME_RELEASE=$(/usr/sbin/sizer -v | awk '{print $3}') - ;; - *5.*) - UNAME_RELEASE=$(/usr/sbin/sizer -v | awk '{print $4}') - ;; - esac - # According to Compaq, /usr/sbin/psrinfo has been available on - # OSF/1 and Tru64 systems produced since 1995. I hope that - # covers most systems running today. This code pipes the CPU - # types through head -n 1, so we only detect the type of CPU 0. 
- ALPHA_CPU_TYPE=$(/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1) - case "$ALPHA_CPU_TYPE" in - "EV4 (21064)") - UNAME_MACHINE=alpha ;; - "EV4.5 (21064)") - UNAME_MACHINE=alpha ;; - "LCA4 (21066/21068)") - UNAME_MACHINE=alpha ;; - "EV5 (21164)") - UNAME_MACHINE=alphaev5 ;; - "EV5.6 (21164A)") - UNAME_MACHINE=alphaev56 ;; - "EV5.6 (21164PC)") - UNAME_MACHINE=alphapca56 ;; - "EV5.7 (21164PC)") - UNAME_MACHINE=alphapca57 ;; - "EV6 (21264)") - UNAME_MACHINE=alphaev6 ;; - "EV6.7 (21264A)") - UNAME_MACHINE=alphaev67 ;; - "EV6.8CB (21264C)") - UNAME_MACHINE=alphaev68 ;; - "EV6.8AL (21264B)") - UNAME_MACHINE=alphaev68 ;; - "EV6.8CX (21264D)") - UNAME_MACHINE=alphaev68 ;; - "EV6.9A (21264/EV69A)") - UNAME_MACHINE=alphaev69 ;; - "EV7 (21364)") - UNAME_MACHINE=alphaev7 ;; - "EV7.9 (21364A)") - UNAME_MACHINE=alphaev79 ;; - esac - # A Pn.n version is a patched version. - # A Vn.n version is a released version. - # A Tn.n version is a released field test version. - # A Xn.n version is an unreleased experimental baselevel. - # 1.2 uses "1.2" for uname -r. - echo "$UNAME_MACHINE"-dec-osf"$(echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz)" - # Reset EXIT trap before exiting to avoid spurious non-zero exit code. - exitcode=$? 
- trap '' 0 - exit $exitcode ;; - Amiga*:UNIX_System_V:4.0:*) - echo m68k-unknown-sysv4 - exit ;; - *:[Aa]miga[Oo][Ss]:*:*) - echo "$UNAME_MACHINE"-unknown-amigaos - exit ;; - *:[Mm]orph[Oo][Ss]:*:*) - echo "$UNAME_MACHINE"-unknown-morphos - exit ;; - *:OS/390:*:*) - echo i370-ibm-openedition - exit ;; - *:z/VM:*:*) - echo s390-ibm-zvmoe - exit ;; - *:OS400:*:*) - echo powerpc-ibm-os400 - exit ;; - arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) - echo arm-acorn-riscix"$UNAME_RELEASE" - exit ;; - arm*:riscos:*:*|arm*:RISCOS:*:*) - echo arm-unknown-riscos - exit ;; - SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) - echo hppa1.1-hitachi-hiuxmpp - exit ;; - Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) - # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. - if test "$( (/bin/universe) 2>/dev/null)" = att ; then - echo pyramid-pyramid-sysv3 - else - echo pyramid-pyramid-bsd - fi - exit ;; - NILE*:*:*:dcosx) - echo pyramid-pyramid-svr4 - exit ;; - DRS?6000:unix:4.0:6*) - echo sparc-icl-nx6 - exit ;; - DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) - case $(/usr/bin/uname -p) in - sparc) echo sparc-icl-nx7; exit ;; - esac ;; - s390x:SunOS:*:*) - echo "$UNAME_MACHINE"-ibm-solaris2"$(echo "$UNAME_RELEASE" | sed -e 's/[^.]*//')" - exit ;; - sun4H:SunOS:5.*:*) - echo sparc-hal-solaris2"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')" - exit ;; - sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) - echo sparc-sun-solaris2"$(echo "$UNAME_RELEASE" | sed -e 's/[^.]*//')" - exit ;; - i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) - echo i386-pc-auroraux"$UNAME_RELEASE" - exit ;; - i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) - set_cc_for_build - SUN_ARCH=i386 - # If there is a compiler, see if it is configured for 64-bit objects. - # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. - # This test works for both compilers. 
- if test "$CC_FOR_BUILD" != no_compiler_found; then - if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ - grep IS_64BIT_ARCH >/dev/null - then - SUN_ARCH=x86_64 - fi - fi - echo "$SUN_ARCH"-pc-solaris2"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')" - exit ;; - sun4*:SunOS:6*:*) - # According to config.sub, this is the proper way to canonicalize - # SunOS6. Hard to guess exactly what SunOS6 will be like, but - # it's likely to be more like Solaris than SunOS4. - echo sparc-sun-solaris3"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')" - exit ;; - sun4*:SunOS:*:*) - case "$(/usr/bin/arch -k)" in - Series*|S4*) - UNAME_RELEASE=$(uname -v) - ;; - esac - # Japanese Language versions have a version number like `4.1.3-JL'. - echo sparc-sun-sunos"$(echo "$UNAME_RELEASE"|sed -e 's/-/_/')" - exit ;; - sun3*:SunOS:*:*) - echo m68k-sun-sunos"$UNAME_RELEASE" - exit ;; - sun*:*:4.2BSD:*) - UNAME_RELEASE=$( (sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null) - test "x$UNAME_RELEASE" = x && UNAME_RELEASE=3 - case "$(/bin/arch)" in - sun3) - echo m68k-sun-sunos"$UNAME_RELEASE" - ;; - sun4) - echo sparc-sun-sunos"$UNAME_RELEASE" - ;; - esac - exit ;; - aushp:SunOS:*:*) - echo sparc-auspex-sunos"$UNAME_RELEASE" - exit ;; - # The situation for MiNT is a little confusing. The machine name - # can be virtually everything (everything which is not - # "atarist" or "atariste" at least should have a processor - # > m68000). The system name ranges from "MiNT" over "FreeMiNT" - # to the lowercase version "mint" (or "freemint"). Finally - # the system name "TOS" denotes a system which is actually not - # MiNT. But MiNT is downward compatible to TOS, so this should - # be no problem. 
- atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint"$UNAME_RELEASE" - exit ;; - atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint"$UNAME_RELEASE" - exit ;; - *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) - echo m68k-atari-mint"$UNAME_RELEASE" - exit ;; - milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) - echo m68k-milan-mint"$UNAME_RELEASE" - exit ;; - hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) - echo m68k-hades-mint"$UNAME_RELEASE" - exit ;; - *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) - echo m68k-unknown-mint"$UNAME_RELEASE" - exit ;; - m68k:machten:*:*) - echo m68k-apple-machten"$UNAME_RELEASE" - exit ;; - powerpc:machten:*:*) - echo powerpc-apple-machten"$UNAME_RELEASE" - exit ;; - RISC*:Mach:*:*) - echo mips-dec-mach_bsd4.3 - exit ;; - RISC*:ULTRIX:*:*) - echo mips-dec-ultrix"$UNAME_RELEASE" - exit ;; - VAX*:ULTRIX*:*:*) - echo vax-dec-ultrix"$UNAME_RELEASE" - exit ;; - 2020:CLIX:*:* | 2430:CLIX:*:*) - echo clipper-intergraph-clix"$UNAME_RELEASE" - exit ;; - mips:*:*:UMIPS | mips:*:*:RISCos) - set_cc_for_build - sed 's/^ //' << EOF > "$dummy.c" -#ifdef __cplusplus -#include /* for printf() prototype */ - int main (int argc, char *argv[]) { -#else - int main (argc, argv) int argc; char *argv[]; { -#endif - #if defined (host_mips) && defined (MIPSEB) - #if defined (SYSTYPE_SYSV) - printf ("mips-mips-riscos%ssysv\\n", argv[1]); exit (0); - #endif - #if defined (SYSTYPE_SVR4) - printf ("mips-mips-riscos%ssvr4\\n", argv[1]); exit (0); - #endif - #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) - printf ("mips-mips-riscos%sbsd\\n", argv[1]); exit (0); - #endif - #endif - exit (-1); - } -EOF - $CC_FOR_BUILD -o "$dummy" "$dummy.c" && - dummyarg=$(echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p') && - SYSTEM_NAME=$("$dummy" "$dummyarg") && - { echo "$SYSTEM_NAME"; exit; } - echo mips-mips-riscos"$UNAME_RELEASE" - exit ;; - Motorola:PowerMAX_OS:*:*) - echo 
powerpc-motorola-powermax - exit ;; - Motorola:*:4.3:PL8-*) - echo powerpc-harris-powermax - exit ;; - Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) - echo powerpc-harris-powermax - exit ;; - Night_Hawk:Power_UNIX:*:*) - echo powerpc-harris-powerunix - exit ;; - m88k:CX/UX:7*:*) - echo m88k-harris-cxux7 - exit ;; - m88k:*:4*:R4*) - echo m88k-motorola-sysv4 - exit ;; - m88k:*:3*:R3*) - echo m88k-motorola-sysv3 - exit ;; - AViiON:dgux:*:*) - # DG/UX returns AViiON for all architectures - UNAME_PROCESSOR=$(/usr/bin/uname -p) - if test "$UNAME_PROCESSOR" = mc88100 || test "$UNAME_PROCESSOR" = mc88110 - then - if test "$TARGET_BINARY_INTERFACE"x = m88kdguxelfx || \ - test "$TARGET_BINARY_INTERFACE"x = x - then - echo m88k-dg-dgux"$UNAME_RELEASE" - else - echo m88k-dg-dguxbcs"$UNAME_RELEASE" - fi - else - echo i586-dg-dgux"$UNAME_RELEASE" - fi - exit ;; - M88*:DolphinOS:*:*) # DolphinOS (SVR3) - echo m88k-dolphin-sysv3 - exit ;; - M88*:*:R3*:*) - # Delta 88k system running SVR3 - echo m88k-motorola-sysv3 - exit ;; - XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) - echo m88k-tektronix-sysv3 - exit ;; - Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) - echo m68k-tektronix-bsd - exit ;; - *:IRIX*:*:*) - echo mips-sgi-irix"$(echo "$UNAME_RELEASE"|sed -e 's/-/_/g')" - exit ;; - ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. 
- echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id - exit ;; # Note that: echo "'$(uname -s)'" gives 'AIX ' - i*86:AIX:*:*) - echo i386-ibm-aix - exit ;; - ia64:AIX:*:*) - if test -x /usr/bin/oslevel ; then - IBM_REV=$(/usr/bin/oslevel) - else - IBM_REV="$UNAME_VERSION.$UNAME_RELEASE" - fi - echo "$UNAME_MACHINE"-ibm-aix"$IBM_REV" - exit ;; - *:AIX:2:3) - if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then - set_cc_for_build - sed 's/^ //' << EOF > "$dummy.c" - #include - - main() - { - if (!__power_pc()) - exit(1); - puts("powerpc-ibm-aix3.2.5"); - exit(0); - } -EOF - if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=$("$dummy") - then - echo "$SYSTEM_NAME" - else - echo rs6000-ibm-aix3.2.5 - fi - elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then - echo rs6000-ibm-aix3.2.4 - else - echo rs6000-ibm-aix3.2 - fi - exit ;; - *:AIX:*:[4567]) - IBM_CPU_ID=$(/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }') - if /usr/sbin/lsattr -El "$IBM_CPU_ID" | grep ' POWER' >/dev/null 2>&1; then - IBM_ARCH=rs6000 - else - IBM_ARCH=powerpc - fi - if test -x /usr/bin/lslpp ; then - IBM_REV=$(/usr/bin/lslpp -Lqc bos.rte.libc | - awk -F: '{ print $3 }' | sed s/[0-9]*$/0/) - else - IBM_REV="$UNAME_VERSION.$UNAME_RELEASE" - fi - echo "$IBM_ARCH"-ibm-aix"$IBM_REV" - exit ;; - *:AIX:*:*) - echo rs6000-ibm-aix - exit ;; - ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*) - echo romp-ibm-bsd4.4 - exit ;; - ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and - echo romp-ibm-bsd"$UNAME_RELEASE" # 4.3 with uname added to - exit ;; # report: romp-ibm BSD 4.3 - *:BOSX:*:*) - echo rs6000-bull-bosx - exit ;; - DPX/2?00:B.O.S.:*:*) - echo m68k-bull-sysv3 - exit ;; - 9000/[34]??:4.3bsd:1.*:*) - echo m68k-hp-bsd - exit ;; - hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) - echo m68k-hp-bsd4.4 - exit ;; - 9000/[34678]??:HP-UX:*:*) - HPUX_REV=$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//') - case "$UNAME_MACHINE" in - 9000/31?) HP_ARCH=m68000 ;; - 9000/[34]??) 
HP_ARCH=m68k ;; - 9000/[678][0-9][0-9]) - if test -x /usr/bin/getconf; then - sc_cpu_version=$(/usr/bin/getconf SC_CPU_VERSION 2>/dev/null) - sc_kernel_bits=$(/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null) - case "$sc_cpu_version" in - 523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0 - 528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1 - 532) # CPU_PA_RISC2_0 - case "$sc_kernel_bits" in - 32) HP_ARCH=hppa2.0n ;; - 64) HP_ARCH=hppa2.0w ;; - '') HP_ARCH=hppa2.0 ;; # HP-UX 10.20 - esac ;; - esac - fi - if test "$HP_ARCH" = ""; then - set_cc_for_build - sed 's/^ //' << EOF > "$dummy.c" - - #define _HPUX_SOURCE - #include - #include - - int main () - { - #if defined(_SC_KERNEL_BITS) - long bits = sysconf(_SC_KERNEL_BITS); - #endif - long cpu = sysconf (_SC_CPU_VERSION); - - switch (cpu) - { - case CPU_PA_RISC1_0: puts ("hppa1.0"); break; - case CPU_PA_RISC1_1: puts ("hppa1.1"); break; - case CPU_PA_RISC2_0: - #if defined(_SC_KERNEL_BITS) - switch (bits) - { - case 64: puts ("hppa2.0w"); break; - case 32: puts ("hppa2.0n"); break; - default: puts ("hppa2.0"); break; - } break; - #else /* !defined(_SC_KERNEL_BITS) */ - puts ("hppa2.0"); break; - #endif - default: puts ("hppa1.0"); break; - } - exit (0); - } -EOF - (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=$("$dummy") - test -z "$HP_ARCH" && HP_ARCH=hppa - fi ;; - esac - if test "$HP_ARCH" = hppa2.0w - then - set_cc_for_build - - # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating - # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler - # generating 64-bit code. 
GNU and HP use different nomenclature: - # - # $ CC_FOR_BUILD=cc ./config.guess - # => hppa2.0w-hp-hpux11.23 - # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess - # => hppa64-hp-hpux11.23 - - if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | - grep -q __LP64__ - then - HP_ARCH=hppa2.0w - else - HP_ARCH=hppa64 - fi - fi - echo "$HP_ARCH"-hp-hpux"$HPUX_REV" - exit ;; - ia64:HP-UX:*:*) - HPUX_REV=$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//') - echo ia64-hp-hpux"$HPUX_REV" - exit ;; - 3050*:HI-UX:*:*) - set_cc_for_build - sed 's/^ //' << EOF > "$dummy.c" - #include - int - main () - { - long cpu = sysconf (_SC_CPU_VERSION); - /* The order matters, because CPU_IS_HP_MC68K erroneously returns - true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct - results, however. */ - if (CPU_IS_PA_RISC (cpu)) - { - switch (cpu) - { - case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; - case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; - case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; - default: puts ("hppa-hitachi-hiuxwe2"); break; - } - } - else if (CPU_IS_HP_MC68K (cpu)) - puts ("m68k-hitachi-hiuxwe2"); - else puts ("unknown-hitachi-hiuxwe2"); - exit (0); - } -EOF - $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=$("$dummy") && - { echo "$SYSTEM_NAME"; exit; } - echo unknown-hitachi-hiuxwe2 - exit ;; - 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*) - echo hppa1.1-hp-bsd - exit ;; - 9000/8??:4.3bsd:*:*) - echo hppa1.0-hp-bsd - exit ;; - *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) - echo hppa1.0-hp-mpeix - exit ;; - hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*) - echo hppa1.1-hp-osf - exit ;; - hp8??:OSF1:*:*) - echo hppa1.0-hp-osf - exit ;; - i*86:OSF1:*:*) - if test -x /usr/sbin/sysversion ; then - echo "$UNAME_MACHINE"-unknown-osf1mk - else - echo "$UNAME_MACHINE"-unknown-osf1 - fi - exit ;; - parisc*:Lites*:*:*) - echo hppa1.1-hp-lites - exit ;; - C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) - echo c1-convex-bsd - exit ;; - C2*:ConvexOS:*:* | 
convex:ConvexOS:C2*:*) - if getsysinfo -f scalar_acc - then echo c32-convex-bsd - else echo c2-convex-bsd - fi - exit ;; - C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) - echo c34-convex-bsd - exit ;; - C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) - echo c38-convex-bsd - exit ;; - C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) - echo c4-convex-bsd - exit ;; - CRAY*Y-MP:*:*:*) - echo ymp-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' - exit ;; - CRAY*[A-Z]90:*:*:*) - echo "$UNAME_MACHINE"-cray-unicos"$UNAME_RELEASE" \ - | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ - -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ - -e 's/\.[^.]*$/.X/' - exit ;; - CRAY*TS:*:*:*) - echo t90-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' - exit ;; - CRAY*T3E:*:*:*) - echo alphaev5-cray-unicosmk"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' - exit ;; - CRAY*SV1:*:*:*) - echo sv1-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' - exit ;; - *:UNICOS/mp:*:*) - echo craynv-cray-unicosmp"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' - exit ;; - F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) - FUJITSU_PROC=$(uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz) - FUJITSU_SYS=$(uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///') - FUJITSU_REL=$(echo "$UNAME_RELEASE" | sed -e 's/ /_/') - echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" - exit ;; - 5000:UNIX_System_V:4.*:*) - FUJITSU_SYS=$(uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///') - FUJITSU_REL=$(echo "$UNAME_RELEASE" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/') - echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" - exit ;; - i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) - echo "$UNAME_MACHINE"-pc-bsdi"$UNAME_RELEASE" - exit ;; - sparc*:BSD/OS:*:*) - echo sparc-unknown-bsdi"$UNAME_RELEASE" - exit ;; - *:BSD/OS:*:*) - echo "$UNAME_MACHINE"-unknown-bsdi"$UNAME_RELEASE" - exit ;; - 
arm:FreeBSD:*:*) - UNAME_PROCESSOR=$(uname -p) - set_cc_for_build - if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ - | grep -q __ARM_PCS_VFP - then - echo "${UNAME_PROCESSOR}"-unknown-freebsd"$(echo ${UNAME_RELEASE}|sed -e 's/[-(].*//')"-gnueabi - else - echo "${UNAME_PROCESSOR}"-unknown-freebsd"$(echo ${UNAME_RELEASE}|sed -e 's/[-(].*//')"-gnueabihf - fi - exit ;; - *:FreeBSD:*:*) - UNAME_PROCESSOR=$(/usr/bin/uname -p) - case "$UNAME_PROCESSOR" in - amd64) - UNAME_PROCESSOR=x86_64 ;; - i386) - UNAME_PROCESSOR=i586 ;; - esac - echo "$UNAME_PROCESSOR"-unknown-freebsd"$(echo "$UNAME_RELEASE"|sed -e 's/[-(].*//')" - exit ;; - i*:CYGWIN*:*) - echo "$UNAME_MACHINE"-pc-cygwin - exit ;; - *:MINGW64*:*) - echo "$UNAME_MACHINE"-pc-mingw64 - exit ;; - *:MINGW*:*) - echo "$UNAME_MACHINE"-pc-mingw32 - exit ;; - *:MSYS*:*) - echo "$UNAME_MACHINE"-pc-msys - exit ;; - i*:PW*:*) - echo "$UNAME_MACHINE"-pc-pw32 - exit ;; - *:Interix*:*) - case "$UNAME_MACHINE" in - x86) - echo i586-pc-interix"$UNAME_RELEASE" - exit ;; - authenticamd | genuineintel | EM64T) - echo x86_64-unknown-interix"$UNAME_RELEASE" - exit ;; - IA64) - echo ia64-unknown-interix"$UNAME_RELEASE" - exit ;; - esac ;; - i*:UWIN*:*) - echo "$UNAME_MACHINE"-pc-uwin - exit ;; - amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) - echo x86_64-pc-cygwin - exit ;; - prep*:SunOS:5.*:*) - echo powerpcle-unknown-solaris2"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')" - exit ;; - *:GNU:*:*) - # the GNU system - echo "$(echo "$UNAME_MACHINE"|sed -e 's,[-/].*$,,')-unknown-$LIBC$(echo "$UNAME_RELEASE"|sed -e 's,/.*$,,')" - exit ;; - *:GNU/*:*:*) - # other systems with GNU libc and userland - echo "$UNAME_MACHINE-unknown-$(echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]")$(echo "$UNAME_RELEASE"|sed -e 's/[-(].*//')-$LIBC" - exit ;; - *:Minix:*:*) - echo "$UNAME_MACHINE"-unknown-minix - exit ;; - aarch64:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; - aarch64_be:Linux:*:*) - 
UNAME_MACHINE=aarch64_be - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; - alpha:Linux:*:*) - case $(sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null) in - EV5) UNAME_MACHINE=alphaev5 ;; - EV56) UNAME_MACHINE=alphaev56 ;; - PCA56) UNAME_MACHINE=alphapca56 ;; - PCA57) UNAME_MACHINE=alphapca56 ;; - EV6) UNAME_MACHINE=alphaev6 ;; - EV67) UNAME_MACHINE=alphaev67 ;; - EV68*) UNAME_MACHINE=alphaev68 ;; - esac - objdump --private-headers /bin/sh | grep -q ld.so.1 - if test "$?" = 0 ; then LIBC=gnulibc1 ; fi - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; - arc:Linux:*:* | arceb:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; - arm*:Linux:*:*) - set_cc_for_build - if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ - | grep -q __ARM_EABI__ - then - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - else - if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ - | grep -q __ARM_PCS_VFP - then - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabi - else - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabihf - fi - fi - exit ;; - avr32*:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; - cris:Linux:*:*) - echo "$UNAME_MACHINE"-axis-linux-"$LIBC" - exit ;; - crisv32:Linux:*:*) - echo "$UNAME_MACHINE"-axis-linux-"$LIBC" - exit ;; - e2k:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; - frv:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; - hexagon:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; - i*86:Linux:*:*) - echo "$UNAME_MACHINE"-pc-linux-"$LIBC" - exit ;; - ia64:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; - k1om:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; - loongarch32:Linux:*:* | loongarch64:Linux:*:* | loongarchx32:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; - m32r*:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; - m68*:Linux:*:*) - echo 
"$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; - mips:Linux:*:* | mips64:Linux:*:*) - set_cc_for_build - IS_GLIBC=0 - test x"${LIBC}" = xgnu && IS_GLIBC=1 - sed 's/^ //' << EOF > "$dummy.c" - #undef CPU - #undef mips - #undef mipsel - #undef mips64 - #undef mips64el - #if ${IS_GLIBC} && defined(_ABI64) - LIBCABI=gnuabi64 - #else - #if ${IS_GLIBC} && defined(_ABIN32) - LIBCABI=gnuabin32 - #else - LIBCABI=${LIBC} - #endif - #endif - - #if ${IS_GLIBC} && defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6 - CPU=mipsisa64r6 - #else - #if ${IS_GLIBC} && !defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6 - CPU=mipsisa32r6 - #else - #if defined(__mips64) - CPU=mips64 - #else - CPU=mips - #endif - #endif - #endif - - #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) - MIPS_ENDIAN=el - #else - #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) - MIPS_ENDIAN= - #else - MIPS_ENDIAN= - #endif - #endif -EOF - eval "$($CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU\|^MIPS_ENDIAN\|^LIBCABI')" - test "x$CPU" != x && { echo "$CPU${MIPS_ENDIAN}-unknown-linux-$LIBCABI"; exit; } - ;; - mips64el:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; - openrisc*:Linux:*:*) - echo or1k-unknown-linux-"$LIBC" - exit ;; - or32:Linux:*:* | or1k*:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; - padre:Linux:*:*) - echo sparc-unknown-linux-"$LIBC" - exit ;; - parisc64:Linux:*:* | hppa64:Linux:*:*) - echo hppa64-unknown-linux-"$LIBC" - exit ;; - parisc:Linux:*:* | hppa:Linux:*:*) - # Look for CPU level - case $(grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2) in - PA7*) echo hppa1.1-unknown-linux-"$LIBC" ;; - PA8*) echo hppa2.0-unknown-linux-"$LIBC" ;; - *) echo hppa-unknown-linux-"$LIBC" ;; - esac - exit ;; - ppc64:Linux:*:*) - echo powerpc64-unknown-linux-"$LIBC" - exit ;; - ppc:Linux:*:*) - echo powerpc-unknown-linux-"$LIBC" - exit ;; - 
ppc64le:Linux:*:*) - echo powerpc64le-unknown-linux-"$LIBC" - exit ;; - ppcle:Linux:*:*) - echo powerpcle-unknown-linux-"$LIBC" - exit ;; - riscv32:Linux:*:* | riscv32be:Linux:*:* | riscv64:Linux:*:* | riscv64be:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; - s390:Linux:*:* | s390x:Linux:*:*) - echo "$UNAME_MACHINE"-ibm-linux-"$LIBC" - exit ;; - sh64*:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; - sh*:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; - sparc:Linux:*:* | sparc64:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; - tile*:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; - vax:Linux:*:*) - echo "$UNAME_MACHINE"-dec-linux-"$LIBC" - exit ;; - x86_64:Linux:*:*) - set_cc_for_build - LIBCABI=$LIBC - if test "$CC_FOR_BUILD" != no_compiler_found; then - if (echo '#ifdef __ILP32__'; echo IS_X32; echo '#endif') | \ - (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ - grep IS_X32 >/dev/null - then - LIBCABI="$LIBC"x32 - fi - fi - echo "$UNAME_MACHINE"-pc-linux-"$LIBCABI" - exit ;; - xtensa*:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; - i*86:DYNIX/ptx:4*:*) - # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. - # earlier versions are messed up and put the nodename in both - # sysname and nodename. - echo i386-sequent-sysv4 - exit ;; - i*86:UNIX_SV:4.2MP:2.*) - # Unixware is an offshoot of SVR4, but it has its own version - # number series starting with 2... - # I am not positive that other SVR4 systems won't match this, - # I just have to hope. -- rms. - # Use sysv4.2uw... so that sysv4* matches it. - echo "$UNAME_MACHINE"-pc-sysv4.2uw"$UNAME_VERSION" - exit ;; - i*86:OS/2:*:*) - # If we were able to find `uname', then EMX Unix compatibility - # is probably installed. 
- echo "$UNAME_MACHINE"-pc-os2-emx - exit ;; - i*86:XTS-300:*:STOP) - echo "$UNAME_MACHINE"-unknown-stop - exit ;; - i*86:atheos:*:*) - echo "$UNAME_MACHINE"-unknown-atheos - exit ;; - i*86:syllable:*:*) - echo "$UNAME_MACHINE"-pc-syllable - exit ;; - i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) - echo i386-unknown-lynxos"$UNAME_RELEASE" - exit ;; - i*86:*DOS:*:*) - echo "$UNAME_MACHINE"-pc-msdosdjgpp - exit ;; - i*86:*:4.*:*) - UNAME_REL=$(echo "$UNAME_RELEASE" | sed 's/\/MP$//') - if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then - echo "$UNAME_MACHINE"-univel-sysv"$UNAME_REL" - else - echo "$UNAME_MACHINE"-pc-sysv"$UNAME_REL" - fi - exit ;; - i*86:*:5:[678]*) - # UnixWare 7.x, OpenUNIX and OpenServer 6. - case $(/bin/uname -X | grep "^Machine") in - *486*) UNAME_MACHINE=i486 ;; - *Pentium) UNAME_MACHINE=i586 ;; - *Pent*|*Celeron) UNAME_MACHINE=i686 ;; - esac - echo "$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}" - exit ;; - i*86:*:3.2:*) - if test -f /usr/options/cb.name; then - UNAME_REL=$(sed -n 's/.*Version //p' /dev/null >/dev/null ; then - UNAME_REL=$( (/bin/uname -X|grep Release|sed -e 's/.*= //')) - (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 - (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ - && UNAME_MACHINE=i586 - (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ - && UNAME_MACHINE=i686 - (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ - && UNAME_MACHINE=i686 - echo "$UNAME_MACHINE"-pc-sco"$UNAME_REL" - else - echo "$UNAME_MACHINE"-pc-sysv32 - fi - exit ;; - pc:*:*:*) - # Left here for compatibility: - # uname -m prints for DJGPP always 'pc', but it prints nothing about - # the processor, so we play safe by assuming i586. - # Note: whatever this is, it MUST be the same as what config.sub - # prints for the "djgpp" host, or else GDB configure will decide that - # this is a cross-build. 
- echo i586-pc-msdosdjgpp - exit ;; - Intel:Mach:3*:*) - echo i386-pc-mach3 - exit ;; - paragon:*:*:*) - echo i860-intel-osf1 - exit ;; - i860:*:4.*:*) # i860-SVR4 - if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then - echo i860-stardent-sysv"$UNAME_RELEASE" # Stardent Vistra i860-SVR4 - else # Add other i860-SVR4 vendors below as they are discovered. - echo i860-unknown-sysv"$UNAME_RELEASE" # Unknown i860-SVR4 - fi - exit ;; - mini*:CTIX:SYS*5:*) - # "miniframe" - echo m68010-convergent-sysv - exit ;; - mc68k:UNIX:SYSTEM5:3.51m) - echo m68k-convergent-sysv - exit ;; - M680?0:D-NIX:5.3:*) - echo m68k-diab-dnix - exit ;; - M68*:*:R3V[5678]*:*) - test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; - 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) - OS_REL='' - test -r /etc/.relid \ - && OS_REL=.$(sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid) - /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } - /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ - && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; - 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) - /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && { echo i486-ncr-sysv4; exit; } ;; - NCR*:*:4.2:* | MPRAS*:*:4.2:*) - OS_REL='.3' - test -r /etc/.relid \ - && OS_REL=.$(sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid) - /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } - /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ - && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } - /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ - && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; - m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) - echo m68k-unknown-lynxos"$UNAME_RELEASE" - exit ;; - mc68030:UNIX_System_V:4.*:*) - echo m68k-atari-sysv4 - exit ;; - 
TSUNAMI:LynxOS:2.*:*) - echo sparc-unknown-lynxos"$UNAME_RELEASE" - exit ;; - rs6000:LynxOS:2.*:*) - echo rs6000-unknown-lynxos"$UNAME_RELEASE" - exit ;; - PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) - echo powerpc-unknown-lynxos"$UNAME_RELEASE" - exit ;; - SM[BE]S:UNIX_SV:*:*) - echo mips-dde-sysv"$UNAME_RELEASE" - exit ;; - RM*:ReliantUNIX-*:*:*) - echo mips-sni-sysv4 - exit ;; - RM*:SINIX-*:*:*) - echo mips-sni-sysv4 - exit ;; - *:SINIX-*:*:*) - if uname -p 2>/dev/null >/dev/null ; then - UNAME_MACHINE=$( (uname -p) 2>/dev/null) - echo "$UNAME_MACHINE"-sni-sysv4 - else - echo ns32k-sni-sysv - fi - exit ;; - PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort - # says - echo i586-unisys-sysv4 - exit ;; - *:UNIX_System_V:4*:FTX*) - # From Gerald Hewes . - # How about differentiating between stratus architectures? -djm - echo hppa1.1-stratus-sysv4 - exit ;; - *:*:*:FTX*) - # From seanf@swdc.stratus.com. - echo i860-stratus-sysv4 - exit ;; - i*86:VOS:*:*) - # From Paul.Green@stratus.com. - echo "$UNAME_MACHINE"-stratus-vos - exit ;; - *:VOS:*:*) - # From Paul.Green@stratus.com. - echo hppa1.1-stratus-vos - exit ;; - mc68*:A/UX:*:*) - echo m68k-apple-aux"$UNAME_RELEASE" - exit ;; - news*:NEWS-OS:6*:*) - echo mips-sony-newsos6 - exit ;; - R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) - if test -d /usr/nec; then - echo mips-nec-sysv"$UNAME_RELEASE" - else - echo mips-unknown-sysv"$UNAME_RELEASE" - fi - exit ;; - BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. - echo powerpc-be-beos - exit ;; - BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. - echo powerpc-apple-beos - exit ;; - BePC:BeOS:*:*) # BeOS running on Intel PC compatible. - echo i586-pc-beos - exit ;; - BePC:Haiku:*:*) # Haiku running on Intel PC compatible. 
- echo i586-pc-haiku - exit ;; - x86_64:Haiku:*:*) - echo x86_64-unknown-haiku - exit ;; - SX-4:SUPER-UX:*:*) - echo sx4-nec-superux"$UNAME_RELEASE" - exit ;; - SX-5:SUPER-UX:*:*) - echo sx5-nec-superux"$UNAME_RELEASE" - exit ;; - SX-6:SUPER-UX:*:*) - echo sx6-nec-superux"$UNAME_RELEASE" - exit ;; - SX-7:SUPER-UX:*:*) - echo sx7-nec-superux"$UNAME_RELEASE" - exit ;; - SX-8:SUPER-UX:*:*) - echo sx8-nec-superux"$UNAME_RELEASE" - exit ;; - SX-8R:SUPER-UX:*:*) - echo sx8r-nec-superux"$UNAME_RELEASE" - exit ;; - SX-ACE:SUPER-UX:*:*) - echo sxace-nec-superux"$UNAME_RELEASE" - exit ;; - Power*:Rhapsody:*:*) - echo powerpc-apple-rhapsody"$UNAME_RELEASE" - exit ;; - *:Rhapsody:*:*) - echo "$UNAME_MACHINE"-apple-rhapsody"$UNAME_RELEASE" - exit ;; - arm64:Darwin:*:*) - echo aarch64-apple-darwin"$UNAME_RELEASE" - exit ;; - *:Darwin:*:*) - UNAME_PROCESSOR=$(uname -p) - case $UNAME_PROCESSOR in - unknown) UNAME_PROCESSOR=powerpc ;; - esac - if command -v xcode-select > /dev/null 2> /dev/null && \ - ! xcode-select --print-path > /dev/null 2> /dev/null ; then - # Avoid executing cc if there is no toolchain installed as - # cc will be a stub that puts up a graphical alert - # prompting the user to install developer tools. 
- CC_FOR_BUILD=no_compiler_found - else - set_cc_for_build - fi - if test "$CC_FOR_BUILD" != no_compiler_found; then - if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ - grep IS_64BIT_ARCH >/dev/null - then - case $UNAME_PROCESSOR in - i386) UNAME_PROCESSOR=x86_64 ;; - powerpc) UNAME_PROCESSOR=powerpc64 ;; - esac - fi - # On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc - if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \ - (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ - grep IS_PPC >/dev/null - then - UNAME_PROCESSOR=powerpc - fi - elif test "$UNAME_PROCESSOR" = i386 ; then - # uname -m returns i386 or x86_64 - UNAME_PROCESSOR=$UNAME_MACHINE - fi - echo "$UNAME_PROCESSOR"-apple-darwin"$UNAME_RELEASE" - exit ;; - *:procnto*:*:* | *:QNX:[0123456789]*:*) - UNAME_PROCESSOR=$(uname -p) - if test "$UNAME_PROCESSOR" = x86; then - UNAME_PROCESSOR=i386 - UNAME_MACHINE=pc - fi - echo "$UNAME_PROCESSOR"-"$UNAME_MACHINE"-nto-qnx"$UNAME_RELEASE" - exit ;; - *:QNX:*:4*) - echo i386-pc-qnx - exit ;; - NEO-*:NONSTOP_KERNEL:*:*) - echo neo-tandem-nsk"$UNAME_RELEASE" - exit ;; - NSE-*:NONSTOP_KERNEL:*:*) - echo nse-tandem-nsk"$UNAME_RELEASE" - exit ;; - NSR-*:NONSTOP_KERNEL:*:*) - echo nsr-tandem-nsk"$UNAME_RELEASE" - exit ;; - NSV-*:NONSTOP_KERNEL:*:*) - echo nsv-tandem-nsk"$UNAME_RELEASE" - exit ;; - NSX-*:NONSTOP_KERNEL:*:*) - echo nsx-tandem-nsk"$UNAME_RELEASE" - exit ;; - *:NonStop-UX:*:*) - echo mips-compaq-nonstopux - exit ;; - BS2000:POSIX*:*:*) - echo bs2000-siemens-sysv - exit ;; - DS/*:UNIX_System_V:*:*) - echo "$UNAME_MACHINE"-"$UNAME_SYSTEM"-"$UNAME_RELEASE" - exit ;; - *:Plan9:*:*) - # "uname -m" is not consistent, so use $cputype instead. 386 - # is converted to i386 for consistency with other x86 - # operating systems. 
- # shellcheck disable=SC2154 - if test "$cputype" = 386; then - UNAME_MACHINE=i386 - else - UNAME_MACHINE="$cputype" - fi - echo "$UNAME_MACHINE"-unknown-plan9 - exit ;; - *:TOPS-10:*:*) - echo pdp10-unknown-tops10 - exit ;; - *:TENEX:*:*) - echo pdp10-unknown-tenex - exit ;; - KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) - echo pdp10-dec-tops20 - exit ;; - XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) - echo pdp10-xkl-tops20 - exit ;; - *:TOPS-20:*:*) - echo pdp10-unknown-tops20 - exit ;; - *:ITS:*:*) - echo pdp10-unknown-its - exit ;; - SEI:*:*:SEIUX) - echo mips-sei-seiux"$UNAME_RELEASE" - exit ;; - *:DragonFly:*:*) - echo "$UNAME_MACHINE"-unknown-dragonfly"$(echo "$UNAME_RELEASE"|sed -e 's/[-(].*//')" - exit ;; - *:*VMS:*:*) - UNAME_MACHINE=$( (uname -p) 2>/dev/null) - case "$UNAME_MACHINE" in - A*) echo alpha-dec-vms ; exit ;; - I*) echo ia64-dec-vms ; exit ;; - V*) echo vax-dec-vms ; exit ;; - esac ;; - *:XENIX:*:SysV) - echo i386-pc-xenix - exit ;; - i*86:skyos:*:*) - echo "$UNAME_MACHINE"-pc-skyos"$(echo "$UNAME_RELEASE" | sed -e 's/ .*$//')" - exit ;; - i*86:rdos:*:*) - echo "$UNAME_MACHINE"-pc-rdos - exit ;; - i*86:AROS:*:*) - echo "$UNAME_MACHINE"-pc-aros - exit ;; - x86_64:VMkernel:*:*) - echo "$UNAME_MACHINE"-unknown-esx - exit ;; - amd64:Isilon\ OneFS:*:*) - echo x86_64-unknown-onefs - exit ;; - *:Unleashed:*:*) - echo "$UNAME_MACHINE"-unknown-unleashed"$UNAME_RELEASE" - exit ;; -esac - -# No uname command or uname output not recognized. -set_cc_for_build -cat > "$dummy.c" < -#include -#endif -#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__) -#if defined (vax) || defined (__vax) || defined (__vax__) || defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__) -#include -#if defined(_SIZE_T_) || defined(SIGLOST) -#include -#endif -#endif -#endif -main () -{ -#if defined (sony) -#if defined (MIPSEB) - /* BFD wants "bsd" instead of "newsos". 
Perhaps BFD should be changed, - I don't know.... */ - printf ("mips-sony-bsd\n"); exit (0); -#else -#include - printf ("m68k-sony-newsos%s\n", -#ifdef NEWSOS4 - "4" -#else - "" -#endif - ); exit (0); -#endif -#endif - -#if defined (NeXT) -#if !defined (__ARCHITECTURE__) -#define __ARCHITECTURE__ "m68k" -#endif - int version; - version=$( (hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null); - if (version < 4) - printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); - else - printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); - exit (0); -#endif - -#if defined (MULTIMAX) || defined (n16) -#if defined (UMAXV) - printf ("ns32k-encore-sysv\n"); exit (0); -#else -#if defined (CMU) - printf ("ns32k-encore-mach\n"); exit (0); -#else - printf ("ns32k-encore-bsd\n"); exit (0); -#endif -#endif -#endif - -#if defined (__386BSD__) - printf ("i386-pc-bsd\n"); exit (0); -#endif - -#if defined (sequent) -#if defined (i386) - printf ("i386-sequent-dynix\n"); exit (0); -#endif -#if defined (ns32000) - printf ("ns32k-sequent-dynix\n"); exit (0); -#endif -#endif - -#if defined (_SEQUENT_) - struct utsname un; - - uname(&un); - if (strncmp(un.version, "V2", 2) == 0) { - printf ("i386-sequent-ptx2\n"); exit (0); - } - if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? 
*/ - printf ("i386-sequent-ptx1\n"); exit (0); - } - printf ("i386-sequent-ptx\n"); exit (0); -#endif - -#if defined (vax) -#if !defined (ultrix) -#include -#if defined (BSD) -#if BSD == 43 - printf ("vax-dec-bsd4.3\n"); exit (0); -#else -#if BSD == 199006 - printf ("vax-dec-bsd4.3reno\n"); exit (0); -#else - printf ("vax-dec-bsd\n"); exit (0); -#endif -#endif -#else - printf ("vax-dec-bsd\n"); exit (0); -#endif -#else -#if defined(_SIZE_T_) || defined(SIGLOST) - struct utsname un; - uname (&un); - printf ("vax-dec-ultrix%s\n", un.release); exit (0); -#else - printf ("vax-dec-ultrix\n"); exit (0); -#endif -#endif -#endif -#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__) -#if defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__) -#if defined(_SIZE_T_) || defined(SIGLOST) - struct utsname *un; - uname (&un); - printf ("mips-dec-ultrix%s\n", un.release); exit (0); -#else - printf ("mips-dec-ultrix\n"); exit (0); -#endif -#endif -#endif - -#if defined (alliant) && defined (i860) - printf ("i860-alliant-bsd\n"); exit (0); -#endif - - exit (1); -} -EOF - -$CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null && SYSTEM_NAME=$($dummy) && - { echo "$SYSTEM_NAME"; exit; } - -# Apollos put the system type in the environment. -test -d /usr/apollo && { echo "$ISP-apollo-$SYSTYPE"; exit; } - -echo "$0: unable to guess system type" >&2 - -case "$UNAME_MACHINE:$UNAME_SYSTEM" in - mips:Linux | mips64:Linux) - # If we got here on MIPS GNU/Linux, output extra information. 
- cat >&2 <&2 <&2 </dev/null || echo unknown) -uname -r = $( (uname -r) 2>/dev/null || echo unknown) -uname -s = $( (uname -s) 2>/dev/null || echo unknown) -uname -v = $( (uname -v) 2>/dev/null || echo unknown) - -/usr/bin/uname -p = $( (/usr/bin/uname -p) 2>/dev/null) -/bin/uname -X = $( (/bin/uname -X) 2>/dev/null) - -hostinfo = $( (hostinfo) 2>/dev/null) -/bin/universe = $( (/bin/universe) 2>/dev/null) -/usr/bin/arch -k = $( (/usr/bin/arch -k) 2>/dev/null) -/bin/arch = $( (/bin/arch) 2>/dev/null) -/usr/bin/oslevel = $( (/usr/bin/oslevel) 2>/dev/null) -/usr/convex/getsysinfo = $( (/usr/convex/getsysinfo) 2>/dev/null) - -UNAME_MACHINE = "$UNAME_MACHINE" -UNAME_RELEASE = "$UNAME_RELEASE" -UNAME_SYSTEM = "$UNAME_SYSTEM" -UNAME_VERSION = "$UNAME_VERSION" -EOF -fi - -exit 1 - -# Local variables: -# eval: (add-hook 'before-save-hook 'time-stamp) -# time-stamp-start: "timestamp='" -# time-stamp-format: "%:y-%02m-%02d" -# time-stamp-end: "'" -# End: diff --git a/config.sub b/config.sub deleted file mode 100644 index 0cbdae682..000000000 --- a/config.sub +++ /dev/null @@ -1,1855 +0,0 @@ -#! /bin/sh -# Configuration validation subroutine script. -# Copyright 1992-2021 Free Software Foundation, Inc. - -timestamp='2021-01-01' - -# This file is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, see . 
-# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that -# program. This Exception is an additional permission under section 7 -# of the GNU General Public License, version 3 ("GPLv3"). - - -# Please send patches to . -# -# Configuration subroutine to validate and canonicalize a configuration type. -# Supply the specified configuration type as an argument. -# If it is invalid, we print an error message on stderr and exit with code 1. -# Otherwise, we print the canonical config type on stdout and succeed. - -# You can get the latest version of this script from: -# https://git.savannah.gnu.org/cgit/config.git/plain/config.sub - -# This file is supposed to be the same for all GNU packages -# and recognize all the CPU types, system types and aliases -# that are meaningful with *any* GNU software. -# Each package is responsible for reporting which valid configurations -# it does not support. The user should be able to distinguish -# a failure to support a valid configuration from a meaningless -# configuration. - -# The goal of this file is to map all the various variations of a given -# machine specification into a single specification in the form: -# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM -# or in some cases, the newer four-part form: -# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM -# It is wrong to echo any other type of specification. - -me=$(echo "$0" | sed -e 's,.*/,,') - -usage="\ -Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS - -Canonicalize a configuration name. - -Options: - -h, --help print this help, then exit - -t, --time-stamp print date of last modification, then exit - -v, --version print version number, then exit - -Report bugs and patches to ." - -version="\ -GNU config.sub ($timestamp) - -Copyright 1992-2021 Free Software Foundation, Inc. 
- -This is free software; see the source for copying conditions. There is NO -warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." - -help=" -Try \`$me --help' for more information." - -# Parse command line -while test $# -gt 0 ; do - case $1 in - --time-stamp | --time* | -t ) - echo "$timestamp" ; exit ;; - --version | -v ) - echo "$version" ; exit ;; - --help | --h* | -h ) - echo "$usage"; exit ;; - -- ) # Stop option processing - shift; break ;; - - ) # Use stdin as input. - break ;; - -* ) - echo "$me: invalid option $1$help" >&2 - exit 1 ;; - - *local*) - # First pass through any local machine types. - echo "$1" - exit ;; - - * ) - break ;; - esac -done - -case $# in - 0) echo "$me: missing argument$help" >&2 - exit 1;; - 1) ;; - *) echo "$me: too many arguments$help" >&2 - exit 1;; -esac - -# Split fields of configuration type -# shellcheck disable=SC2162 -IFS="-" read field1 field2 field3 field4 <&2 - exit 1 - ;; - *-*-*-*) - basic_machine=$field1-$field2 - basic_os=$field3-$field4 - ;; - *-*-*) - # Ambiguous whether COMPANY is present, or skipped and KERNEL-OS is two - # parts - maybe_os=$field2-$field3 - case $maybe_os in - nto-qnx* | linux-* | uclinux-uclibc* \ - | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* \ - | netbsd*-eabi* | kopensolaris*-gnu* | cloudabi*-eabi* \ - | storm-chaos* | os2-emx* | rtmk-nova*) - basic_machine=$field1 - basic_os=$maybe_os - ;; - android-linux) - basic_machine=$field1-unknown - basic_os=linux-android - ;; - *) - basic_machine=$field1-$field2 - basic_os=$field3 - ;; - esac - ;; - *-*) - # A lone config we happen to match not fitting any pattern - case $field1-$field2 in - decstation-3100) - basic_machine=mips-dec - basic_os= - ;; - *-*) - # Second component is usually, but not always the OS - case $field2 in - # Prevent following clause from handling this valid os - sun*os*) - basic_machine=$field1 - basic_os=$field2 - ;; - # Manufacturers - dec* | mips* | sequent* | encore* | pc533* | 
sgi* | sony* \ - | att* | 7300* | 3300* | delta* | motorola* | sun[234]* \ - | unicom* | ibm* | next | hp | isi* | apollo | altos* \ - | convergent* | ncr* | news | 32* | 3600* | 3100* \ - | hitachi* | c[123]* | convex* | sun | crds | omron* | dg \ - | ultra | tti* | harris | dolphin | highlevel | gould \ - | cbm | ns | masscomp | apple | axis | knuth | cray \ - | microblaze* | sim | cisco \ - | oki | wec | wrs | winbond) - basic_machine=$field1-$field2 - basic_os= - ;; - *) - basic_machine=$field1 - basic_os=$field2 - ;; - esac - ;; - esac - ;; - *) - # Convert single-component short-hands not valid as part of - # multi-component configurations. - case $field1 in - 386bsd) - basic_machine=i386-pc - basic_os=bsd - ;; - a29khif) - basic_machine=a29k-amd - basic_os=udi - ;; - adobe68k) - basic_machine=m68010-adobe - basic_os=scout - ;; - alliant) - basic_machine=fx80-alliant - basic_os= - ;; - altos | altos3068) - basic_machine=m68k-altos - basic_os= - ;; - am29k) - basic_machine=a29k-none - basic_os=bsd - ;; - amdahl) - basic_machine=580-amdahl - basic_os=sysv - ;; - amiga) - basic_machine=m68k-unknown - basic_os= - ;; - amigaos | amigados) - basic_machine=m68k-unknown - basic_os=amigaos - ;; - amigaunix | amix) - basic_machine=m68k-unknown - basic_os=sysv4 - ;; - apollo68) - basic_machine=m68k-apollo - basic_os=sysv - ;; - apollo68bsd) - basic_machine=m68k-apollo - basic_os=bsd - ;; - aros) - basic_machine=i386-pc - basic_os=aros - ;; - aux) - basic_machine=m68k-apple - basic_os=aux - ;; - balance) - basic_machine=ns32k-sequent - basic_os=dynix - ;; - blackfin) - basic_machine=bfin-unknown - basic_os=linux - ;; - cegcc) - basic_machine=arm-unknown - basic_os=cegcc - ;; - convex-c1) - basic_machine=c1-convex - basic_os=bsd - ;; - convex-c2) - basic_machine=c2-convex - basic_os=bsd - ;; - convex-c32) - basic_machine=c32-convex - basic_os=bsd - ;; - convex-c34) - basic_machine=c34-convex - basic_os=bsd - ;; - convex-c38) - basic_machine=c38-convex - basic_os=bsd - ;; 
- cray) - basic_machine=j90-cray - basic_os=unicos - ;; - crds | unos) - basic_machine=m68k-crds - basic_os= - ;; - da30) - basic_machine=m68k-da30 - basic_os= - ;; - decstation | pmax | pmin | dec3100 | decstatn) - basic_machine=mips-dec - basic_os= - ;; - delta88) - basic_machine=m88k-motorola - basic_os=sysv3 - ;; - dicos) - basic_machine=i686-pc - basic_os=dicos - ;; - djgpp) - basic_machine=i586-pc - basic_os=msdosdjgpp - ;; - ebmon29k) - basic_machine=a29k-amd - basic_os=ebmon - ;; - es1800 | OSE68k | ose68k | ose | OSE) - basic_machine=m68k-ericsson - basic_os=ose - ;; - gmicro) - basic_machine=tron-gmicro - basic_os=sysv - ;; - go32) - basic_machine=i386-pc - basic_os=go32 - ;; - h8300hms) - basic_machine=h8300-hitachi - basic_os=hms - ;; - h8300xray) - basic_machine=h8300-hitachi - basic_os=xray - ;; - h8500hms) - basic_machine=h8500-hitachi - basic_os=hms - ;; - harris) - basic_machine=m88k-harris - basic_os=sysv3 - ;; - hp300 | hp300hpux) - basic_machine=m68k-hp - basic_os=hpux - ;; - hp300bsd) - basic_machine=m68k-hp - basic_os=bsd - ;; - hppaosf) - basic_machine=hppa1.1-hp - basic_os=osf - ;; - hppro) - basic_machine=hppa1.1-hp - basic_os=proelf - ;; - i386mach) - basic_machine=i386-mach - basic_os=mach - ;; - isi68 | isi) - basic_machine=m68k-isi - basic_os=sysv - ;; - m68knommu) - basic_machine=m68k-unknown - basic_os=linux - ;; - magnum | m3230) - basic_machine=mips-mips - basic_os=sysv - ;; - merlin) - basic_machine=ns32k-utek - basic_os=sysv - ;; - mingw64) - basic_machine=x86_64-pc - basic_os=mingw64 - ;; - mingw32) - basic_machine=i686-pc - basic_os=mingw32 - ;; - mingw32ce) - basic_machine=arm-unknown - basic_os=mingw32ce - ;; - monitor) - basic_machine=m68k-rom68k - basic_os=coff - ;; - morphos) - basic_machine=powerpc-unknown - basic_os=morphos - ;; - moxiebox) - basic_machine=moxie-unknown - basic_os=moxiebox - ;; - msdos) - basic_machine=i386-pc - basic_os=msdos - ;; - msys) - basic_machine=i686-pc - basic_os=msys - ;; - mvs) - 
basic_machine=i370-ibm - basic_os=mvs - ;; - nacl) - basic_machine=le32-unknown - basic_os=nacl - ;; - ncr3000) - basic_machine=i486-ncr - basic_os=sysv4 - ;; - netbsd386) - basic_machine=i386-pc - basic_os=netbsd - ;; - netwinder) - basic_machine=armv4l-rebel - basic_os=linux - ;; - news | news700 | news800 | news900) - basic_machine=m68k-sony - basic_os=newsos - ;; - news1000) - basic_machine=m68030-sony - basic_os=newsos - ;; - necv70) - basic_machine=v70-nec - basic_os=sysv - ;; - nh3000) - basic_machine=m68k-harris - basic_os=cxux - ;; - nh[45]000) - basic_machine=m88k-harris - basic_os=cxux - ;; - nindy960) - basic_machine=i960-intel - basic_os=nindy - ;; - mon960) - basic_machine=i960-intel - basic_os=mon960 - ;; - nonstopux) - basic_machine=mips-compaq - basic_os=nonstopux - ;; - os400) - basic_machine=powerpc-ibm - basic_os=os400 - ;; - OSE68000 | ose68000) - basic_machine=m68000-ericsson - basic_os=ose - ;; - os68k) - basic_machine=m68k-none - basic_os=os68k - ;; - paragon) - basic_machine=i860-intel - basic_os=osf - ;; - parisc) - basic_machine=hppa-unknown - basic_os=linux - ;; - psp) - basic_machine=mipsallegrexel-sony - basic_os=psp - ;; - pw32) - basic_machine=i586-unknown - basic_os=pw32 - ;; - rdos | rdos64) - basic_machine=x86_64-pc - basic_os=rdos - ;; - rdos32) - basic_machine=i386-pc - basic_os=rdos - ;; - rom68k) - basic_machine=m68k-rom68k - basic_os=coff - ;; - sa29200) - basic_machine=a29k-amd - basic_os=udi - ;; - sei) - basic_machine=mips-sei - basic_os=seiux - ;; - sequent) - basic_machine=i386-sequent - basic_os= - ;; - sps7) - basic_machine=m68k-bull - basic_os=sysv2 - ;; - st2000) - basic_machine=m68k-tandem - basic_os= - ;; - stratus) - basic_machine=i860-stratus - basic_os=sysv4 - ;; - sun2) - basic_machine=m68000-sun - basic_os= - ;; - sun2os3) - basic_machine=m68000-sun - basic_os=sunos3 - ;; - sun2os4) - basic_machine=m68000-sun - basic_os=sunos4 - ;; - sun3) - basic_machine=m68k-sun - basic_os= - ;; - sun3os3) - 
basic_machine=m68k-sun - basic_os=sunos3 - ;; - sun3os4) - basic_machine=m68k-sun - basic_os=sunos4 - ;; - sun4) - basic_machine=sparc-sun - basic_os= - ;; - sun4os3) - basic_machine=sparc-sun - basic_os=sunos3 - ;; - sun4os4) - basic_machine=sparc-sun - basic_os=sunos4 - ;; - sun4sol2) - basic_machine=sparc-sun - basic_os=solaris2 - ;; - sun386 | sun386i | roadrunner) - basic_machine=i386-sun - basic_os= - ;; - sv1) - basic_machine=sv1-cray - basic_os=unicos - ;; - symmetry) - basic_machine=i386-sequent - basic_os=dynix - ;; - t3e) - basic_machine=alphaev5-cray - basic_os=unicos - ;; - t90) - basic_machine=t90-cray - basic_os=unicos - ;; - toad1) - basic_machine=pdp10-xkl - basic_os=tops20 - ;; - tpf) - basic_machine=s390x-ibm - basic_os=tpf - ;; - udi29k) - basic_machine=a29k-amd - basic_os=udi - ;; - ultra3) - basic_machine=a29k-nyu - basic_os=sym1 - ;; - v810 | necv810) - basic_machine=v810-nec - basic_os=none - ;; - vaxv) - basic_machine=vax-dec - basic_os=sysv - ;; - vms) - basic_machine=vax-dec - basic_os=vms - ;; - vsta) - basic_machine=i386-pc - basic_os=vsta - ;; - vxworks960) - basic_machine=i960-wrs - basic_os=vxworks - ;; - vxworks68) - basic_machine=m68k-wrs - basic_os=vxworks - ;; - vxworks29k) - basic_machine=a29k-wrs - basic_os=vxworks - ;; - xbox) - basic_machine=i686-pc - basic_os=mingw32 - ;; - ymp) - basic_machine=ymp-cray - basic_os=unicos - ;; - *) - basic_machine=$1 - basic_os= - ;; - esac - ;; -esac - -# Decode 1-component or ad-hoc basic machines -case $basic_machine in - # Here we handle the default manufacturer of certain CPU types. It is in - # some cases the only manufacturer, in others, it is the most popular. 
- w89k) - cpu=hppa1.1 - vendor=winbond - ;; - op50n) - cpu=hppa1.1 - vendor=oki - ;; - op60c) - cpu=hppa1.1 - vendor=oki - ;; - ibm*) - cpu=i370 - vendor=ibm - ;; - orion105) - cpu=clipper - vendor=highlevel - ;; - mac | mpw | mac-mpw) - cpu=m68k - vendor=apple - ;; - pmac | pmac-mpw) - cpu=powerpc - vendor=apple - ;; - - # Recognize the various machine names and aliases which stand - # for a CPU type and a company and sometimes even an OS. - 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) - cpu=m68000 - vendor=att - ;; - 3b*) - cpu=we32k - vendor=att - ;; - bluegene*) - cpu=powerpc - vendor=ibm - basic_os=cnk - ;; - decsystem10* | dec10*) - cpu=pdp10 - vendor=dec - basic_os=tops10 - ;; - decsystem20* | dec20*) - cpu=pdp10 - vendor=dec - basic_os=tops20 - ;; - delta | 3300 | motorola-3300 | motorola-delta \ - | 3300-motorola | delta-motorola) - cpu=m68k - vendor=motorola - ;; - dpx2*) - cpu=m68k - vendor=bull - basic_os=sysv3 - ;; - encore | umax | mmax) - cpu=ns32k - vendor=encore - ;; - elxsi) - cpu=elxsi - vendor=elxsi - basic_os=${basic_os:-bsd} - ;; - fx2800) - cpu=i860 - vendor=alliant - ;; - genix) - cpu=ns32k - vendor=ns - ;; - h3050r* | hiux*) - cpu=hppa1.1 - vendor=hitachi - basic_os=hiuxwe2 - ;; - hp3k9[0-9][0-9] | hp9[0-9][0-9]) - cpu=hppa1.0 - vendor=hp - ;; - hp9k2[0-9][0-9] | hp9k31[0-9]) - cpu=m68000 - vendor=hp - ;; - hp9k3[2-9][0-9]) - cpu=m68k - vendor=hp - ;; - hp9k6[0-9][0-9] | hp6[0-9][0-9]) - cpu=hppa1.0 - vendor=hp - ;; - hp9k7[0-79][0-9] | hp7[0-79][0-9]) - cpu=hppa1.1 - vendor=hp - ;; - hp9k78[0-9] | hp78[0-9]) - # FIXME: really hppa2.0-hp - cpu=hppa1.1 - vendor=hp - ;; - hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) - # FIXME: really hppa2.0-hp - cpu=hppa1.1 - vendor=hp - ;; - hp9k8[0-9][13679] | hp8[0-9][13679]) - cpu=hppa1.1 - vendor=hp - ;; - hp9k8[0-9][0-9] | hp8[0-9][0-9]) - cpu=hppa1.0 - vendor=hp - ;; - i*86v32) - cpu=$(echo "$1" | sed -e 's/86.*/86/') - vendor=pc - 
basic_os=sysv32 - ;; - i*86v4*) - cpu=$(echo "$1" | sed -e 's/86.*/86/') - vendor=pc - basic_os=sysv4 - ;; - i*86v) - cpu=$(echo "$1" | sed -e 's/86.*/86/') - vendor=pc - basic_os=sysv - ;; - i*86sol2) - cpu=$(echo "$1" | sed -e 's/86.*/86/') - vendor=pc - basic_os=solaris2 - ;; - j90 | j90-cray) - cpu=j90 - vendor=cray - basic_os=${basic_os:-unicos} - ;; - iris | iris4d) - cpu=mips - vendor=sgi - case $basic_os in - irix*) - ;; - *) - basic_os=irix4 - ;; - esac - ;; - miniframe) - cpu=m68000 - vendor=convergent - ;; - *mint | mint[0-9]* | *MiNT | *MiNT[0-9]*) - cpu=m68k - vendor=atari - basic_os=mint - ;; - news-3600 | risc-news) - cpu=mips - vendor=sony - basic_os=newsos - ;; - next | m*-next) - cpu=m68k - vendor=next - case $basic_os in - openstep*) - ;; - nextstep*) - ;; - ns2*) - basic_os=nextstep2 - ;; - *) - basic_os=nextstep3 - ;; - esac - ;; - np1) - cpu=np1 - vendor=gould - ;; - op50n-* | op60c-*) - cpu=hppa1.1 - vendor=oki - basic_os=proelf - ;; - pa-hitachi) - cpu=hppa1.1 - vendor=hitachi - basic_os=hiuxwe2 - ;; - pbd) - cpu=sparc - vendor=tti - ;; - pbb) - cpu=m68k - vendor=tti - ;; - pc532) - cpu=ns32k - vendor=pc532 - ;; - pn) - cpu=pn - vendor=gould - ;; - power) - cpu=power - vendor=ibm - ;; - ps2) - cpu=i386 - vendor=ibm - ;; - rm[46]00) - cpu=mips - vendor=siemens - ;; - rtpc | rtpc-*) - cpu=romp - vendor=ibm - ;; - sde) - cpu=mipsisa32 - vendor=sde - basic_os=${basic_os:-elf} - ;; - simso-wrs) - cpu=sparclite - vendor=wrs - basic_os=vxworks - ;; - tower | tower-32) - cpu=m68k - vendor=ncr - ;; - vpp*|vx|vx-*) - cpu=f301 - vendor=fujitsu - ;; - w65) - cpu=w65 - vendor=wdc - ;; - w89k-*) - cpu=hppa1.1 - vendor=winbond - basic_os=proelf - ;; - none) - cpu=none - vendor=none - ;; - leon|leon[3-9]) - cpu=sparc - vendor=$basic_machine - ;; - leon-*|leon[3-9]-*) - cpu=sparc - vendor=$(echo "$basic_machine" | sed 's/-.*//') - ;; - - *-*) - # shellcheck disable=SC2162 - IFS="-" read cpu vendor <&2 - exit 1 - ;; - esac - ;; -esac - -# Here we canonicalize 
certain aliases for manufacturers. -case $vendor in - digital*) - vendor=dec - ;; - commodore*) - vendor=cbm - ;; - *) - ;; -esac - -# Decode manufacturer-specific aliases for certain operating systems. - -if test x$basic_os != x -then - -# First recognize some ad-hoc caes, or perhaps split kernel-os, or else just -# set os. -case $basic_os in - gnu/linux*) - kernel=linux - os=$(echo $basic_os | sed -e 's|gnu/linux|gnu|') - ;; - os2-emx) - kernel=os2 - os=$(echo $basic_os | sed -e 's|os2-emx|emx|') - ;; - nto-qnx*) - kernel=nto - os=$(echo $basic_os | sed -e 's|nto-qnx|qnx|') - ;; - *-*) - # shellcheck disable=SC2162 - IFS="-" read kernel os <&2 - exit 1 - ;; -esac - -# As a final step for OS-related things, validate the OS-kernel combination -# (given a valid OS), if there is a kernel. -case $kernel-$os in - linux-gnu* | linux-dietlibc* | linux-android* | linux-newlib* | linux-musl* | linux-uclibc* ) - ;; - uclinux-uclibc* ) - ;; - -dietlibc* | -newlib* | -musl* | -uclibc* ) - # These are just libc implementations, not actual OSes, and thus - # require a kernel. - echo "Invalid configuration \`$1': libc \`$os' needs explicit kernel." 1>&2 - exit 1 - ;; - kfreebsd*-gnu* | kopensolaris*-gnu*) - ;; - nto-qnx*) - ;; - os2-emx) - ;; - *-eabi* | *-gnueabi*) - ;; - -*) - # Blank kernel with real OS is always fine. - ;; - *-*) - echo "Invalid configuration \`$1': Kernel \`$kernel' not known to work with OS \`$os'." 1>&2 - exit 1 - ;; -esac - -# Here we handle the case where we know the os, and the CPU type, but not the -# manufacturer. We pick the logical manufacturer. 
-case $vendor in - unknown) - case $cpu-$os in - *-riscix*) - vendor=acorn - ;; - *-sunos*) - vendor=sun - ;; - *-cnk* | *-aix*) - vendor=ibm - ;; - *-beos*) - vendor=be - ;; - *-hpux*) - vendor=hp - ;; - *-mpeix*) - vendor=hp - ;; - *-hiux*) - vendor=hitachi - ;; - *-unos*) - vendor=crds - ;; - *-dgux*) - vendor=dg - ;; - *-luna*) - vendor=omron - ;; - *-genix*) - vendor=ns - ;; - *-clix*) - vendor=intergraph - ;; - *-mvs* | *-opened*) - vendor=ibm - ;; - *-os400*) - vendor=ibm - ;; - s390-* | s390x-*) - vendor=ibm - ;; - *-ptx*) - vendor=sequent - ;; - *-tpf*) - vendor=ibm - ;; - *-vxsim* | *-vxworks* | *-windiss*) - vendor=wrs - ;; - *-aux*) - vendor=apple - ;; - *-hms*) - vendor=hitachi - ;; - *-mpw* | *-macos*) - vendor=apple - ;; - *-*mint | *-mint[0-9]* | *-*MiNT | *-MiNT[0-9]*) - vendor=atari - ;; - *-vos*) - vendor=stratus - ;; - esac - ;; -esac - -echo "$cpu-$vendor-${kernel:+$kernel-}$os" -exit - -# Local variables: -# eval: (add-hook 'before-save-hook 'time-stamp) -# time-stamp-start: "timestamp='" -# time-stamp-format: "%:y-%02m-%02d" -# time-stamp-end: "'" -# End: diff --git a/configure.in b/configure.in deleted file mode 100644 index faf2ac35a..000000000 --- a/configure.in +++ /dev/null @@ -1,737 +0,0 @@ -# -# Process this file with autoconf to produce a configure script -# -AC_PREREQ(2.59) -AC_INIT(tmLQCD, 6.0.0, curbach@gmx.de) -AC_CONFIG_HEADER(include/tmlqcd_config_internal.h) -AC_CONFIG_SRCDIR([hmc_tm.c]) -AC_CANONICAL_HOST() -AC_PREFIX_DEFAULT($HOME) -AC_ARG_PROGRAM - -if test "$host_vendor" = "cray"; then - ac_cv_c_bigendian=yes -fi - -AC_PROG_CC -AC_PROG_CC_C99 -dnl AC_PROG_CC_STDC -AC_C_CONST -AC_C_INLINE -AC_C_RESTRICT -AC_F77_LIBRARY_LDFLAGS -AC_CHECK_TOOL(AR, ar, [ar]) -LIBS="$LIBS $FLIBS -lm" - -AC_PROG_LEX -dnl AC_PROG_LEX sets $LEX to ":" if neither lex nor flex are found! -if test "$LEX" = ":"; then - AC_MSG_ERROR([(F)LEX is required for building read_input.c. 
Please install it and run configure again.]) -fi - -AC_PROG_MAKE_SET -AC_PROG_RANLIB -AC_CHECK_PROG(CCDEP, gcc, "gcc", "$CC") -AC_CHECK_PROG(CXXDEP, g++, "g++", "$CXX") -#(endian="", AC_DEFINE(LITTLE_ENDIAN,1,The endian of the architechture)) - -# AC_PROG_FC([ifort gfortran]) -# AC_FC_FUNC(testfunc, ) - -LDFLAGS="$LDFLAGS -L\${HOME}/lib -L\${top_builddir}/lib" -CCLD=${CC} - -# compilation in operator is slowest so we do it first, saves time in parallel compiles -USESUBDIRS="operator linalg solver monomial buffers cu io meas xchange init rational smearing wrapper" - -AC_CHECK_HEADERS([stdint.h], -[ dnl for inttypes.h and stdint.h for uint_xxx types - dnl if successful check for the actual types too - AC_CHECK_TYPES([uint16_t, uint32_t, uint64_t], - [], - [AC_MSG_ERROR([stdint.h found but either uint16_t, uint32_t or uint64_t not found]) ] - ) -], -[ - dnl no inttypes.h or stdint.h found check common unsigned types - dnl for sizes and make appropriate decisions in the lime_fixed_types.h file - AC_CHECK_SIZEOF(unsigned char) - AC_CHECK_SIZEOF(unsigned short) - AC_CHECK_SIZEOF(unsigned int) - AC_CHECK_SIZEOF(unsigned long) - AC_CHECK_SIZEOF(unsigned long long) -] -) - -AC_MSG_CHECKING(where to find lime) -AC_ARG_WITH(limedir, - AS_HELP_STRING([--with-limedir[=dir]], [search lime in dir [default=./lime]]), - lime_dir=$withval, lime_dir="./lime") -AC_MSG_RESULT($lime_dir) -LDFLAGS="$LDFLAGS -L${lime_dir}/lib/" -AC_CHECK_LIB([lime], [limeReaderNextRecord],[], - [AC_MSG_ERROR([library liblime is missing or needed function is not available])]) - -#LIBS="$LIBS $FLIBS -lm" - -AC_MSG_CHECKING(whether we want to use lemon) -AC_ARG_WITH(lemondir, - AS_HELP_STRING([--with-lemondir[=dir]], [use lemon, to be found in dir]), - [echo $withval - LEMON_AVAILABLE=1 - lemon_dir=$withval - LDFLAGS="$LDFLAGS -L${lemon_dir}/lib" - AC_CHECK_LIB([lemon], - [lemonReaderNextRecord], - [], - [AC_MSG_ERROR([library liblemon was not found])])], - [echo no - LEMON_AVAILABLE=0]) - 
-AC_MSG_CHECKING(whether we want to use MPI) -AC_ARG_ENABLE(mpi, - AS_HELP_STRING([--enable-mpi], [enable use of mpi [default=yes]]), - enable_mpi=$enableval, enable_mpi=yes) -if test $enable_mpi = yes; then - AC_MSG_RESULT(yes) - AC_DEFINE(TM_USE_MPI,1,Compile with MPI support) -else - AC_MSG_RESULT(no) -fi - -AC_MSG_CHECKING(whether we want to use DDalphaAMG) -AC_ARG_WITH(DDalphaAMG, - AS_HELP_STRING([--with-DDalphaAMG[=dir]], [use DDalphaAMG, to be found in dir]), - [echo $withval - DDalphaAMG_AVAILABLE=1 - DDalphaAMG_INTERFACE="DDalphaAMG_interface" - AC_DEFINE(DDalphaAMG,1,Using DDalphaAMG) - DDalphaAMG_dir=$withval - LDFLAGS="$LDFLAGS -L${DDalphaAMG_dir}/lib" - INCLUDES="$INCLUDES -I${DDalphaAMG_dir}/include/" - AC_CHECK_LIB([DDalphaAMG], - [DDalphaAMG_finalize], - [], - [AC_MSG_ERROR([library DDalphaAMG was not found])])], - [echo no - DDalphaAMG_AVAILABLE=0 - DDalphaAMG_INTERFACE="DDalphaAMG_interface" - ]) - -AC_MSG_CHECKING(whether we want to use OpenMP) -AC_ARG_ENABLE(omp, - AS_HELP_STRING([--enable-omp], [enable use of OpenMP [default=yes]]), - enable_omp=$enableval, enable_omp=yes) -if test $enable_omp = yes; then - AC_MSG_RESULT(yes) - AC_DEFINE(TM_USE_OMP,1,Compile with OpenMP support) - AC_CHECK_HEADERS([omp.h],,[AC_MSG_ERROR([Cannot find OpenMP headers!])]) - AC_OPENMP -# -- AC_OPENMP provides a compiler-dependent OPENMP_CFLAGS so we can set it here - CFLAGS="$CFLAGS $OPENMP_CFLAGS" - CPPFLAGS="$CPPFLAGS $OPENMP_CFLAGS" - LDFLAGS="$LDFLAGS $OPENMP_CFLAGS" -else - AC_MSG_RESULT(no) -fi - -fftw_lib=/usr -AC_MSG_CHECKING(whether we want to use FFTW) -AC_ARG_ENABLE(fftw, - AS_HELP_STRING([--enable-fftw], [enable use of fftw [default=no]]), - enable_fftw=$enableval, enable_fftw=no) -if test $enable_fftw = yes; then - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FFTW,1,Compile with FFTW support) - LIBS="-lfftw3 ${LIBS}" -elif test $enable_fftw = no; then - AC_MSG_RESULT(no) -else - AC_MSG_RESULT(yes) - AC_DEFINE(HAVE_FFTW,1,Compile with FFTW support) - 
fftw_lib=${enable_fftw} - LDFLAGS="$LDFLAGS -L${fftw_lib}/lib64" - LIBS="-lfftw3 ${LIBS}" - INCLUDES="-I${fftw_lib}/include ${INCLUDES}" -fi - -if test $enable_mpi = yes; then - AC_MSG_CHECKING(which parallelisation to use for MPI) - AC_ARG_WITH(mpidimension, - AS_HELP_STRING([--with-mpidimension[=n]], [use n dimensional parallelisation [default=1]]), - withmpidimension=$withval, withmpidimension=1) - if test $withmpidimension = 1; then - AC_MSG_RESULT(n=1 [t]) - AC_DEFINE(PARALLELT,1,One dimensional parallelisation) - elif test $withmpidimension = 2; then - AC_MSG_RESULT(n=2 [xt]) - AC_DEFINE(PARALLELXT,1,Two dimensional parallelisation) - elif test $withmpidimension = 3; then - AC_MSG_RESULT(n=3 [xyt]) - AC_DEFINE(PARALLELXYT,1,Three dimensional parallelisation) - elif test $withmpidimension = 4; then - AC_MSG_RESULT(n=4 [xyzt]) - AC_DEFINE(PARALLELXYZT,1,Four dimensional parallelisation) - elif test $withmpidimension = X; then - AC_MSG_RESULT(n=1 [x]) - AC_DEFINE(PARALLELX,1, X parallelisation) - elif test $withmpidimension = XY; then - AC_MSG_RESULT(n=2 [xy]) - AC_DEFINE(PARALLELXY,1, XY parallelisation) - elif test $withmpidimension = XYZ; then - AC_MSG_RESULT(n=3 [xyz]) - AC_DEFINE(PARALLELXYZ,1, XYZ parallelisation) - elif test $withmpidimension = T; then - AC_MSG_RESULT(n=1 [t]) - AC_DEFINE(PARALLELT,1, T parallelisation) - elif test $withmpidimension = XT; then - AC_MSG_RESULT(n=2 [xt]) - AC_DEFINE(PARALLELXT,1, XT parallelisation) - elif test $withmpidimension = XYT; then - AC_MSG_RESULT(n=3 [xyt]) - AC_DEFINE(PARALLELXYT,1, XYT parallelisation) - elif test $withmpidimension = XYZT; then - AC_MSG_RESULT(n=4 [xyzt]) - AC_DEFINE(PARALLELXYZT,1, XYZT parallelisation) - else - AC_MSG_RESULT(unknown) - AC_MSG_ERROR([Only t, xt, xyt, xyzt, x, xy, xyz parallelisation available]) - fi - - AC_MSG_CHECKING(whether we shall use persistent MPI calls for halfspinor) - AC_ARG_WITH([persistentmpi], - AS_HELP_STRING([--with-persistentmpi], [use persistent MPI calls for 
halfspinor [default=no]]), - withpersistent=$withval, withpersistent=no) - if test $withpersistent = yes; then - AC_MSG_RESULT(yes) - AC_DEFINE(_PERSISTENT,1,use persistent MPI calls for halfspinor) - else - AC_MSG_RESULT(no) - fi - - AC_MSG_CHECKING(whether we shall use non-blocking MPI calls) - AC_ARG_WITH([nonblockingmpi], - AS_HELP_STRING([--with-nonblockingmpi], [use non-blocking MPI calls for spinor and gauge [default=yes]]), - withnonblock=$withval, withnonblock=yes) - if test $withnonblock = yes; then - AC_MSG_RESULT(yes) - AC_DEFINE(_NON_BLOCKING,1,use non-blocking MPI calls for spinor ang gauge) - else - AC_MSG_RESULT(no) - fi -fi - -AC_MSG_CHECKING([whether we want to fix volume at compiletime]) -AC_ARG_WITH([fixedvolume], - AS_HELP_STRING([--with-fixedvolume], [fix volume at compiletime [default=no]]), - with_fixvol=$withval, with_fixvol=no) -if test $with_fixvol = yes; then - AC_MSG_RESULT(yes) - AC_DEFINE(FIXEDVOLUME,1,Fixed volume at compiletime) - AC_CONFIG_FILES([fixed_volume.h]) -else - AC_MSG_RESULT(no) -fi - -AC_MSG_CHECKING([whether we want to use KOJAK instrumentalisation]) -AC_ARG_WITH([kojakinst], - AS_HELP_STRING([--with-kojakinst], [instrumentalise for KOJAK [default=no]]), - with_kojakinst=$withval, with_kojakinst=no) -if test $with_kojakinst = yes; then - AC_MSG_RESULT(yes) - CC="kinst-pomp ${CC}" -else - AC_MSG_RESULT(no) -fi - -AC_MSG_CHECKING(whether we want to use lapack and blas) -AC_ARG_WITH(lapack, - AS_HELP_STRING([--with-lapack], [enable use of lapack [default=yes]]), - with_lapack=$withval, with_lapack=yes) -if test "$with_lapack" = yes; then - AC_MSG_RESULT(yes) - LAPACKLIB= - AC_DEFINE(HAVE_LAPACK,1,lapack available) -elif test "$with_lapack" != no; then - AC_MSG_RESULT(yes) - LIBS="$withval $LIBS" - with_lapack=yes - AC_DEFINE(HAVE_LAPACK,1,lapack available) -else - AC_MSG_RESULT(no) - AC_MSG_ERROR([lapack is needed! 
Will stop here.]) -fi - -if test $enable_mpi = yes; then - dnl In general one cannot run mpi programs directly - dnl thats why we need here cross_compiling=yes - dnl for non CRAY - if test "$host_vendor" != "cray"; then - cross_compiling=yes - fi -fi - -dnl for the case of other configure scripts -dnl AC_CONFIG_SUBDIRS( rng ) - -dnl check for clock_gettime and set correct library flag if one is required -dnl (this is done by AC_CHECK_LIB) -AC_CHECK_FUNCS(clock_gettime, [], [AC_CHECK_LIB(rt, clock_gettime)]) - -dnl in principle clock_gettime and CLOCK_MONOTONIC/CLOCK_REALTIME should be available -dnl only when using POSIX 199309, we set this explicitly here -dnl this should not cause problems on any relatively modern (post y2k) machine! -if ( test "$ac_cv_lib_rt_clock_gettime" = "yes" || test "$ac_cv_func_clock_gettime" = "yes" ); then - AC_DEFINE(HAVE_CLOCK_GETTIME,1) -dnl we set this in gettime.c explicitly for the time being -dnl due to endian problem on BG/Q -dnl CFLAGS="$CFLAGS -D_POSIX_C_SOURCE=199309L" - AC_MSG_NOTICE([Instructing the compiler to use POSIX 199309L]) -fi - -dnl Checks for lapack and defines proper name mangling scheme for -dnl linking with f77 code -AC_F77_FUNC(zheev) -if test "$zheev" = "zheev"; then - AC_DEFINE(NOF77_,1,Fortran has no extra _) -fi -AC_SEARCH_LIBS([$zheev],[lapack], [], [AC_MSG_ERROR([Cannot find lapack])]) - -dnl Checks for header files. -AC_HEADER_STDC -AC_CHECK_HEADERS([float.h libintl.h limits.h stdint.h stdlib.h string.h strings.h sys/time.h unistd.h endian.h]) -AC_CHECK_HEADER( getopt.h, []) - -dnl Checks for typedefs, structures, and compiler characteristics. -AC_C_CONST -AC_TYPE_OFF_T -AC_TYPE_SIZE_T -AC_HEADER_TIME - -dnl Checks for library functions. 
-AC_SYS_LARGEFILE -AC_FUNC_FSEEKO -AC_FUNC_MALLOC -AC_TYPE_SIGNAL -AC_CHECK_FUNCS([gettimeofday pow sqrt]) - -dnl We now define some replacement variables -AC_SUBST(OPTARGS) -AC_SUBST(SOPTARGS) -AC_SUBST(INCLUDES) -AC_SUBST(AUTOCONF) -AC_SUBST(SOLVEROUT) -AC_SUBST(CCDEP) -AC_SUBST(CXXDEP) -AC_SUBST(CCLD) -AC_SUBST(DEPFLAGS) -AC_SUBST(CXXDEPFLAGS) -AC_SUBST(DEBUG_FLAG) -AC_SUBST(PROFILE_FLAG) -AC_SUBST(XCHANGELIB) -AC_SUBST(XCHANGEDIR) -AC_SUBST(MEASDIR) -AC_SUBST(XLIB) -AC_SUBST([LEMON_AVAILABLE]) -AC_SUBST(QUDA_INTERFACE) -AC_SUBST(QPHIX_INTERFACE) -AC_SUBST(QPHIX_PROGRAMS) -AC_SUBST(DDalphaAMG_INTERFACE) - -INCLUDES="$INCLUDES -I\$(HOME)/include/ -I. -I\${abs_top_builddir}/ -I\${abs_top_builddir}/include/ -I\${abs_top_srcdir}/ -I\${abs_top_srcdir}/include/ -I${lime_dir}/include/ -I${lemon_dir}/include/" -DEPFLAGS="$DEPFLAGS" - -AC_MSG_CHECKING(what alignment we want for arrays) -AC_ARG_ENABLE(alignment, - [AS_HELP_STRING([--enable-alignment[=n]], [Automatically or expliclty align arrays to byte number: auto, none, 16, 32, 64 [default=auto]])], - withalign=$enableval, withalign=auto) -if test "$withalign" = "none"; then - AC_MSG_RESULT(none) - withalign=1 - AC_DEFINE(ALIGN_BASE, 0x00, [Align base]) - AC_DEFINE(ALIGN, []) - AC_DEFINE(ALIGN_BASE32, 0x00, [Align base32]) - AC_DEFINE(ALIGN32, [], []) -elif test $withalign = 16; then - AC_MSG_RESULT(16 bytes) - AC_DEFINE(ALIGN_BASE, 0x0F, [Align base]) - AC_DEFINE(ALIGN, [__attribute__ ((aligned (16)))]) - AC_DEFINE(ALIGN_BASE32, 0x0F, [Align base32]) - AC_DEFINE(ALIGN32, [__attribute__ ((aligned (16)))], []) -elif test $withalign = 32; then - AC_MSG_RESULT(32 bytes) - AC_DEFINE(ALIGN_BASE, 0x1F, [Align base]) - AC_DEFINE(ALIGN, [__attribute__ ((aligned (32)))]) - AC_DEFINE(ALIGN_BASE32, 0x1F, [Align base32]) - AC_DEFINE(ALIGN32, [__attribute__ ((aligned (32)))], []) -elif test $withalign = 64; then - AC_MSG_RESULT(64 bytes) - AC_DEFINE(ALIGN_BASE, 0x3F, [Align base]) - AC_DEFINE(ALIGN, [__attribute__ ((aligned 
(64)))]) - AC_DEFINE(ALIGN_BASE32, 0x3F, [Align base32]) - AC_DEFINE(ALIGN32, [__attribute__ ((aligned (64)))], []) -elif test $withalign = auto; then - withautoalign=1 - AC_MSG_RESULT(auto) - AC_DEFINE(ALIGN_BASE, 0x00, [Align base]) - AC_DEFINE(ALIGN, [], []) - AC_DEFINE(ALIGN_BASE32, 0x00, [Align base32]) - AC_DEFINE(ALIGN32, [], []) -else - AC_MSG_RESULT(Unusable value for array alignment) - AC_MSG_ERROR([Allowed values are: auto, none, 16, 32, 64]) -fi - -dnl We here check for alignment issues with QPX instructions -- this flag has been set earlier -if test $enable_qpx = yes; then - if test $withalign = auto; then - if test $withautoalign -lt 32; then - AC_MSG_RESULT(increasing array alignment to 32 bytes for use of QPX instructions on BG/Q) - AC_DEFINE(ALIGN_BASE, 0x1F, [Align base]) - AC_DEFINE(ALIGN, [__attribute__ ((aligned (32)))]) - AC_MSG_RESULT(increasing 32bit array alignment to 16 bytes for use of QPX instructions on BG/Q) - AC_DEFINE(ALIGN_BASE32, 0x0F, [Align base32]) - AC_DEFINE(ALIGN32, [__attribute__ ((aligned (16)))]) - withautoalign=32 - fi - elif test $withalign -lt 32; then - AC_MSG_ERROR([alignment incompatible with QPX instructions (32 bytes required)]) - fi -fi - -dnl Check for alignment associated with (non-QPX) BG optimization. -dnl This will also result in using 32 byte alignment on MareNostrum, but that should be fairly innocuous. 
-if test "$host_cpu" = "powerpc" && test "$host_vendor" = "ibm" && test "$host_os" = "blrts"; then - if test $withalign = auto; then - if test $withautoalign -lt 16; then - AC_MSG_RESULT(increasing array alignment to 16 bytes for BG/L optimization) - AC_DEFINE(ALIGN_BASE, 0x0F, [Align base]) - AC_DEFINE(ALIGN, [__attribute__ ((aligned (16)))], [Align base]) - withautoalign=16 - fi - fi -elif test "$host_cpu" = "powerpc" && test "$host_vendor" = "ibm" && test "$host_os" = "bprts"; then - if test $withalign = auto; then - if test $withautoalign -lt 16; then - AC_MSG_RESULT(increasing array alignment to 16 bytes for BG/P optimization) - AC_DEFINE(ALIGN_BASE, 0x0F, [Align base]) - AC_DEFINE(ALIGN, [__attribute__ ((aligned (16)))], [Align base]) - withautoalign=16 - fi - fi -elif test "$host_cpu" = "powerpc64" && test "$host_vendor" = "unknown" && test "$host_os" = "linux-gnu"; then - if test $withalign = auto; then - if test $withautoalign -lt 32; then - AC_MSG_RESULT(increasing array alignment to 32 bytes for BG/Q and generic POWER optimization) - AC_DEFINE(ALIGN_BASE, 0x1F, [Align base]) - AC_DEFINE(ALIGN, [__attribute__ ((aligned (32)))]) - AC_MSG_RESULT(increasing array 32 bit alignment to 16 bytes for BG/Q and generic POWER optimization) - AC_DEFINE(ALIGN_BASE32, 0x0F, [Align base]) - AC_DEFINE(ALIGN32, [__attribute__ ((aligned (16)))]) - withautoalign=32 - fi - fi -fi - -AC_MSG_CHECKING(whether we want to use gprof as profiler) -AC_ARG_WITH(gprof, - AS_HELP_STRING([--with-gprof], [use of gprof profiler [default=no]]), - enable_gprof=$withval, enable_gprof=no) -if test $enable_gprof = yes; then - AC_MSG_RESULT(yes) - if test "$host_cpu" = "powerpc" && test "$host_vendor" = "ibm"; then - PROFILE_FLAG="-pg -qfullpath -g" - else - PROFILE_FLAG="-pg -g" - fi -else - AC_MSG_RESULT(no) - PROFILE_FLAG= -fi - -dnl Now we have to set all Flags and compiler properly -PGCC=`$CC -V 2>&1 | grep pgcc` -ICC=`$CC -V 2>&1 | grep -i intel` - -dnl first for PC's -if test "$host_cpu" 
= "i686" || test "$host_cpu" = "x86_64"; then -dnl the GNU compiler - if test "$GCC" = yes && test "$ICC" = ""; then - DEPFLAGS="-MM" - CFLAGS="$CFLAGS -pedantic -Wall" - OPTARGS='-O' - SOPTARGS='-O' - - if test "$host_cpu" = "x86_64"; then - AC_DEFINE(_x86_64,1,x86 64 Bit architecture) - fi - CCDEP="$CC" - if test $enable_mpi = yes; then - CCDEP="gcc" - fi - CXXDEP="$CXX" - if test $enable_mpi = yes; then - CXXDEP="g++" - fi - DEBUG_FLAG="-g" -dnl other compilers - else -dnl check for pgcc - if test "$PGCC" != ""; then - DEPFLAGS="-M" - echo "We are using the Portland Group C compiler!" - OPTARGS="-O2" - SOPTARGS="-O2" - DEBUG_FLAG="-g" - PROFILE_FLAG="-p -g" - CCDEP="$CC" - -dnl check for icc - elif test "$ICC" != ""; then - echo "We are using the Intel C compiler!" - DEPFLAGS="-M" - OPTARGS="-O3" - SOPTARGS="-O3" - DEBUG_FLAG="-g" - PROFILE_FLAG="-p -g" - CCDEP="$CC" - CXXDEP="$CXX" - else - DEPFLAGS="-M" - CFLAGS="$CFLAGS -O" - DEBUG_FLAG="-g" - CCDEP="$CC" - CXXDEP="$CXX" - fi - fi -# The CRAY -elif test "$host_vendor" = "cray"; then - echo - echo "Hey, we are on a cray, you should take some time for this..." - echo "get yourself a coffee or so!" 
- echo - CFLAGS="$CFLAGS -dp" - AC_DEFINE(CRAY,1,We are on a CRAY) - OPTARGS="-O3" - SOPTARGS="-O3" - DEBUG_FLAG="-g" - CCDEP="$CC" - DEPFLAGS="-M" -else - AC_CHECK_PROG(CCDEP, gcc, "gcc", "$CC") - if test "$CCDEP" = "gcc"; then - DEPFLAGS="-MM" - else - DEPFLAGS="-M" - fi - OPTARGS= - SOPTARGS= -fi - -CXXDEPFLAGS="$DEPFLAGS --std=c++11" - -AC_MSG_CHECKING(whether we want to switch on optimisation) -AC_ARG_ENABLE(optimize, - AS_HELP_STRING([--enable-optimize], [enable optimisation [default=yes]]), - enable_optimize=$enableval, enable_optimize=yes) -if test $enable_optimize = no; then - AC_MSG_RESULT(no) - OPTARGS= - SOPTARGS= -else - AC_MSG_RESULT(yes) -fi - -AC_MSG_CHECKING(whether we want to use a copy of the gauge field) -AC_ARG_ENABLE(gaugecopy, - AS_HELP_STRING([--enable-gaugecopy], [enable use of a copy of the gauge field [default=yes]]), - enable_gaugecopy=$enableval, enable_gaugecopy=yes) -if test $enable_gaugecopy = yes; then - AC_MSG_RESULT(yes) - AC_DEFINE(_GAUGE_COPY,1,Construct an extra copy of the gauge fields) -else - AC_MSG_RESULT(no) -fi - -AC_MSG_CHECKING(whether we want to use a Dirac Op. with halfspinor exchange) -AC_ARG_ENABLE(halfspinor, - AS_HELP_STRING([--enable-halfspinor], [use a Dirac Op. 
with halfspinor exchange [default=yes]]), - enable_halfspinor=$enableval, enable_halfspinor=yes) -if test $enable_halfspinor = yes; then - AC_MSG_RESULT(yes) - AC_DEFINE(_USE_HALFSPINOR,1,Exchange only a halfspinor in the Dirac Operator) - if test $enable_gaugecopy = no; then - AC_MSG_WARN([switching on gaugecopy for Dirac operator with halfspinor!]) - AC_DEFINE(_GAUGE_COPY,1,Construct an extra copy of the gauge fields) - fi -else - AC_MSG_RESULT(no) -fi - -AC_MSG_CHECKING(whether we want to use shmem API) -AC_ARG_ENABLE(shmem, - AS_HELP_STRING([--enable-shmem],[use shmem API [default=no]]), - enable_shmem=$enableval, enable_shmem=no) -if test $enable_shmem = yes; then - AC_MSG_RESULT(yes) - AC_DEFINE(_USE_SHMEM,1,Use shmem API) - LIBS="$LIBS -lsma" -else - AC_MSG_RESULT(no) -fi - - -AC_SUBST(USESUBDIRS) - -AC_MSG_CHECKING(whether we want to use CUDA) -AC_ARG_WITH(cudadir, - AS_HELP_STRING([--with-cudadir[=dir]], [use CUDA library (specify 'lib' directory)]), - [AC_MSG_RESULT($withval) - CUDA_AVAILABLE=1 - cuda_dir=$withval - LDFLAGS="$LDFLAGS -L${cuda_dir} -lcuda" - AC_CHECK_LIB([cudart], - [cudaMalloc], - [], - [AC_MSG_ERROR([Can't link a simple program against library cudart.])])], - [AC_MSG_RESULT(no) - CUDA_AVAILABLE=0]) - -AC_MSG_CHECKING(whether we want to use HIP) -AC_ARG_WITH(hipdir, - AS_HELP_STRING([--with-hipdir[=dir]], [use HIP library (specify 'lib' directory)]), - [AC_MSG_RESULT($withval) - HIP_AVAILABLE=1 - hip_dir=$withval - LDFLAGS="$LDFLAGS -L${hip_dir} -lamdhip64" - AC_CHECK_LIB([amdhip64], - [hipMalloc], - [], - [AC_MSG_ERROR([Can't link a simple program against library amdhip64.])])], - [AC_MSG_RESULT(no) - HIP_AVAILABLE=0]) - - -# QUDA library for GPUs -AC_MSG_CHECKING(whether we want to use QUDA) -AC_ARG_WITH(qudadir, - AS_HELP_STRING([--with-qudadir[=dir]], [use QUDA library (specify directory which contains 'include' and 'lib' subdirs)]), - [AC_MSG_RESULT($withval) - if test $CUDA_AVAILABLE -ne 1 && test $HIP_AVAILABLE -ne 1; then - 
AC_MSG_ERROR([Need either CUDA or HIP to link against QUDA!]) - fi - QUDA_AVAILABLE=1 - AC_DEFINE(TM_USE_QUDA,1,Using QUDA GPU) - quda_dir=$withval - LDFLAGS="$LDFLAGS -L${quda_dir}/lib" - INCLUDES="$INCLUDES -I${quda_dir}/include/" - QUDA_INTERFACE="quda_interface" - AC_CHECK_LIB([quda], - [freeGaugeQuda], - [], - [AC_MSG_ERROR([Can't link a simple program against library libquda. (Did you set CXX properly?)])] - ) - #QUDA needs to be linked with C++ linker - CCLD=${CXX} - ], - [AC_MSG_RESULT(no) - QUDA_AVAILABLE=0 - QUDA_INTERFACE="" - ] - ) -AC_SUBST([QUDA_AVAILABLE]) - -AC_MSG_CHECKING(whether the QUDA version is experimental) -AC_ARG_ENABLE(quda_experimental, - AS_HELP_STRING([--enable-quda_experimental], [enable support for experimental QUDA versions [default=no]]), - enable_quda_experimental=$enableval, enable_quda_experimental=no) -if test $enable_quda_experimental = yes; then - AC_MSG_RESULT(yes) - AC_DEFINE(TM_QUDA_EXPERIMENTAL,1,Experimental QUDA version in use) -else - AC_MSG_RESULT(no) -fi -AC_MSG_CHECKING(whether the QUDA force is enabled) -AC_ARG_ENABLE(quda_fermionic_forces, - AS_HELP_STRING([--enable-quda_fermionic_forces], [enable support for fermionic forces using QUDA [default=yes]]), - enable_quda_fermionic_forces=$enableval, enable_quda_fermionic_forces=yes) -if test $enable_quda_fermionic_forces = no; then - AC_MSG_RESULT(no) -else - AC_MSG_RESULT(yes) - AC_DEFINE(TM_QUDA_FERMIONIC_FORCES,1, fermionic forces with QUDA are enabled) -fi - -# QPhiX library for Intel Xeon and Xeon Phis -AC_MSG_CHECKING(whether we want to use QPhiX) -AC_ARG_WITH(qphixdir, - AS_HELP_STRING([--with-qphixdir[=dir]], [use QPhiX, to be found in dir]), - [echo yes - QPHIX_AVAILABLE=1 - AC_DEFINE(TM_USE_QPHIX,1,Using QPhiX) - qphix_dir=$withval - LDFLAGS="$LDFLAGS -L${qphix_dir}/lib -lqphix_solver -lqphix_codegen" - INCLUDES="$INCLUDES -I${qphix_dir}/include/" - QPHIX_INTERFACE="qphix_interface" - QPHIX_PROGRAMS="" - # Due to github issue #404, the qphix test_Dslash code 
has been disabled by BaKo - # for the time being - # it should be updated to make use of the QPhiX internal interfaces - # for passing full lattice spinors - # "qphix_test_Dslash" - - # QMP: TODO AC_CHECK_LIB - AC_MSG_CHECKING([where to search for QMP libs]) - AC_ARG_WITH(qmpdir, - AS_HELP_STRING([--with-qmpdir[=dir]], [if using QPhiX, then set QMP lib dir]), - qmp_dir=$withval - LDFLAGS="$LDFLAGS -L${qmp_dir}/lib -lqmp" - INCLUDES="$INCLUDES -I${qmp_dir}/include/" - ) - AC_MSG_RESULT($qmp_dir) - - AC_MSG_CHECKING([Setting QPhiX SOALEN]) - AC_ARG_ENABLE(qphix-soalen, - AS_HELP_STRING([--enable-qphix-soalen], [if using QPhiX, set SOALEN [default=4]]), - enable_qphix_soalen=$enableval, enable_qphix_soalen=4) - AC_MSG_RESULT($enable_qphix_soalen) - AC_DEFINE_UNQUOTED(QPHIX_SOALEN, ${enable_qphix_soalen}, Structure of Array length to use with QPhiX) - - AC_PROG_CXX - #QPhiX needs to be linked with C++ linker - CCLD=${CXX} - ], - [echo no - QPHIX_AVAILABLE=0 - QPHIX_INTERFACE=""]) -AC_SUBST([QPHIX_AVAILABLE]) - -if test ! -e lib; then - mkdir lib -fi - -dnl create the test and tests directory here -if test ! -e test; then - mkdir test -fi - -if test ! -e tests; then - mkdir tests -fi - -if test ! 
-e tests/regressions; then - mkdir tests/regressions -fi - - -LIBS="-lhmc -lmonomial -loperator -lsolver -linit -lmeas -llinalg -lhmc -lxchange -lrational -lio $LIBS" -AUTOCONF=autoconf - -for i in $USESUBDIRS -do - make_files="$make_files $i/Makefile" -done - -AC_CONFIG_FILES([Makefile $make_files]) - -AC_OUTPUT diff --git a/src/lib/profiling/hmc/Readme.md b/profiling/hmc/Readme.md similarity index 100% rename from src/lib/profiling/hmc/Readme.md rename to profiling/hmc/Readme.md diff --git a/src/lib/profiling/hmc/example_profile.pdf b/profiling/hmc/example_profile.pdf similarity index 100% rename from src/lib/profiling/hmc/example_profile.pdf rename to profiling/hmc/example_profile.pdf diff --git a/src/lib/profiling/hmc/profile.Rmd b/profiling/hmc/profile.Rmd similarity index 100% rename from src/lib/profiling/hmc/profile.Rmd rename to profiling/hmc/profile.Rmd diff --git a/src/lib/profiling/hmc/timing.R b/profiling/hmc/timing.R similarity index 100% rename from src/lib/profiling/hmc/timing.R rename to profiling/hmc/timing.R diff --git a/src/lib/profiling/hmc_mk2/.gitignore b/profiling/hmc_mk2/.gitignore similarity index 100% rename from src/lib/profiling/hmc_mk2/.gitignore rename to profiling/hmc_mk2/.gitignore diff --git a/src/lib/profiling/hmc_mk2/README.md b/profiling/hmc_mk2/README.md similarity index 100% rename from src/lib/profiling/hmc_mk2/README.md rename to profiling/hmc_mk2/README.md diff --git a/src/lib/profiling/hmc_mk2/logs/example_log.out b/profiling/hmc_mk2/logs/example_log.out similarity index 99% rename from src/lib/profiling/hmc_mk2/logs/example_log.out rename to profiling/hmc_mk2/logs/example_log.out index faf4874bf..22ec86ec9 100644 --- a/src/lib/profiling/hmc_mk2/logs/example_log.out +++ b/profiling/hmc_mk2/logs/example_log.out @@ -270,8 +270,8 @@ operator 0 parsed line 229 This is the hmc code for twisted mass Wilson QCD Version 5.2.0, commit 51cf008a89944ecdd9345cdb62aaf0a203a7f306 -# The code is compiled with -D_GAUGE_COPY -# The code is 
compiled with -D_USE_HALFSPINOR +# The code is compiled with -DTM_GAUGE_COPY +# The code is compiled with -DTM_USE_HALFSPINOR # the code is compiled for non-blocking MPI calls (spinor and gauge) # the code is compiled with openMP support # Non-Schroedinger (anti-periodic, periodic or twisted) boundary conditions are used diff --git a/src/lib/profiling/hmc_mk2/make_profile.R b/profiling/hmc_mk2/make_profile.R similarity index 100% rename from src/lib/profiling/hmc_mk2/make_profile.R rename to profiling/hmc_mk2/make_profile.R diff --git a/src/lib/profiling/hmc_mk2/profile.Rmd b/profiling/hmc_mk2/profile.Rmd similarity index 100% rename from src/lib/profiling/hmc_mk2/profile.Rmd rename to profiling/hmc_mk2/profile.Rmd diff --git a/qphix_base_classes.hpp b/qphix_base_classes.hpp deleted file mode 100644 index 26015e3a2..000000000 --- a/qphix_base_classes.hpp +++ /dev/null @@ -1,771 +0,0 @@ -// Copyright © 2017 Martin Ueding -// Licensed unter the [BSD-3-Clause](https://opensource.org/licenses/BSD-3-Clause). - -// Due to github issue #404, the helper functions to apply the full QPhiX operator -// are currently disabled because they conflict with the new interfaces in QPhiX -// itself. If required, these should be rewritten to use these interfaces -// rather than the base classes in qphix_base_classes.hpp - -// This file should be deprecated or updated to provide any functionality -// not covered by QPhiX itself. - -/** - \file Additions to QPhiX that are only needed for tmLQCD. - - In the original QPhiX, there are only Wilson fermions and Wilson clover - fermions. The Dslash operators have a different call signature (the latter - requiring a clover term), so there is no common base class. With the addition - of Wilson twisted mass (Mario) and Wilson twisted clover (Peter), there are - now two instances of the Dslash that have the same signature. 
In order to - write a more general even-odd source preparation and solution reconstruction - code, a common base class for non-clover and clover is desired. In order to - leave the QPhiX code untouched (for now), this code lives here in tmLQCD. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include - -namespace tmlqcd { - -namespace { -size_t constexpr re = 0; -size_t constexpr im = 1; -int const n_blas_simt = 1; - -// The even checkerboard is given by ( (x + y + z + t ) & 1 == 0 ) -> cb0 is even -int constexpr cb_even = 0; -int constexpr cb_odd = 1; -} - -/** - Complex multiplication accumulate. - - Computes \f$ (r + \mathrm i i) += (a + \mathrm i b) * (c + \mathrm i d) \f$. - */ -template -void cplx_mul_acc(FT &r_out, FT &i_out, FT const &a, FT const &b, FT const &c, FT const &d) { - r_out += a * c - b * d; - i_out += a * d + b * c; -} - -/** - Wrapper for the clover multiplication function. - - The `struct` is needed in order to allow for partial template specialization in the `Clover` - parameter. - - \tparam Clover Type of clover block to use, must be a type from Geometry such that there exists a - specialization for it. - */ -template -struct InnerCloverProduct { - /** - Multiplies the clover term for a single lattice size to a spinor. - - This function is intended to be used in a loop over all lattice sites. It is expected from the - caller to have figured out all the correct indices. There are template specializations for the two - different types of clover term that are used in QPhiX. - - \param[out] out Output spinor block. It is assumed to be zeroed properly, the function will just - accumulate values into that output variable. Use \ref QPhiX::zeroSpinor for that. - \param[in] in Input spinor block. - \param[in] clover Single clover block that contains the lattice site of the spinor. - \param[in] xi SIMD index for the arrays with length `soalen`, as in the spinors. 
- \param[in] veclen_idx SIMD index for the arrays with length `veclen`, as in the clover term. - */ - static void multiply( - typename ::QPhiX::Geometry::FourSpinorBlock &out, - typename ::QPhiX::Geometry::FourSpinorBlock const &in, - Clover const &clover, int const xi, int const veclen_idx); -}; - -template -struct InnerCloverProduct::CloverBlock> { - static void multiply( - typename ::QPhiX::Geometry::FourSpinorBlock &spinor_out, - typename ::QPhiX::Geometry::FourSpinorBlock const &spinor_in, - typename ::QPhiX::Geometry::CloverBlock const &clov_block, - int const xi, int const veclen_idx) { - // The clover term is block-diagonal in spin. Therefore we need - // to iterate over the two blocks of spin. - for (auto s_block : {0, 1}) { - // Extract the diagonal and triangular parts. - auto const &diag_in = s_block == 0 ? clov_block.diag1 : clov_block.diag2; - auto const &off_diag_in = s_block == 0 ? clov_block.off_diag1 : clov_block.off_diag2; - // Input two-spinor component. - for (auto two_s_in : {0, 1}) { - // Reconstruct four spinor index. - auto const four_s_in = 2 * s_block + two_s_in; - // Output two-spinor component. - for (auto two_s_out : {0, 1}) { - // Reconstruct four spinor index. - auto const four_s_out = 2 * s_block + two_s_out; - // Input color. - for (auto c_in : {0, 1, 2}) { - // Spin-color index (0, ..., 5). - auto const sc_in = 3 * two_s_in + c_in; - // Output color. - for (auto c_out : {0, 1, 2}) { - // Spin-color index (0, ..., 5). - auto const sc_out = 3 * two_s_out + c_out; - - // See `qphix-codegen` file `dslash_common.cc` - // function - // `clover_term` for the index manipulations done - // here. - - // Using separate loops over the actual indices is - // probably - // faster than the branching in the innermost loop. 
- - if (sc_out == sc_in) { - cplx_mul_acc(spinor_out[c_out][four_s_out][re][xi], - spinor_out[c_out][four_s_out][im][xi], diag_in[sc_in][veclen_idx], - QPhiX::rep(0.0), spinor_in[c_in][four_s_in][re][xi], - spinor_in[c_in][four_s_in][im][xi]); - } else if (sc_out < sc_in) { - auto const idx15 = sc_in * (sc_in - 1) / 2 + sc_out; - cplx_mul_acc( - spinor_out[c_out][four_s_out][re][xi], spinor_out[c_out][four_s_out][im][xi], - off_diag_in[idx15][re][veclen_idx], - // aww hell, maybe one should just add negation to QPhiX::half ? - QPhiX::rep(-QPhiX::rep(off_diag_in[idx15][im][veclen_idx])), - spinor_in[c_in][four_s_in][re][xi], spinor_in[c_in][four_s_in][im][xi]); - } else { - auto const idx15 = sc_out * (sc_out - 1) / 2 + sc_in; - cplx_mul_acc( - spinor_out[c_out][four_s_out][re][xi], spinor_out[c_out][four_s_out][im][xi], - off_diag_in[idx15][re][veclen_idx], off_diag_in[idx15][im][veclen_idx], - spinor_in[c_in][four_s_in][re][xi], spinor_in[c_in][four_s_in][im][xi]); - } - } - } - } - } - } - } -}; - -template -struct InnerCloverProduct< - FT, veclen, soalen, compress12, - typename ::QPhiX::Geometry::FullCloverBlock> { - static void multiply( - typename ::QPhiX::Geometry::FourSpinorBlock &spinor_out, - typename ::QPhiX::Geometry::FourSpinorBlock const &spinor_in, - typename ::QPhiX::Geometry::FullCloverBlock const &clov_block, - int const xi, int const veclen_idx) { - // The clover term is block-diagonal in spin. Therefore we need - // to iterate over the two blocks of spin. - for (auto s_block : {0, 1}) { - // handy reference to half-spinor block - auto const &block_in = s_block == 0 ? clov_block.block1 : clov_block.block2; - // Input two-spinor component. - for (auto two_s_in : {0, 1}) { - // Reconstruct four spinor index. - auto const four_s_in = 2 * s_block + two_s_in; - // Output two-spinor component. - for (auto two_s_out : {0, 1}) { - // Reconstruct four spinor index. - auto const four_s_out = 2 * s_block + two_s_out; - // Input color. 
- for (auto c_in : {0, 1, 2}) { - // Spin-color index (0, ..., 5). - auto const sc_in = 3 * two_s_in + c_in; - // Output color. - for (auto c_out : {0, 1, 2}) { - // Spin-color index (0, ..., 5). - auto const sc_out = 3 * two_s_out + c_out; - - cplx_mul_acc( - spinor_out[c_out][four_s_out][re][xi], spinor_out[c_out][four_s_out][im][xi], - block_in[sc_out][sc_in][re][veclen_idx], block_in[sc_out][sc_in][im][veclen_idx], - spinor_in[c_in][four_s_in][re][xi], spinor_in[c_in][four_s_in][im][xi]); - } - } - } - } - } - } -}; - -/** - Multiplies a checkerboarded QPhiX Clover term with a checkerboarded QPhiX spinor. - - Padding is taken care of. A test case for (a copy of) this function exists in QPhiX. - - If the preprocessor macro `PRINT_MAPPING` is defined, it will print out the mapping of `(x, y, z, - t)` coordinates to block indices. Also it will check that each block is accessed the proper number - of times, that is `soalen` for spinors and `veclen` for clover blocks. - - \param[out] out Output spinor - \param[in] in Input spinor - \param[in] clover Clover block - \param[in] geom Geometry object holding the dimension of clover and spinor - */ -template -void clover_product( - typename ::QPhiX::Geometry::FourSpinorBlock *const out, - typename ::QPhiX::Geometry::FourSpinorBlock const *const in, - Clover *clover, ::QPhiX::Geometry &geom) { - ::QPhiX::zeroSpinor(out, geom, n_blas_simt); - -#ifdef PRINT_MAPPING - std::vector spin_touches(geom.getPxyz() * geom.Nt(), 0); - std::vector clover_touches(geom.getPxyz() * geom.Nt() * soalen / veclen, 0); - - std::cout << std::setw(3) << "x" << std::setw(3) << "y" << std::setw(3) << "z" << std::setw(3) - << "t" - << ":" << std::setw(5) << "spin" << std::setw(5) << "clov" - << "\n"; -#endif - - // Iterate through all the block. 
- for (int t = 0; t < geom.Nt(); ++t) { - for (int z = 0; z < geom.Nz(); ++z) { - for (int y = 0; y < geom.Ny(); ++y) { - for (int x = 0; x < geom.Nxh(); ++x) { - // First element in the current XY plane at desired Z and T. - auto const xyBase = t * geom.getPxyz() + z * geom.getPxy(); - // Index of the SoA along the X direction. - auto const xb = x / soalen; - // Index within the SoA. - auto const xi = x % soalen; - // Global spin block index. - auto const spin_block_idx = xb + geom.Nxh() / soalen * y + xyBase; - // Global clover/gauge block index. - auto const clov_block_idx = - xb + (y / geom.nGY()) * geom.Nxh() / soalen + xyBase / geom.nGY(); - // Index of the SoA structure within the current tile. - // auto const tile = (geom.Nxh() / soalen * y + xyBase) % geom.nGY(); - auto const tile = y % geom.nGY(); - // Vector index for clover/gauge. The SoA index only runs to - // `soalen`, this index needs to run to `veclen`, that is across the - // various SoA within the tile. - auto const veclen_idx = soalen * tile + xi; - -#ifdef PRINT_MAPPING - ++spin_touches[spin_block_idx]; - ++clover_touches[clov_block_idx]; - - std::cout << std::setw(3) << x << std::setw(3) << y << std::setw(3) << z << std::setw(3) - << t << ":" << std::setw(5) << spin_block_idx << std::setw(5) << clov_block_idx - << "\n"; -#endif - - assert(xi + xb * soalen == x); - - // References to the objects at desired block. - auto const &clov_block = clover[clov_block_idx]; - auto const &spinor_in = in[spin_block_idx]; - auto &spinor_out = out[spin_block_idx]; - - InnerCloverProduct::multiply( - spinor_out, spinor_in, clov_block, xi, veclen_idx); - } - } - } - } - -#ifdef PRINT_MAPPING - std::cout << std::flush; - - // Make sure that each block got touched the correct number of times. 
- for (int i = 0; i != spin_touches.size(); ++i) { - if (spin_touches[i] != soalen) { - std::cout << "Spin missmatch: Block " << std::setw(4) << i << " accessed " << std::setw(4) - << spin_touches[i] << " times instead of " << soalen << "\n"; - } - } - - for (int i = 0; i != clover_touches.size(); ++i) { - if (clover_touches[i] != veclen) { - std::cout << "Clover missmatch: Block " << std::setw(4) << i << " accessed " << std::setw(4) - << clover_touches[i] << " times instead of " << veclen << "\n"; - } - } - - std::cout << std::flush; -#endif -} - -/** - Abstract base class for all single-flavor Dslash variants. - - There are four Dslash operators which are implemented in QPhiX: - - - Wilson - - Wilson clover - - Wilson twisted mass - - Wilson clover with twisted mass - - Each of these has a the actual Dslash operation and a so-called “achimbdpsi” operation. These act - on four-spinors given a gauge field. This base class provides a uniform interface to all four - kinds. - - This code should eventually be migrated into the QPhiX repository. Currently these classes are - mere delegators. In the QPhiX repository, the actual classes there should be used as concrete - classes. - */ -template -class Dslash { - public: - typedef ::QPhiX::Geometry Geom; - typedef typename Geom::FourSpinorBlock Spinor; - typedef typename Geom::SU3MatrixBlock SU3MatrixBlock; - - explicit Dslash(Geom *geom, double const t_boundary_, double const aniso_coeff_S_, - double const aniso_coeff_T_, double const mass_, bool use_tbc_[4] = nullptr, - double tbc_phases_[4][2] = nullptr) - : geom(geom), - t_boundary(t_boundary_), - aniso_coeff_S(aniso_coeff_S_), - aniso_coeff_T(aniso_coeff_T_), - mass(mass_) {} - - /** - Computes \f$ \psi_\mathrm o = A_\mathrm{oo} \chi_\mathrm o \f$. - - The actual definition of the matrix \f$ A_\mathrm{oo} \f$ is - implementation dependent and can be the mass factor \f$ \alpha = 4 + m - \f$ for plain Wilson or something more complicated for twisted mass. 
- - \param[out] out Output spinor \f$ \psi \f$. - \param[in] in Input spinor \f$ \chi \f$. - */ - virtual void A_chi(Spinor *const out, Spinor const *const in, int const isign, int const cb) = 0; - - /** - Computes \f$ \psi_\mathrm e = A_\mathrm{ee}^{-1} \chi_\mathrm e \f$. - - \param[out] out Output spinor \f$ \psi \f$. - \param[in] in Input spinor \f$ \chi \f$. - */ - virtual void A_inv_chi(Spinor *const out, Spinor const *const in, int const isign, - int const cb) = 0; - - /** - Forwarder for the `dslash`. - - This will call the `dslash` function of the respective QPhiX dslash class. There is a subtle - difference between the Wilson and all other cases. The Wilson dslash is just the hopping matrix, - just the operator \f$ D \f$. For every other case (clover, twisted mass, twisted mass clover), - the `dslash` member function will compute \f$ A^{-1} D \f$. In the Wilson case, this \f$ A = - \alpha = 4 + m = 1/(2 \kappa) \f$. Since that is _not_ included in the Wilson `dslash`, you will - obtain different results when using WilsonDslash::dslash and WilsonTMDslash::dslash with \f$ - \mu = 0 \f$. - - \todo Make this member function `const`. For this the member function in - QPhiX that is called internally must be marked `const` as well. - */ - virtual void dslash(Spinor *const res, const Spinor *const psi, const SU3MatrixBlock *const u, - int const isign, int const cb) = 0; - - /** - Always plain Wilson dslash. - - In contrast to the \ref dslash member function which just forwards the implementation of QPhiX, - this will always give you the “naked” plain Wilson dslash without any factors of \f$ A^{-1} \f$ - applied. - */ - virtual void plain_dslash(Spinor *const res, const Spinor *const psi, - const SU3MatrixBlock *const u, int const isign, int const cb) { - // XXX Perhaps rather implement this with an instance of the WilsonDslash instead? 
- - auto tmp = QPhiX::makeFourSpinorHandle(*geom); - dslash(tmp.get(), psi, u, isign, cb); - A_chi(res, tmp.get(), isign, cb); - }; - - /** - Always “dressed” dslash. - - This computes \f$ A^{-1} D \f$ for all variants. In the Wilson case, this will give \f$ - \alpha^{-1} D \f$. - */ - virtual void A_inv_dslash(Spinor *const res, const Spinor *const psi, - const SU3MatrixBlock *const u, int const isign, int const cb) { - dslash(res, psi, u, isign, cb); - }; - - /** - Forwarder for the `achimbdpsi`. - - \todo Make this member function `const`. For this the member function in QPhiX that is called - internally must be marked `const` as well. - */ - virtual void achimbdpsi(Spinor *const res, const Spinor *const psi, const Spinor *const chi, - const SU3MatrixBlock *const u, double const alpha, double const beta, - int const isign, int const cb) = 0; - - /** - Prepares the sources on the odd checkerboard. - - This computes - \f[ - \tilde b_o = \frac 12 D_{oe} M_{ee}^{-1} b_e + b_o \,. - \f] - - \param[out] tilde_b_odd Prepared source - \param[in] b_even Source (right hand side) on the even lattice sites - \param]in] b_odd Source on the odd lattice sites - \param[in] u Gauge field on the odd lattice sites - */ - virtual void prepare_source(Spinor *const tilde_b_odd, Spinor const *const b_even, - Spinor const *const b_odd, SU3MatrixBlock const *const u); - - /** - Reconstructs the solution on the even lattices sites. - - This computes - \f[ - x_e = M_{ee}^{-1} \left( b_e - \frac 12 D_{eo} x_o \right) \,. 
- \f] - - \param[out] x_even Solution on the even lattices sites - \param[in] b_even Source (right hand side) on the even lattice sites - \param[in] x_odd Solution on the odd lattices sites - \param[in] u Gauge field on the even lattice sites - */ - virtual void reconstruct_solution(Spinor *const x_even, Spinor const *const b_even, - Spinor const *const x_odd, SU3MatrixBlock const *const u); - - Geom *getGeometry() const { return geom; } - - private: - Geom *const geom; - - double const t_boundary; - double const aniso_coeff_S; - double const aniso_coeff_T; - double const mass; -}; - -template -class WilsonDslash : public Dslash { - public: - typedef typename ::QPhiX::Geometry::FourSpinorBlock Spinor; - typedef typename ::QPhiX::Geometry::SU3MatrixBlock SU3MatrixBlock; - - WilsonDslash(::QPhiX::Geometry *geom_, double const t_boundary_, - double const aniso_coeff_S_, double const aniso_coeff_T_, double const mass_, - bool use_tbc_[4] = nullptr, double tbc_phases_[4][2] = nullptr) - : Dslash(geom_, t_boundary_, aniso_coeff_S_, aniso_coeff_T_, - mass_, use_tbc_, tbc_phases_), - upstream_dslash(geom_, t_boundary_, aniso_coeff_S_, aniso_coeff_T_, use_tbc_, tbc_phases_), - mass_factor_alpha(4.0 + mass_), - mass_factor_beta(1.0 / (4.0 * mass_factor_alpha)) {} - - void A_chi(Spinor *const out, Spinor const *const in, int const isign_ignored, - int const cb_ignored) override { - int const n_blas_simt = 1; - ::QPhiX::axy(mass_factor_alpha, in, out, upstream_dslash.getGeometry(), n_blas_simt); - } - - void A_inv_chi(Spinor *const out, Spinor const *const in, int const isign_ignored, - int const cb_ignored) override { - int const n_blas_simt = 1; - ::QPhiX::axy(1.0 / mass_factor_alpha, in, out, upstream_dslash.getGeometry(), n_blas_simt); - } - - void dslash(Spinor *const res, const Spinor *const psi, const SU3MatrixBlock *const u, - int const isign, int const cb) override { - upstream_dslash.dslash(res, psi, u, isign, cb); - } - - void plain_dslash(Spinor *const res, const 
Spinor *const psi, const SU3MatrixBlock *const u, - int const isign, int const cb) override { - dslash(res, psi, u, isign, cb); - }; - - void A_inv_dslash(Spinor *const res, const Spinor *const psi, const SU3MatrixBlock *const u, - int const isign, int const cb) override { - auto tmp = QPhiX::makeFourSpinorHandle(upstream_dslash.getGeometry()); - dslash(tmp.get(), psi, u, isign, cb); - A_inv_chi(res, tmp.get(), isign, cb); - }; - - void achimbdpsi(Spinor *const res, const Spinor *const psi, const Spinor *const chi, - const SU3MatrixBlock *const u, double const alpha, double const beta, - int const isign, int const cb) override { - upstream_dslash.dslashAChiMinusBDPsi(res, psi, chi, u, alpha, beta, isign, cb); - } - - private: - ::QPhiX::Dslash upstream_dslash; - - double const mass_factor_alpha; - double const mass_factor_beta; -}; - -template -class WilsonTMDslash : public Dslash { - public: - typedef typename ::QPhiX::Geometry::FourSpinorBlock Spinor; - typedef typename ::QPhiX::Geometry::SU3MatrixBlock SU3MatrixBlock; - - WilsonTMDslash(::QPhiX::Geometry *geom_, double const t_boundary_, - double const aniso_coeff_S_, double const aniso_coeff_T_, double const mass_, - double const twisted_mass_, bool use_tbc_[4] = nullptr, - double tbc_phases_[4][2] = nullptr) - : Dslash(geom_, t_boundary_, aniso_coeff_S_, aniso_coeff_T_, - mass_, use_tbc_, tbc_phases_), - upstream_dslash(geom_, t_boundary_, aniso_coeff_S_, aniso_coeff_T_, mass_, twisted_mass_, - use_tbc_, tbc_phases_), - mass_factor_alpha(4.0 + mass_), - mass_factor_beta(0.25), - derived_mu(twisted_mass_ / mass_factor_alpha), - derived_mu_inv(mass_factor_alpha / - (mass_factor_alpha * mass_factor_alpha + twisted_mass_ * twisted_mass_)) {} - - void A_chi(Spinor *const out, Spinor const *const in, int const isign, - int const cb_ignored) override { - helper_A_chi(out, in, -derived_mu * isign, mass_factor_alpha); - } - - void A_inv_chi(Spinor *const out, Spinor const *const in, int const isign, - int const 
cb_ignored) override { - helper_A_chi(out, in, derived_mu * isign, derived_mu_inv); - } - - void dslash(Spinor *const res, const Spinor *const psi, const SU3MatrixBlock *const u, - int const isign, int const cb) override { - upstream_dslash.dslash(res, psi, u, isign, cb); - } - - void achimbdpsi(Spinor *const res, const Spinor *const psi, const Spinor *const chi, - const SU3MatrixBlock *const u, double const alpha, double const beta, - int const isign, int const cb) override { - upstream_dslash.dslashAChiMinusBDPsi(res, psi, chi, u, alpha, beta, isign, cb); - } - - private: - void helper_A_chi(Spinor *const out, Spinor const *const in, double const factor_a, - double const factor_b); - - ::QPhiX::TMDslash upstream_dslash; - - double const mass_factor_alpha; - double const mass_factor_beta; - double const derived_mu; - double const derived_mu_inv; -}; - -template -class WilsonClovDslash : public Dslash { - public: - typedef typename ::QPhiX::Geometry::FourSpinorBlock Spinor; - typedef typename ::QPhiX::Geometry::SU3MatrixBlock SU3MatrixBlock; - typedef typename ::QPhiX::Geometry::CloverBlock CloverBlock; - - WilsonClovDslash(::QPhiX::Geometry *geom_, - double const t_boundary_, double const aniso_coeff_S_, - double const aniso_coeff_T_, double const mass_, - CloverBlock *const (&clover_)[2], CloverBlock *const (&inv_clover_)[2], - bool use_tbc_[4] = nullptr, double tbc_phases_[4][2] = nullptr) - : Dslash(geom_, t_boundary_, aniso_coeff_S_, aniso_coeff_T_, - mass_, use_tbc_, tbc_phases_), - upstream_dslash(geom_, t_boundary_, aniso_coeff_S_, aniso_coeff_T_, use_tbc_, tbc_phases_), - mass_factor_alpha(4.0 + mass_), - mass_factor_beta(1.0 / (4.0 * mass_factor_alpha)) { - for (int cb : {0, 1}) { - clover[cb] = clover_[cb]; - inv_clover[cb] = inv_clover_[cb]; - } - } - - void A_chi(Spinor *const out, Spinor const *const in, int const isign_ignored, - int const cb) override { - clover_product(out, in, clover[cb], upstream_dslash.getGeometry()); - } - - void 
A_inv_chi(Spinor *const out, Spinor const *const in, int const isign_ignored, - int const cb) override { - clover_product(out, in, inv_clover[cb], upstream_dslash.getGeometry()); - } - - void dslash(Spinor *const res, const Spinor *const psi, const SU3MatrixBlock *const u, - int const isign, int const cb) override { - upstream_dslash.dslash(res, psi, u, inv_clover[cb], isign, cb); - } - - void achimbdpsi(Spinor *const res, const Spinor *const psi, const Spinor *const chi, - const SU3MatrixBlock *const u, double const alpha, double const beta, - int const isign, int const cb) override { - upstream_dslash.dslashAChiMinusBDPsi(res, psi, chi, u, clover[cb], mass_factor_beta, isign, cb); - } - - private: - ::QPhiX::ClovDslash upstream_dslash; - - double const mass_factor_alpha; - double const mass_factor_beta; - - /** - Reference to the clover term. - - This class has to provide a `dslash` and `achimbdpsi` member function with the prescribed - argument list which does not contain the clover term. The user of these classes should not have - to differentiate between non-clover and clover variants. In order to provide the function - signature, the clover term is a member. This means that the user has to construct a new operator - if the pointers to the clover field need to be changed. Seperate pointers are kept for the fields - on the even and odd checkerboards, hence the array dimension. - */ - CloverBlock *clover[2]; - - /// See \ref clover. 
- CloverBlock *inv_clover[2]; -}; - -template -class WilsonClovTMDslash : public Dslash { - public: - typedef typename ::QPhiX::Geometry::FourSpinorBlock Spinor; - typedef typename ::QPhiX::Geometry::SU3MatrixBlock SU3MatrixBlock; - typedef - typename ::QPhiX::Geometry::FullCloverBlock FullCloverBlock; - typedef - typename ::QPhiX::Geometry::CloverBlock CloverBlock; - - WilsonClovTMDslash(::QPhiX::Geometry *geom_, - double const t_boundary_, double const aniso_coeff_S_, - double const aniso_coeff_T_, double const mass_, double const twisted_mass_, - CloverBlock *const (&clover_)[2], - FullCloverBlock *const (&inv_clover_)[2][2], bool use_tbc_[4] = nullptr, - double tbc_phases_[4][2] = nullptr) - : Dslash(geom_, t_boundary_, aniso_coeff_S_, aniso_coeff_T_, - mass_, use_tbc_, tbc_phases_), - upstream_dslash(geom_, t_boundary_, aniso_coeff_S_, aniso_coeff_T_, use_tbc_, tbc_phases_), - mass_factor_alpha(4.0 + mass_), - mass_factor_beta(0.25), - derived_mu(twisted_mass_ / mass_factor_alpha), - derived_mu_inv(mass_factor_alpha / - (mass_factor_alpha * mass_factor_alpha + twisted_mass_ * twisted_mass_)) { - for (int cb : {0, 1}) { - clover[cb] = clover_[cb]; - for (int fl : {0, 1}) { - inv_clover[cb][fl] = inv_clover_[cb][fl]; - } - } - } - - void A_chi(Spinor *const out, Spinor const *const in, int const isign, int const cb) override { - clover_product(out, in, clover[cb], upstream_dslash.getGeometry()); - // TODO: add twisted mass here - } - - void A_inv_chi(Spinor *const out, Spinor const *const in, int const isign, - int const cb) override { - if (isign == -1) { - clover_product(out, in, inv_clover[cb][1], upstream_dslash.getGeometry()); - } else { - clover_product(out, in, inv_clover[cb][0], upstream_dslash.getGeometry()); - } - } - - void dslash(Spinor *const res, const Spinor *const psi, const SU3MatrixBlock *const u, - int const isign, int const cb) override { - upstream_dslash.dslash(res, psi, u, (const FullCloverBlock **)inv_clover[cb], isign, cb); - } - - void 
achimbdpsi(Spinor *const res, const Spinor *const psi, const Spinor *const chi, - const SU3MatrixBlock *const u, double const alpha, double const beta, - int const isign, int const cb) override { - upstream_dslash.dslashAChiMinusBDPsi(res, psi, chi, u, clover[cb], - mass_factor_beta, isign, cb); - } - - private: - ::QPhiX::TMClovDslash upstream_dslash; - - double const mass_factor_alpha; - double const mass_factor_beta; - double const derived_mu; - double const derived_mu_inv; - - CloverBlock *clover[2]; - /* For twisted clover, there are two fields on each checkerboard which differ in the sign - * of the twisted quark mass. In effect then, the inner index can be thought of as being - * in flavour space while the outer index is the checkerboard index. - */ - FullCloverBlock *inv_clover[2][2]; -}; - -template -void WilsonTMDslash::helper_A_chi(Spinor *const out, - Spinor const *const in, - double const factor_a, - double const factor_b) { - auto const nVecs = upstream_dslash.getGeometry().nVecs(); - auto const Pxy = upstream_dslash.getGeometry().getPxy(); - auto const Pxyz = upstream_dslash.getGeometry().getPxyz(); - - for (uint64_t t = 0; t < T; t++) - for (uint64_t x = 0; x < LX / 2; x++) - for (uint64_t y = 0; y < LY; y++) - for (uint64_t z = 0; z < LZ; z++) { - uint64_t const SIMD_vector = x / soalen; - uint64_t const x_internal = x % soalen; - uint64_t const qphix_idx = t * Pxyz + z * Pxy + y * nVecs + SIMD_vector; - - for (int color = 0; color < 3; ++color) { - for (int spin_block = 0; spin_block < 2; ++spin_block) { - // Implement the $\gamma_5$ structure. - auto const signed_factor_a = factor_a * (spin_block == 0 ? 
1.0 : -1.0); - - for (int half_spin = 0; half_spin < 2; ++half_spin) { - auto const four_spin = 2 * spin_block + half_spin; - for (int v = 0; v < soalen; ++v) { - auto &out_bcs = out[qphix_idx][color][four_spin]; - auto const &in_bcs = in[qphix_idx][color][four_spin]; - - out_bcs[re][v] = factor_b * (in_bcs[re][v] + signed_factor_a * in_bcs[im][v]); - out_bcs[im][v] = factor_b * (in_bcs[im][v] - signed_factor_a * in_bcs[re][v]); - } - } - } - } - - } // volume -}; - -template -void Dslash::prepare_source(Spinor *const tilde_b_odd, - Spinor const *const b_even, - Spinor const *const b_odd, - SU3MatrixBlock const *const u) { - auto Mee_be = QPhiX::makeFourSpinorHandle(*geom); - WilsonDslash plain_dslash(geom, t_boundary, aniso_coeff_S, - aniso_coeff_T, mass); - - A_inv_chi(Mee_be.get(), b_even, 1, cb_even); - - plain_dslash.dslash(tilde_b_odd, Mee_be.get(), u, 1, cb_odd); - - // FIXME Perhaps use a variable number of BLAS threads here (last parameter). - QPhiX::aypx(0.5, Mee_be.get(), tilde_b_odd, *geom, 1); -} - -template -void Dslash::reconstruct_solution(Spinor *const x_even, - Spinor const *const b_even, - Spinor const *const x_odd, - SU3MatrixBlock const *const u) { - auto tmp = QPhiX::makeFourSpinorHandle(*geom); - WilsonDslash plain_dslash(geom, t_boundary, aniso_coeff_S, - aniso_coeff_T, mass); - - plain_dslash.dslash(tmp.get(), x_odd, u, 1, cb_even); - QPhiX::aypx(0.5, b_even, tmp.get(), *geom, 1); - A_inv_chi(x_even, tmp.get(), 1, cb_even); -} -} diff --git a/qphix_interface.cpp b/qphix_interface.cpp deleted file mode 100644 index 2c61427dd..000000000 --- a/qphix_interface.cpp +++ /dev/null @@ -1,2192 +0,0 @@ -/*********************************************************************** - * - * Copyright (C) 2015 Mario Schroeck - * 2016 Peter Labus - * 2017 Peter Labus, Martin Ueding, Bartosz Kostrzewa - * - * This file is part of tmLQCD. 
- * - * tmLQCD is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * tmLQCD is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with tmLQCD. If not, see . - * - ***********************************************************************/ - -#include "qphix_interface.h" -#include "qphix_interface.hpp" -#include "qphix_interface_utils.hpp" -#include "qphix_types.h" -#include "qphix_veclen.h" - -#ifdef TM_USE_MPI -#include -#endif - -extern "C" { -#ifdef HAVE_CONFIG_H -#include "tmlqcd_config.h" -#endif -#include "boundary.h" -#include "geometry_eo.h" -#include "gettime.h" -#include "global.h" -#include "linalg/convert_eo_to_lexic.h" -#include "linalg/diff.h" -#include "linalg/square_norm.h" -#include "misc_types.h" -#include "operator/Hopping_Matrix.h" -#include "operator/clover_leaf.h" -#include "operator/clovertm_operators.h" -#include "operator_types.h" -#include "struct_accessors.h" - -// for the normalisation of the heavy doublet when running -// RHMC -#include "phmc.h" - -#include "solver/matrix_mult_typedef.h" -#include "solver/solver.h" -#include "solver/solver_field.h" -#include "solver/solver_params.h" -#include "solver/solver_types.h" -#include "start.h" -#include "xchange/xchange_gauge.h" -} -#ifdef TM_USE_OMP -#include -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace tmlqcd; - -tm_QPhiXParams_t qphix_input; - -int By; -int Bz; -int NCores; -int 
Sy; -int Sz; -int PadXY; -int PadXYZ; -int MinCt; -int N_simt; -bool compress12; -QphixPrec_t qphix_precision; -QphixPrec_t qphix_inner_precision; - -int subLattSize[4]; -int lattSize[4]; -int qmp_geom[4]; -int qmp_tm_map[4]; - -// angles for boundary phases, values come from read_input -extern double X0, X1, X2, X3; - -bool use_tbc[4]; -double tbc_phases[4][2]; -// we always use twisted boundary conditions, which means that we are always -// periodic in time and any possible anti-periodicity is implemented via -// the phase -double constexpr t_boundary = 1.0; - -template -struct rsdTarget { - static const double value; -}; - -template <> -const double rsdTarget::value = 1.0e-3; - -template <> -const double rsdTarget::value = 1.0e-8; - -void _initQphix(int argc, char **argv, tm_QPhiXParams_t params, int c12, QphixPrec_t precision_, - QphixPrec_t inner_precision_) { - static bool qmp_topo_initialised = false; - - // Global Lattice Size - lattSize[0] = LX * g_nproc_x; - lattSize[1] = LY * g_nproc_y; - lattSize[2] = LZ * g_nproc_z; - lattSize[3] = T * g_nproc_t; - - // Local Lattice Size - subLattSize[0] = LX; - subLattSize[1] = LY; - subLattSize[2] = LZ; - subLattSize[3] = T; - - // extract twisted boundary conditions - for (int dim = 0; dim < 4; dim++) { - bool dim_tbc = false; - double dim_phase[2] = {1.0, 0.0}; - if (dim == 0) { - dim_tbc = (fabs(X1) > DBL_EPSILON); - dim_phase[0] = -((double *)(&phase_1))[0] / g_kappa; - dim_phase[1] = -((double *)(&phase_1))[1] / g_kappa; - } else if (dim == 1) { - dim_tbc = (fabs(X2) > DBL_EPSILON); - dim_phase[0] = -((double *)(&phase_2))[0] / g_kappa; - dim_phase[1] = -((double *)(&phase_2))[1] / g_kappa; - } else if (dim == 2) { - dim_tbc = (fabs(X3) > DBL_EPSILON); - dim_phase[0] = -((double *)(&phase_3))[0] / g_kappa; - dim_phase[1] = -((double *)(&phase_3))[1] / g_kappa; - } else if (dim == 3) { - dim_tbc = (fabs(X0) > DBL_EPSILON); - dim_phase[0] = -((double *)(&phase_0))[0] / g_kappa; - dim_phase[1] = -((double 
*)(&phase_0))[1] / g_kappa; - } - use_tbc[dim] = dim_tbc; - tbc_phases[dim][0] = dim_phase[0]; - tbc_phases[dim][1] = dim_phase[1]; - } - - By = params.By; - Bz = params.Bz; - NCores = params.NCores; - Sy = params.Sy; - Sz = params.Sz; - PadXY = params.PadXY; - PadXYZ = params.PadXYZ; - MinCt = params.MinCt; - N_simt = Sy * Sz; - if (c12 == 8) { - QPhiX::masterPrintf( - "# INFO QphiX: 8-parameter gauge compression not supported, using two row compression " - "instead!\n"); - c12 = 12; - } - compress12 = c12 == 12 ? true : false; - qphix_precision = precision_; - qphix_inner_precision = inner_precision_; - -#ifdef QPHIX_QMP_COMMS - // Declare the logical topology - if (!qmp_topo_initialised) { - // the QMP topology is the one implied by the number of processes in each - // dimension as required by QPHIX ( x fastest to t slowest running ) - qmp_geom[0] = g_nproc_x; - qmp_geom[1] = g_nproc_y; - qmp_geom[2] = g_nproc_z; - qmp_geom[3] = g_nproc_t; - - // in order for the topologies to agree between tmLQCD and QPhiX, the dimensions need to be - // permuted - // since Z is fastest in tmLQCD and X is second-slowest - qmp_tm_map[0] = 2; - qmp_tm_map[1] = 1; - qmp_tm_map[2] = 0; - qmp_tm_map[3] = 3; - if (QMP_declare_logical_topology_map(qmp_geom, 4, qmp_tm_map, 4) != QMP_SUCCESS) { - QMP_error("Failed to declare QMP Logical Topology\n"); - abort(); - } - // longish test to check if the logical coordinates are correctly mapped - if (g_debug_level >= 5) { - for (int proc = 0; proc < g_nproc; proc++) { - if (proc == g_proc_id) { - const int coordinates[4] = {g_proc_coords[1], g_proc_coords[2], g_proc_coords[3], - g_proc_coords[0]}; - int id = QMP_get_node_number_from(coordinates); - int *qmp_coords = QMP_get_logical_coordinates_from(id); - fflush(stdout); - printf("QMP id: %3d x:%3d y:%3d z:%3d t:%3d\n", id, qmp_coords[0], qmp_coords[1], - qmp_coords[2], qmp_coords[3]); - printf("MPI id: %3d x:%3d y:%3d z:%3d t:%3d\n\n", g_proc_id, g_proc_coords[1], - g_proc_coords[2], 
g_proc_coords[3], g_proc_coords[0]); - free(qmp_coords); - fflush(stdout); - MPI_Barrier(MPI_COMM_WORLD); - } else { - MPI_Barrier(MPI_COMM_WORLD); - } - } - } - qmp_topo_initialised = true; - } -#endif - -#ifdef QPHIX_QPX_SOURCE - if (thread_bind) { - QPhiX::setThreadAffinity(NCores_user, Sy_user * Sz_user); - } - QPhiX::reportAffinity(); -#endif -} - -void _initQphix(int argc, char **argv, tm_QPhiXParams_t params, int c12, QphixPrec_t precision_) { - _initQphix(argc, argv, params, c12, precision_, precision_); -} - -// Finalize the QPhiX library -void _endQphix() {} - -template -void reorder_clover_to_QPhiX( - QPhiX::Geometry &geom, - typename QPhiX::Geometry::CloverBlock *qphix_clover, int cb, - bool inverse, bool fl_offdiag = false) { - const double startTime = gettime(); - - /* the spin-colour clover term in sw_term and the corresponding inverse - * in sw_inv are stored in the tmLQCD gamma basis. - * When we translate spinors to QPhiX, we apply a transformation V to the tmLQCD - * spinor and then apply the same transformation to the output spinor - * ( we have V^dagger = V and V*V = 1 ) - * Thus, in order to translate the clover field, we need to copy - * (1+T)' = V*(1+T)*V, where T is the spin-colour clover-term - * This way, the clover term will be in the correct gamma basis. - * - * The tmLQCD clover term is stored in half-spinor blocks of colour matrices - * for which we need to work out what (1+T)'=V*(1+T)*V implies. - * Below, each sAB represents one 3x3 colour matrix - * - * +s33 -s32 0 0 - * T' = V*T*V = -s23 +s22 0 0 - * 0 0 +s11 -s10 - * 0 0 -s01 +s00 - * - * Such that the half-spinor blocks are inverted and within these, the ordering is - * reversed. Note that the off-diagonal 3x3 colour blocks are hermitian conjugate to - * each other and this is preserved by the transformation. 
- * - * The QPhiX (Wilson) clover term is stored as 12 reals on the diagonal - * in two 6-element vectors, one for each half-spinor spin pair - * and two sets of off-diagonal complex components. - * - * In addition, colour matrices are transposed in QPhiX. - * - * The tmLQCD clover term is stored as: - * - * s00 s01 - * s11 - * T = s22 s23 - * s33 - * - * with indexing - * - * sw[0][0] sw[1][0] - * sw[2][0] - * sw[0][1] sw[1][1] - * sw[2][1] - * - * The inverse has four su3 blocks instead and is indexed - * sw_inv[0][0] sw_inv[1][0] - * sw_inv[3][0] sw_inv[2][0] - * sw_inv[0][1] sw_inv[1][1] - * sw_inv[3][1] sw_inv[2][1] - * - * where blocks sw_inv[3][0] and sw_inv[3][1] are relevant only when mu > 0 - * - * There is a special case for the non-degenerate twisted clover operator. The - * flavour-off-diagonal components of the inverse clover term do not have an imaginary part on the - * spin-colour diagonal. They can thus be stored as CloverBlock, which is done in the QPhiX - * implementation of the ND tmclover operator. - * - * As a hack, this inverse is prepared by sw_invert_epsbar and placed in to the last - * VOLUME/2 sites of sw_inv. Reading from there is triggered by the boolean - * fl_offdiag. - */ - - // rescale to get clover term (or its inverse) in the physical normalisation - // rather than the kappa normalisation - const double scale = inverse ? 2.0 * g_kappa : 1.0 / (2.0 * g_kappa); - su3 ***tm_clover = inverse ? 
sw_inv : sw; - - // Number of elements in spin, color & complex - const int Ns = 4; - const int Nc = 3; - const int Nz = 2; - - // Geometric parameters for QPhiX data layout - const auto ngy = geom.nGY(); - const auto nVecs = geom.nVecs(); - const auto Pxy = geom.getPxy(); - const auto Pxyz = geom.getPxyz(); - - // packer for Wilson clover (real diagonal + complex upper-triangular) - /* for the index in the off_diagN arrays, we map to an index in the su3 struct - * keeping in mind complex conjugation - * The off-diagonal in QPhiX is stored as follows: - * - * 0 1 3 6 10 - * 2 4 7 11 - * 5 8 12 - * 9 13 - * 14 - * - * which we are going to map to su3 in blocks - * - * 0* 1* - * 2* - * - * 3 4 5 - * 6 7 8 - * 10 11 12 - * - * 9* 13* - * 14* - * - * where the asterisk indicates complex conjugation. As a linear array then, - * these mappings are: - * - */ - const int od_su3_offsets[15] = {Nz, - 2 * Nz, // 0 1 - Nc * Nz + 2 * Nz, // 2 - - 0, - Nz, - 2 * Nz, // 3 4 5 - Nc * Nz, - Nc * Nz + Nz, - Nc * Nz + 2 * Nz, // 6 7 8 - - Nz, // 9 - - 2 * Nc * Nz, - 2 * Nc * Nz + Nz, - 2 * Nc * Nz + 2 * Nz, // 10 11 12 - - 2 * Nz, - Nc * Nz + 2 * Nz}; // 13 14 - -#pragma omp parallel for collapse(4) - for (int64_t t = 0; t < T; t++) { - for (int64_t z = 0; z < LZ; z++) { - for (int64_t y = 0; y < LY; y++) { - for (int64_t v = 0; v < nVecs; v++) { - int64_t block = (t * Pxyz + z * Pxy) / ngy + (y / ngy) * nVecs + v; - - for (int64_t x_soa = 0; x_soa < SOALEN; x_soa++) { - int64_t xx = (y % ngy) * SOALEN + x_soa; - int64_t q_cb_x_coord = x_soa + v * SOALEN; - int64_t tm_x_coord = q_cb_x_coord * 2 + (((t + y + z) & 1) ^ cb); - - // the inverse of the clover term is in even-odd ordering - // while the clover term itself is lexicographically ordered - // for the special case of the nd tmclover operator, the inverse of the flavour - // off-diagonal components is stored in the last VOLUME/2 elements of sw_inv - int64_t tm_idx = - (inverse ? 
g_lexic2eosub[g_ipt[t][tm_x_coord][y][z]] : g_ipt[t][tm_x_coord][y][z]) + - ((inverse && fl_offdiag) ? VOLUME / 2 : 0); - - int b_idx; - - // we begin with the diagonal elements in CloverBlock - for (int d = 0; d < 6; d++) { - // choose the block in sw which corresponds to the block in T' - b_idx = d < 3 ? 2 : 0; - // get the right colour components - qphix_clover[block].diag1[d][xx] = QPhiX::rep( - *(reinterpret_cast(&tm_clover[tm_idx][b_idx][1].c00) + - (Nc * Nz + Nz) * (d % 3)) * - scale); - - qphix_clover[block].diag2[d][xx] = QPhiX::rep( - *(reinterpret_cast(&tm_clover[tm_idx][b_idx][0].c00) + - (Nc * Nz + Nz) * (d % 3)) * - scale); - } - - b_idx = 2; // s33 and s11 - for (int od : {0, 1, 2}) { - for (int reim : {0, 1}) { - qphix_clover[block].off_diag1[od][reim][xx] = QPhiX::rep( - (reim == 1 ? -1.0 : 1.0) * - *(reinterpret_cast(&tm_clover[tm_idx][b_idx][1].c00) + - od_su3_offsets[od] + reim) * - scale); - - qphix_clover[block].off_diag2[od][reim][xx] = QPhiX::rep( - (reim == 1 ? -1.0 : 1.0) * - *(reinterpret_cast(&tm_clover[tm_idx][b_idx][0].c00) + - od_su3_offsets[od] + reim) * - scale); - } - } - - b_idx = 1; // s32 and s10 - for (int od : {3, 4, 5, 6, 7, 8, 10, 11, 12}) { - for (int reim : {0, 1}) { - qphix_clover[block].off_diag1[od][reim][xx] = QPhiX::rep( - *(reinterpret_cast(&tm_clover[tm_idx][b_idx][1].c00) + - od_su3_offsets[od] + reim) * - (-scale)); - - qphix_clover[block].off_diag2[od][reim][xx] = QPhiX::rep( - *(reinterpret_cast(&tm_clover[tm_idx][b_idx][0].c00) + - od_su3_offsets[od] + reim) * - (-scale)); - } - } - - b_idx = 0; // s22 and s00 - for (int od : {9, 13, 14}) { - for (int reim : {0, 1}) { - qphix_clover[block].off_diag1[od][reim][xx] = QPhiX::rep( - (reim == 1 ? -1.0 : 1.0) * - *(reinterpret_cast(&tm_clover[tm_idx][b_idx][1].c00) + - od_su3_offsets[od] + reim) * - scale); - - qphix_clover[block].off_diag2[od][reim][xx] = QPhiX::rep( - (reim == 1 ? 
-1.0 : 1.0) * - *(reinterpret_cast(&tm_clover[tm_idx][b_idx][0].c00) + - od_su3_offsets[od] + reim) * - scale); - } - } - - } // x_soa - } // for(v) - } // for(y) - } // for(z) - } // for(t) - - const double diffTime = gettime() - startTime; - if (g_debug_level > 1) { - QPhiX::masterPrintf( - "# QPHIX-interface: time spent in reorder_clover_to_QPhiX (CloverBlock): %f secs\n", - diffTime); - } -} - -template -void reorder_clover_to_QPhiX( - QPhiX::Geometry &geom, - typename QPhiX::Geometry::FullCloverBlock *qphix_clover[2], - int cb, bool inverse) { - const double startTime = gettime(); - - /* the spin-colour clover term in sw_term and the corresponding inverse - * in sw_inv are stored in the tmLQCD gamma basis. - * When we translate spinors to QPhiX, we apply a transformation V to the tmLQCD - * spinor and then apply the same transformation to the output spinor - * ( we have V^dagger = V and V*V = 1 ) - * Thus, in order to translate the clover field, we need to copy - * (1+T)' = V*(1+T)*V, where T is the spin-colour clover-term - * This way, the clover term will be in the correct gamma basis. - * - * The tmLQCD clover term is stored in half-spinor blocks of colour matrices - * for which we need to work out what (1+T)'=V*(1+T)*V implies. - * Below, each sAB represents one 3x3 colour matrix - * - * +s33 -s32 0 0 - * T' = V*T*V = -s23 +s22 0 0 - * 0 0 +s11 -s10 - * 0 0 -s01 +s00 - * - * Such that the half-spinor blocks are inverted and within these, the ordering is - * reversed. Note that the off-diagonal 3x3 colour blocks are hermitian conjugate to - * each other and this is preserved by the transformation. - * - * The QPhiX (tmclover) clover term and its inverse are stored as a pair of full - * 6x6 complex matrices which are multiplied with the spinor in exactly the same way - * as in tmLQCD. 
- * - * The tmLQCD clover term is stored as: - * - * s00 s01 - * s11 - * T = s22 s23 - * s33 - * - * with indexing - * - * sw[0][0] sw[1][0] - * sw[2][0] - * sw[0][1] sw[1][1] - * sw[2][1] - * - * The inverse has four su3 blocks instead and is indexed - * sw_inv[0][0] sw_inv[1][0] - * sw_inv[3][0] sw_inv[2][0] - * sw_inv[0][1] sw_inv[1][1] - * sw_inv[3][1] sw_inv[2][1] - * - * where blocks sw_inv[3][0] and sw_inv[3][1] are relevant only when mu > 0 * - */ - - // rescale to get clover term (or its inverse) in the physical normalisation - // rather than the kappa normalisation - const double scale = inverse ? 2.0 * g_kappa : 1.0 / (2.0 * g_kappa); - su3 ***tm_clover = inverse ? sw_inv : sw; - - // Number of elements in spin, color & complex - const int Ns = 4; - const int Nc = 3; - const int Nz = 2; - - const double amu = g_mu / (2.0 * g_kappa); - - // Geometric parameters for QPhiX data layout - const auto ngy = geom.nGY(); - const auto nVecs = geom.nVecs(); - const auto Pxy = geom.getPxy(); - const auto Pxyz = geom.getPxyz(); - -#pragma omp parallel for collapse(4) - for (int64_t t = 0; t < T; t++) { - for (int64_t z = 0; z < LZ; z++) { - for (int64_t y = 0; y < LY; y++) { - for (int64_t v = 0; v < nVecs; v++) { - int64_t block = (t * Pxyz + z * Pxy) / ngy + (y / ngy) * nVecs + v; - - for (int64_t x_soa = 0; x_soa < SOALEN; x_soa++) { - int64_t xx = (y % ngy) * SOALEN + x_soa; - int64_t q_cb_x_coord = x_soa + v * SOALEN; - int64_t tm_x_coord = q_cb_x_coord * 2 + (((t + y + z) & 1) ^ cb); - - // the inverse of the clover term is in even-odd ordering - // while the clover term itself is lexicographically ordered - int64_t tm_idx = - inverse ? g_lexic2eosub[g_ipt[t][tm_x_coord][y][z]] : g_ipt[t][tm_x_coord][y][z]; - - for (int fl : {0, 1}) { - if (inverse && fl == 1) { - // the inverse clover term for the second flavour is stored at an offset - tm_idx += VOLUME / 2; - } - for (int q_hs : {0, 1}) { - auto &hs_block = - ((q_hs == 0) ? 
qphix_clover[fl][block].block1 : qphix_clover[fl][block].block2); - for (int q_sc1 = 0; q_sc1 < 6; q_sc1++) { - for (int q_sc2 = 0; q_sc2 < 6; q_sc2++) { - const int q_s1 = q_sc1 / 3; - const int q_s2 = q_sc2 / 3; - const int q_c1 = q_sc1 % 3; - const int q_c2 = q_sc2 % 3; - - // invert in spin as required by V*T*V - const int t_hs = 1 - q_hs; - // the indices inside the half-spinor are also inverted - // (which transposes them, of course) - const int t_s1 = 1 - q_s1; - const int t_s2 = 1 - q_s2; - // carry out the mapping from T' to T, keeping in mind that for the inverse - // there are four blocks also on the tmLQCD side, otherwise there are just three - const int t_b_idx = t_s1 + t_s2 + ((inverse && t_s1 == 1 && t_s2 == 0) ? 2 : 0); - for (int reim : {0, 1}) { - hs_block[q_sc1][q_sc2][reim][xx] = QPhiX::rep( - scale * - // off-diagonal (odd-numbered) blocks change sign - (t_b_idx & 1 ? (-1.0) : 1.0) * - // if not doing the inverse and in the bottom-left block, need to - // complex conjugate - ((!inverse && (t_s1 == 1 && t_s2 == 0) && reim == 1) ? -1.0 : 1.0) * - *(reinterpret_cast( - &(tm_clover[tm_idx][t_b_idx][t_hs].c00)) + - // if not doing the inverse and in the bottom-left block, transpose - // in colour - // because we're actually reading out of the top-right block - Nz * ((!inverse && (t_s1 == 1 && t_s2 == 0)) ? Nc * q_c2 + q_c1 - : Nc * q_c1 + q_c2) + - reim) + - // in the QPhiX gamma basis, the twisted quark mass enters with the - // opposite - // sign for consistency - ((!inverse && q_sc1 == q_sc2 && q_hs == 0 && reim == 1) - ? -amu * (1 - 2 * fl) - : 0) + - ((!inverse && q_sc1 == q_sc2 && q_hs == 1 && reim == 1) - ? 
amu * (1 - 2 * fl) - : 0)); - } - } // q_sc2 - } // q_sc1 - } // q_hs - } // fl - - } // x_soa - } // for(v) - } // for(y) - } // for(z) - } // for(t) - - const double diffTime = gettime() - startTime; - if (g_debug_level > 1) { - QPhiX::masterPrintf( - "# QPHIX-interface: time spent in reorder_clover_to_QPhiX (FullCloverBlock): %f secs\n", - diffTime); - } -} - -template -void reorder_gauge_to_QPhiX( - QPhiX::Geometry &geom, - typename QPhiX::Geometry::SU3MatrixBlock *qphix_gauge_cb0, - typename QPhiX::Geometry::SU3MatrixBlock *qphix_gauge_cb1) { - const double startTime = gettime(); - - // Number of elements in spin, color & complex - // Here c1 is QPhiX's outer color, and c2 the inner one - const int Ns = 4; - const int Nc1 = compress12 ? 2 : 3; - const int Nc2 = 3; - const int Nz = 2; - - // Geometric parameters for QPhiX data layout - const auto ngy = geom.nGY(); - const auto nVecs = geom.nVecs(); - const auto Pxy = geom.getPxy(); - const auto Pxyz = geom.getPxyz(); - - // This is needed to translate between the different - // orderings of the direction index "\mu" in tmlQCD - // and QPhiX, respectively - // in qphix, the Dirac operator is applied in the order - // -+x -> -+y -> -+z -> -+t - // while tmlqcd does - // -+t -> -+x -> -+y -> -+z - // same as the lattice ordering - // The mappingn between the application dimensions is thus: - // tmlqcd_dim(t(0) -> x(1) -> y(2) -> z(3)) = qphix_dim( t(3) -> x(0) -> y(1) -> z(2) ) - const int change_dim[4] = {1, 2, 3, 0}; - - // Get the base pointer for the (global) tmlQCD gauge field - xchange_gauge(g_gauge_field); - const double *in = reinterpret_cast(&g_gauge_field[0][0].c00); - -#pragma omp parallel for collapse(4) - for (int64_t t = 0; t < T; t++) - for (int64_t z = 0; z < LZ; z++) - for (int64_t y = 0; y < LY; y++) - for (int64_t v = 0; v < nVecs; v++) { - int64_t block = (t * Pxyz + z * Pxy) / ngy + (y / ngy) * nVecs + v; - - for (int dim = 0; dim < 4; dim++) // dimension == QPhiX \mu - for (int c1 = 0; c1 < 
Nc1; c1++) // QPhiX convention color 1 (runs up to 2 or 3) - for (int c2 = 0; c2 < Nc2; c2++) // QPhiX convention color 2 (always runs up to 3) - for (int x_soa = 0; x_soa < SOALEN; x_soa++) { - int64_t xx = (y % ngy) * SOALEN + x_soa; - int64_t q_cb_x_coord = x_soa + v * SOALEN; - int64_t tm_x_coord_cb0 = q_cb_x_coord * 2 + (((t + y + z) & 1) ^ 0); - int64_t tm_x_coord_cb1 = q_cb_x_coord * 2 + (((t + y + z) & 1) ^ 1); - - int64_t tm_idx_cb0; - int64_t tm_idx_cb1; - - // backward / forward - for (int dir = 0; dir < 2; dir++) { - if (dir == 0) { - tm_idx_cb0 = g_idn[g_ipt[t][tm_x_coord_cb0][y][z]][change_dim[dim]]; - tm_idx_cb1 = g_idn[g_ipt[t][tm_x_coord_cb1][y][z]][change_dim[dim]]; - } else { - tm_idx_cb0 = g_ipt[t][tm_x_coord_cb0][y][z]; - tm_idx_cb1 = g_ipt[t][tm_x_coord_cb1][y][z]; - } - for (int reim = 0; reim < Nz; reim++) { - // Note: - // ----- - // 1. \mu in QPhiX runs from 0..7 for all eight neighbouring - // links. - // Here, the ordering of the direction (backward/forward) - // is the same - // for tmlQCD and QPhiX, but we have to change the - // ordering of the dimensions. 
- int q_mu = 2 * dim + dir; - - qphix_gauge_cb0[block][q_mu][c1][c2][reim][xx] = - QPhiX::rep(su3_get_elem( - &(g_gauge_field[tm_idx_cb0][change_dim[dim]]), c2, c1, reim)); - qphix_gauge_cb1[block][q_mu][c1][c2][reim][xx] = - QPhiX::rep(su3_get_elem( - &(g_gauge_field[tm_idx_cb1][change_dim[dim]]), c2, c1, reim)); - } - } - } // for(dim,c1,c2,x_soa) - } // outer loop (t,z,y,v) - - const double diffTime = gettime() - startTime; - if (g_debug_level > 1) { - QPhiX::masterPrintf("# QPHIX-interface: time spent in reorder_gauge_to_QPhiX: %f secs\n", - diffTime); - } -} - -// Reorder tmLQCD eo-spinor to a FourSpinorBlock QPhiX spinor on the given checkerboard -template -void reorder_eo_spinor_to_QPhiX( - QPhiX::Geometry &geom, spinor const *const tm_eo_spinor, - typename QPhiX::Geometry::FourSpinorBlock *qphix_spinor, - const int cb) { - const double startTime = gettime(); - - const int Ns = 4; - const int Nc = 3; - const int Nz = 2; - - const auto nVecs = geom.nVecs(); - const auto Pxy = geom.getPxy(); - const auto Pxyz = geom.getPxyz(); - const auto Nxh = geom.Nxh(); - - // This is needed to translate between the different - // gamma bases tmlQCD and QPhiX are using - // (note, this is a 4x4 matrix with 4 non-zero elements) - const int change_sign[4] = {1, -1, -1, 1}; - const int change_spin[4] = {3, 2, 1, 0}; - -#pragma omp parallel for collapse(4) - for (int64_t t = 0; t < T; t++) { - for (int64_t z = 0; z < LZ; z++) { - for (int64_t y = 0; y < LY; y++) { - for (int64_t v = 0; v < nVecs; v++) { - for (int col = 0; col < Nc; col++) { - for (int q_spin = 0; q_spin < Ns; q_spin++) { - for (int x_soa = 0; x_soa < SOALEN; x_soa++) { - int64_t q_ind = t * Pxyz + z * Pxy + y * nVecs + v; - int64_t q_cb_x_coord = v * SOALEN + x_soa; - // when t+y+z is odd and we're on an odd (1) checkerboard OR - // when t+y+z is even and we're on an even (0) checkerboard - // the full x coordinate is 2*x_cb - // otherwise, it is 2*x_cb+1 - int64_t tm_x_coord = q_cb_x_coord * 2 + (((t + y + 
z) & 1) ^ cb); - // exchange x and z dimensions - int64_t tm_eo_ind = g_lexic2eosub[g_ipt[t][tm_x_coord][y][z]]; - - for (int reim = 0; reim < 2; reim++) { - qphix_spinor[q_ind][col][q_spin][reim][x_soa] = QPhiX::rep( - change_sign[q_spin] * - spinor_get_elem(&(tm_eo_spinor[tm_eo_ind]), change_spin[q_spin], col, reim)); - } - } - } - } - } - } - } - } - const double diffTime = gettime() - startTime; - if (g_debug_level > 1) { - QPhiX::masterPrintf("# QPHIX-interface: time spent in reorder_eo_spinor_to_QPhiX: %f secs\n", - diffTime); - } -} - -template -void reorder_eo_spinor_from_QPhiX( - QPhiX::Geometry &geom, spinor *tm_eo_spinor, - typename QPhiX::Geometry::FourSpinorBlock *qphix_spinor, - const int cb, double normFac = 1.0) { - const double startTime = gettime(); - - const int Ns = 4; - const int Nc = 3; - const int Nz = 2; - - const auto nVecs = geom.nVecs(); - const auto Pxy = geom.getPxy(); - const auto Pxyz = geom.getPxyz(); - const auto Nxh = geom.Nxh(); - - // This is needed to translate between the different - // gamma bases tmlQCD and QPhiX are using - // (note, this is a 4x4 matrix with 4 non-zero elements) - const int change_sign[4] = {1, -1, -1, 1}; - const int change_spin[4] = {3, 2, 1, 0}; - -#pragma omp parallel for collapse(4) - for (int64_t t = 0; t < T; t++) { - for (int64_t z = 0; z < LZ; z++) { - for (int64_t y = 0; y < LY; y++) { - for (int64_t v = 0; v < nVecs; v++) { - for (int col = 0; col < Nc; col++) { - for (int q_spin = 0; q_spin < Ns; q_spin++) { - for (int x_soa = 0; x_soa < SOALEN; x_soa++) { - int64_t q_ind = t * Pxyz + z * Pxy + y * nVecs + v; - int64_t q_cb_x_coord = v * SOALEN + x_soa; - // when t+y+z is odd and we're on an odd checkerboard (1) OR - // when t+y+z is even and we're on an even (0) checkerboard - // the full x coordinate is 2*x_cb - // otherwise, it is 2*x_cb+1 - int64_t tm_x_coord = q_cb_x_coord * 2 + (((t + y + z) & 1) ^ cb); - // exchange x and z dimensions - int64_t tm_eo_ind = 
g_lexic2eosub[g_ipt[t][tm_x_coord][y][z]]; - - spinor_set_elem( - &(tm_eo_spinor[tm_eo_ind]), change_spin[q_spin], col, - change_sign[q_spin] * normFac * - QPhiX::rep(qphix_spinor[q_ind][col][q_spin][0][x_soa]), - change_sign[q_spin] * normFac * - QPhiX::rep(qphix_spinor[q_ind][col][q_spin][1][x_soa])); - } - } - } - } - } - } - } - const double diffTime = gettime() - startTime; - if (g_debug_level > 1) { - QPhiX::masterPrintf("# QPHIX-interface: time spent in reorder_eo_spinor_from_QPhiX: %f secs\n", - diffTime); - } -} - -// Reorder a full tmLQCD spinor to a cb0 and cb1 QPhiX spinor -template -void reorder_spinor_to_QPhiX(QPhiX::Geometry &geom, - double const *tm_spinor, FT *qphix_spinor_cb0, FT *qphix_spinor_cb1) { - const double startTime = gettime(); - - // Number of elements in spin, color & complex - const int Ns = 4; - const int Nc = 3; - const int Nz = 2; - - // Geometric parameters for QPhiX data layout - const auto nVecs = geom.nVecs(); - const auto Pxy = geom.getPxy(); - const auto Pxyz = geom.getPxyz(); - - // This is needed to translate between the different - // gamma bases tmlQCD and QPhiX are using - const int change_sign[4] = {1, -1, -1, 1}; - const int change_spin[4] = {3, 2, 1, 0}; - -// This will loop over the entire lattice and calculate -// the array and internal indices for both tmlQCD & QPhiX -#pragma omp parallel for collapse(4) - for (uint64_t t = 0; t < T; t++) - for (uint64_t x = 0; x < LX; x++) - for (uint64_t y = 0; y < LY; y++) - for (uint64_t z = 0; z < LZ; z++) { - // These are the QPhiX SIMD vector in checkerboarded x direction - // (up to LX/2) and the internal position inside the SIMD vector - const uint64_t SIMD_vector = (x / 2) / SOALEN; - const uint64_t x_internal = (x / 2) % SOALEN; - - // Calculate the array index in tmlQCD & QPhiX, - // given a global lattice index (t,x,y,z) - const uint64_t qphix_idx = t * Pxyz + z * Pxy + y * nVecs + SIMD_vector; - const uint64_t tm_idx = g_ipt[t][x][y][z]; - - // Calculate base point 
for every spinor field element (tmlQCD) or - // for every SIMD vector of spinors, a.k.a FourSpinorBlock (QPhiX), - // which will depend on the checkerboard (cb) - const double *in = tm_spinor + Ns * Nc * Nz * tm_idx; - FT *out; - if ((t + x + y + z) & 1) - out = qphix_spinor_cb1 + SOALEN * Nz * Nc * Ns * qphix_idx; // odd -> cb1 - else - out = qphix_spinor_cb0 + SOALEN * Nz * Nc * Ns * qphix_idx; // even -> cb0 - - // Copy the internal elements, performing a gamma basis transformation - for (int spin = 0; spin < Ns; spin++) // QPhiX spin index - for (int color = 0; color < Nc; color++) - for (int z = 0; z < Nz; z++) // RE or IM - { - const uint64_t qId = - x_internal + z * SOALEN + spin * SOALEN * Nz + color * SOALEN * Nz * Ns; - const uint64_t tId = z + color * Nz + change_spin[spin] * Nz * Nc; - - out[qId] = QPhiX::rep(change_sign[spin] * in[tId]); - } - - } // volume - - const double diffTime = gettime() - startTime; - if (g_debug_level > 1) { - QPhiX::masterPrintf("# QPHIX-interface: time spent in reorder_spinor_to_QPhiX: %f secs\n", - diffTime); - } -} - -// Reorder a cb0 and cb1 QPhiX spinor to a full tmLQCD spinor -template -void reorder_spinor_from_QPhiX(QPhiX::Geometry &geom, - double *tm_spinor, FT const *qphix_spinor_cb0, - FT const *qphix_spinor_cb1, double normFac = 1.0) { - const double startTime = gettime(); - - // Number of elements in spin, color & complex - const int Ns = 4; - const int Nc = 3; - const int Nz = 2; - - // Geometric parameters for QPhiX data layout - const auto nVecs = geom.nVecs(); - const auto Pxy = geom.getPxy(); - const auto Pxyz = geom.getPxyz(); - - // This is needed to translate between the different - // gamma bases tmlQCD and QPhiX are using - const int change_sign[4] = {1, -1, -1, 1}; - const int change_spin[4] = {3, 2, 1, 0}; - -// This will loop over the entire lattice and calculate -// the array and internal indices for both tmlQCD & QPhiX -#pragma omp parallel for collapse(4) - for (uint64_t t = 0; t < T; t++) - for 
(uint64_t x = 0; x < LX; x++) - for (uint64_t y = 0; y < LY; y++) - for (uint64_t z = 0; z < LZ; z++) { - // These are the QPhiX SIMD vector in checkerboarded x direction - // (up to LX/2) and the internal position inside the SIMD vector - const uint64_t SIMD_vector = (x / 2) / SOALEN; - const uint64_t x_internal = (x / 2) % SOALEN; - - // Calculate the array index in tmlQCD & QPhiX, - // given a global lattice index (t,x,y,z) - const uint64_t qphix_idx = t * Pxyz + z * Pxy + y * nVecs + SIMD_vector; - const uint64_t tm_idx = g_ipt[t][x][y][z]; - - // Calculate base point for every spinor field element (tmlQCD) or - // for every SIMD vector of spinors, a.k.a FourSpinorBlock (QPhiX), - // which will depend on the checkerboard (cb) - const FT *in; - if ((t + x + y + z) & 1) - in = qphix_spinor_cb1 + SOALEN * Nz * Nc * Ns * qphix_idx; // cb1 - else - in = qphix_spinor_cb0 + SOALEN * Nz * Nc * Ns * qphix_idx; // cb0 - double *out = tm_spinor + Ns * Nc * Nz * tm_idx; - - // Copy the internal elements, performing a gamma basis transformation - for (int spin = 0; spin < Ns; spin++) // tmlQCD spin index - for (int color = 0; color < Nc; color++) - for (int z = 0; z < Nz; z++) // RE or IM - { - const uint64_t qId = x_internal + z * SOALEN + change_spin[spin] * SOALEN * Nz + - color * SOALEN * Nz * Ns; - const uint64_t tId = z + color * Nz + spin * Nz * Nc; - - out[tId] = QPhiX::rep(normFac * change_sign[spin] * in[qId]); - } - - } // volume - - const double diffTime = gettime() - startTime; - if (g_debug_level > 1) { - QPhiX::masterPrintf("# QPHIX-interface: time spent in reorder_spinor_from_QPhiX: %f secs\n", - diffTime); - } -} - -template -void pack_nd_clover( - QPhiX::Geometry &geom, - QPhiX::Geometry &geom_inner, - typename QPhiX::Geometry::FullCloverBlock *full_invclov[2], - typename QPhiX::Geometry::CloverBlock *invclov_odiag, - typename QPhiX::Geometry::CloverBlock *clov, - typename QPhiX::Geometry::FullCloverBlock - *full_invclov_inner[2], - typename 
QPhiX::Geometry::CloverBlock - *invclov_odiag_inner, - typename QPhiX::Geometry::CloverBlock *clov_inner, - const int cb, bool pack_inner) { - typedef typename QPhiX::Geometry::CloverBlock QClover; - typedef typename QPhiX::Geometry::FullCloverBlock QFullClover; - typedef typename QPhiX::Geometry::CloverBlock - QClover_inner; - typedef typename QPhiX::Geometry::FullCloverBlock - QFullClover_inner; - - double start = gettime(); - reorder_clover_to_QPhiX(geom, clov, cb, false); - if (pack_inner) { - reorder_clover_to_QPhiX(geom_inner, clov_inner, cb, false); - } - - sw_invert_epsbar(g_epsbar); - reorder_clover_to_QPhiX(geom, invclov_odiag, 1 - cb, true, true); - if (pack_inner) { - reorder_clover_to_QPhiX(geom_inner, invclov_odiag_inner, 1 - cb, true, true); - } - - // no minus sign here, the difference in the sign of gamma5 - // is taken care of internally - sw_invert_mubar(g_mubar); - reorder_clover_to_QPhiX(geom, full_invclov, 1 - cb, true); - if (pack_inner) { - reorder_clover_to_QPhiX(geom_inner, full_invclov_inner, 1 - cb, true); - } - - sw_invert_nd(g_mubar * g_mubar - g_epsbar * g_epsbar); - - if (g_debug_level > 1) { - QPhiX::masterPrintf("# QPHIX-inteface: ND TMClover clover-field packing took %.4lf seconds\n", - gettime() - start); - } -} - -// Due to github issue #404, the helper functions to apply the full QPhiX operator -// are currently disabled because they conflict with the new interfaces in QPhiX -// itself. 
If required, these should be rewritten to use these interfaces -// rather than the base classes in qphix_base_classes.hpp - -// Apply the full QPhiX fermion matrix to checkerboarded tm spinors -// template -// void Mfull_helper(spinor *Even_out, spinor *Odd_out, const spinor *Even_in, const spinor *Odd_in, -// const op_type_t op_type) { -// // TODO: this should use handles for gauge and spinors because these are definitely temporary -// // objects -// typedef typename QPhiX::Geometry::SU3MatrixBlock QGauge; -// typedef typename QPhiX::Geometry::FourSpinorBlock QSpinor; -// typedef typename QPhiX::Geometry::CloverBlock QClover; -// typedef typename QPhiX::Geometry::FullCloverBlock QFullClover; -// -// if (g_debug_level > 1) tmlqcd::printQphixDiagnostics(V, S, compress, V, S, compress); -// -// double coeff_s = (FT)(1); -// double coeff_t = (FT)(1); -// -// QPhiX::Geometry geom(subLattSize, By, Bz, NCores, Sy, Sz, PadXY, PadXYZ, -// MinCt); -// -// // Wilson mass -// double mass = 1 / (2.0 * g_kappa) - 4; -// -// tmlqcd::Dslash *polymorphic_dslash; -// -// QGauge *u_packed[2]; -// QSpinor *qphix_in[2]; -// QSpinor *qphix_out[2]; -// -// QClover *clover[2]; -// QClover *inv_clover[2]; -// -// QFullClover *inv_fullclover[2][2]; -// -// QSpinor *tmp_spinor = (QSpinor *)geom.allocCBFourSpinor(); -// for (int cb : {0, 1}) { -// u_packed[cb] = (QGauge *)geom.allocCBGauge(); -// qphix_in[cb] = (QSpinor *)geom.allocCBFourSpinor(); -// qphix_out[cb] = (QSpinor *)geom.allocCBFourSpinor(); -// clover[cb] = nullptr; -// inv_clover[cb] = nullptr; -// for (int fl : {0, 1}) { -// inv_fullclover[cb][fl] = nullptr; -// } -// } -// reorder_gauge_to_QPhiX(geom, u_packed[cb_even], u_packed[cb_odd]); -// -// if (op_type == WILSON) { -// polymorphic_dslash = new tmlqcd::WilsonDslash( -// &geom, t_boundary, coeff_s, coeff_t, mass, use_tbc, tbc_phases); -// } else if (op_type == TMWILSON) { -// polymorphic_dslash = new tmlqcd::WilsonTMDslash( -// &geom, t_boundary, coeff_s, coeff_t, mass, 
-g_mu / (2.0 * g_kappa), use_tbc, tbc_phases); -// } else if (op_type == CLOVER && fabs(g_mu) <= DBL_EPSILON) { -// for (int cb : {0, 1}) { -// clover[cb] = (QClover *)geom.allocCBClov(); -// inv_clover[cb] = (QClover *)geom.allocCBClov(); -// -// reorder_clover_to_QPhiX(geom, clover[cb], cb, false); -// sw_invert(cb, 0); -// reorder_clover_to_QPhiX(geom, inv_clover[cb], cb, true); -// } -// -// polymorphic_dslash = new tmlqcd::WilsonClovDslash( -// &geom, t_boundary, coeff_s, coeff_t, mass, clover, inv_clover, use_tbc, tbc_phases); -// -// } else if (op_type == CLOVER && fabs(g_mu) > DBL_EPSILON) { -// for (int cb : {0, 1}) { -// clover[cb] = (QClover *)geom.allocCBClov(); -// for (int fl : {0, 1}) { -// inv_fullclover[cb][fl] = (QFullClover *)geom.allocCBFullClov(); -// } -// reorder_clover_to_QPhiX(geom, clover[cb], cb, false); -// sw_invert(cb, g_mu); -// reorder_clover_to_QPhiX(geom, inv_fullclover[cb], cb, true); -// } -// -// polymorphic_dslash = new tmlqcd::WilsonClovTMDslash( -// &geom, t_boundary, coeff_s, coeff_t, mass, -g_mu / (2.0 * g_kappa), clover, -// inv_fullclover, use_tbc, tbc_phases); -// -// } else { -// QPhiX::masterPrintf("tmlqcd::Mfull_helper; No such operator type: %d\n", op_type); -// abort(); -// } -// -//// reorder_eo_spinor_to_QPhiX(geom, reinterpret_cast(Even_in), -//// qphix_in[cb_even], cb_even); -//// reorder_eo_spinor_to_QPhiX(geom, reinterpret_cast(Odd_in), -/// qphix_in[cb_odd], / cb_odd); -// reorder_eo_spinor_to_QPhiX(geom, Even_in, -// qphix_in[cb_even], cb_even); -// reorder_eo_spinor_to_QPhiX(geom, Odd_in, qphix_in[cb_odd], -// cb_odd); -// // Apply QPhiX Mfull -// polymorphic_dslash->plain_dslash(qphix_out[cb_odd], qphix_in[cb_even], u_packed[cb_odd], -// /* isign == non-conjugate */ 1, cb_odd); -// polymorphic_dslash->plain_dslash(qphix_out[cb_even], qphix_in[cb_odd], u_packed[cb_even], -// /* isign == non-conjugate */ 1, cb_even); -// for (int cb : {0, 1}) { -// polymorphic_dslash->A_chi(tmp_spinor, qphix_in[cb], 1, cb); 
-// QPhiX::aypx(-0.5, tmp_spinor, qphix_out[cb], geom, 1); -// } -// -// reorder_eo_spinor_from_QPhiX(geom, Even_out, qphix_out[cb_even], -// cb_even, 2.0 * g_kappa); -// reorder_eo_spinor_from_QPhiX(geom, Odd_out, qphix_out[cb_odd], cb_odd, -// 2.0 * g_kappa); -// -// geom.free(tmp_spinor); -// for (int cb : {0, 1}) { -// geom.free(u_packed[cb]); -// geom.free(qphix_in[cb]); -// geom.free(qphix_out[cb]); -// geom.free(clover[cb]); -// geom.free(inv_clover[cb]); -// for (int fl : {0, 1}) { -// geom.free(inv_fullclover[cb][fl]); -// } -// }; -// delete (polymorphic_dslash); -//} - -// Templated even-odd preconditioned solver using QPhiX Library -template -int invert_eo_qphix_helper(std::vector > &tmlqcd_odd_out, - std::vector > &tmlqcd_odd_in, - const double target_precision, const int max_iter, const int solver_flag, - solver_params_t solver_params, const int num_flavour) { - // TODO: it would perhaps be beneficial to keep the fields resident - typedef typename QPhiX::Geometry::SU3MatrixBlock QGauge; - typedef typename QPhiX::Geometry::FourSpinorBlock QSpinor; - typedef typename QPhiX::FourSpinorHandle QSpinorHandle; - typedef typename QPhiX::Geometry::CloverBlock QClover; - typedef typename QPhiX::Geometry::FullCloverBlock QFullClover; - - typedef typename QPhiX::Geometry::SU3MatrixBlock - QGauge_inner; - typedef typename QPhiX::Geometry::FourSpinorBlock - QSpinor_inner; - typedef typename QPhiX::FourSpinorHandle - QSpinorHandle_inner; - typedef typename QPhiX::Geometry::CloverBlock - QClover_inner; - typedef typename QPhiX::Geometry::FullCloverBlock - QFullClover_inner; - - /************************ - * * - * SETUP GEOMETRY * - * * - ************************/ - - if (g_debug_level > 1) { - tmlqcd::printQphixDiagnostics(V, S, compress, V_inner, S_inner, compress_inner); - } - - QPhiX::Geometry geom(subLattSize, By, Bz, NCores, Sy, Sz, PadXY, PadXYZ, - MinCt); - - // we always create the inner geometry, the overhead should be small... 
- QPhiX::Geometry geom_inner( - subLattSize, By, Bz, NCores, Sy, Sz, PadXY, PadXYZ, MinCt); - - // Set number of BLAS threads by hand. - // In case some implements the tune routines in QPhiX - // this may be updated... - QPhiX::masterPrintf("# Setting number of BLAS threads...\n"); - const int n_blas_simt = N_simt; - QPhiX::masterPrintf("# ...done.\n"); - - // Anisotropy Coefficents - const double coeff_s = 1.0; - const double coeff_t = 1.0; - - // The Wilson mass - const double mass = 1.0 / (2.0 * g_kappa) - 4.0; - - // Set variables need for solve - bool verbose = g_debug_level > 2 ? true : false; - int niters = -1; - int niters2 = 0; - double rsd_final = -1.0; - uint64_t site_flops = 0; - uint64_t site_flops2 = 0; - uint64_t mv_apps = 0; - uint64_t mv_apps2 = 0; - - double start_time; - double end_time; - - // support for multi-shift solves via the length of the output vector, - // which counts the shifts on the outer index and the flavour on the inner index - const int num_shifts = tmlqcd_odd_out.size(); - std::vector shifts; - shifts.resize(num_shifts); - std::vector RsdTargetArr; - RsdTargetArr.resize(num_shifts); - std::vector RsdFinalArr; - RsdFinalArr.resize(num_shifts); - - double rescale = 0.5 / g_kappa; - // the inverse of M M^dag, as required for the HMC, comes with a factor of alpha^2 - if (solver_params.solution_type == TM_SOLUTION_M_MDAG) { - rescale *= rescale; - } - - std::vector q_spinor_handles; - - QGauge *u_packed[2] = {nullptr, nullptr}; - QGauge_inner *u_packed_inner[2] = {nullptr, nullptr}; - for (int cb : {0, 1}) { - u_packed[cb] = (QGauge *)geom.allocCBGauge(); - } - // Reorder (global) input gauge field from tmLQCD to QPhiX - reorder_gauge_to_QPhiX(geom, u_packed[cb_even], u_packed[cb_odd]); - - // for mixed solvers, we also need the gauge field in the inner precision - if (solver_is_mixed(solver_flag)) { - for (int cb : {0, 1}) { - u_packed_inner[cb] = (QGauge_inner *)geom_inner.allocCBGauge(); - } - reorder_gauge_to_QPhiX(geom_inner, 
u_packed_inner[cb_even], u_packed_inner[cb_odd]); - } - - if (num_flavour == 1) { - constexpr int nf = 1; - std::vector qphix_in; - qphix_in.resize(1); - std::vector qphix_out; - qphix_out.resize(num_shifts); - QSpinor *qphix_buffer; - - QClover *qphix_clover = nullptr; - QClover *qphix_inv_clover = nullptr; - - QClover_inner *qphix_clover_inner = nullptr; - QClover_inner *qphix_inv_clover_inner = nullptr; - - QFullClover *qphix_inv_fullclover[2] = {nullptr, nullptr}; - - QFullClover_inner *qphix_inv_fullclover_inner[2] = {nullptr, nullptr}; - - q_spinor_handles.push_back(makeFourSpinorHandle(geom)); - qphix_in[0] = q_spinor_handles.back().get(); - - for (int shift = 0; shift < num_shifts; shift++) { - q_spinor_handles.push_back(makeFourSpinorHandle(geom)); - qphix_out[shift] = q_spinor_handles.back().get(); - } - - q_spinor_handles.push_back(makeFourSpinorHandle(geom)); - qphix_buffer = q_spinor_handles.back().get(); - - QPhiX::EvenOddLinearOperator *FermionMatrixQPhiX = nullptr; - QPhiX::EvenOddLinearOperator - *InnerFermionMatrixQPhiX = nullptr; - if ((fabs(g_mu) > DBL_EPSILON) && g_c_sw > DBL_EPSILON) { // TWISTED-MASS-CLOVER - qphix_clover = (QClover *)geom.allocCBClov(); - for (int fl : {0, 1}) { - qphix_inv_fullclover[fl] = (QFullClover *)geom.allocCBFullClov(); - } - reorder_clover_to_QPhiX(geom, qphix_clover, cb_odd, false); - reorder_clover_to_QPhiX(geom, qphix_inv_fullclover, cb_even, true); - - QPhiX::masterPrintf("# Creating QPhiX Twisted Clover Fermion Matrix...\n"); - FermionMatrixQPhiX = new QPhiX::EvenOddTMCloverOperator( - u_packed, qphix_clover, qphix_inv_fullclover, &geom, t_boundary, coeff_s, coeff_t, - use_tbc, tbc_phases, -0.5 * (g_mu3 + g_mu) / g_kappa); - if (solver_is_mixed(solver_flag)) { - qphix_clover_inner = (QClover_inner *)geom_inner.allocCBClov(); - for (int fl : {0, 1}) { - qphix_inv_fullclover_inner[fl] = (QFullClover_inner *)geom_inner.allocCBFullClov(); - } - reorder_clover_to_QPhiX(geom_inner, qphix_clover_inner, cb_odd, 
false); - reorder_clover_to_QPhiX(geom_inner, qphix_inv_fullclover_inner, cb_even, true); - InnerFermionMatrixQPhiX = - new QPhiX::EvenOddTMCloverOperator( - u_packed_inner, qphix_clover_inner, qphix_inv_fullclover_inner, &geom_inner, - t_boundary, coeff_s, coeff_t, use_tbc, tbc_phases, -0.5 * (g_mu3 + g_mu) / g_kappa); - } - QPhiX::masterPrintf("# ...done.\n"); - } else if (fabs(g_mu) > DBL_EPSILON) { // TWISTED-MASS - const double TwistedMass = -g_mu / (2.0 * g_kappa); - QPhiX::masterPrintf("# Creating QPhiX Twisted Mass Wilson Fermion Matrix...\n"); - FermionMatrixQPhiX = new QPhiX::EvenOddTMWilsonOperator( - mass, TwistedMass, u_packed, &geom, t_boundary, coeff_s, coeff_t, use_tbc, tbc_phases); - QPhiX::masterPrintf("# ...done.\n"); - if (solver_is_mixed(solver_flag)) { - InnerFermionMatrixQPhiX = - new QPhiX::EvenOddTMWilsonOperator( - mass, TwistedMass, u_packed_inner, &geom_inner, t_boundary, coeff_s, coeff_t, - use_tbc, tbc_phases); - } - } else if (g_c_sw > DBL_EPSILON) { // WILSON CLOVER - qphix_clover = (QClover *)geom.allocCBClov(); - qphix_inv_clover = (QClover *)geom.allocCBClov(); - - reorder_clover_to_QPhiX(geom, qphix_clover, cb_odd, false); - reorder_clover_to_QPhiX(geom, qphix_inv_clover, cb_even, true); - - QPhiX::masterPrintf("# Creating QPhiX Wilson Clover Fermion Matrix...\n"); - FermionMatrixQPhiX = new QPhiX::EvenOddCloverOperator( - u_packed, qphix_clover, qphix_inv_clover, &geom, t_boundary, coeff_s, coeff_t, use_tbc, - tbc_phases, -0.5 * g_mu3 / g_kappa); - if (solver_is_mixed(solver_flag)) { - qphix_clover_inner = (QClover_inner *)geom_inner.allocCBClov(); - qphix_inv_clover_inner = (QClover_inner *)geom_inner.allocCBClov(); - reorder_clover_to_QPhiX(geom_inner, qphix_clover_inner, cb_odd, false); - reorder_clover_to_QPhiX(geom_inner, qphix_inv_clover_inner, cb_even, true); - InnerFermionMatrixQPhiX = - new QPhiX::EvenOddCloverOperator( - u_packed_inner, qphix_clover_inner, qphix_inv_clover_inner, &geom_inner, t_boundary, - coeff_s, 
coeff_t, use_tbc, tbc_phases, -0.5 * g_mu3 / g_kappa); - } - QPhiX::masterPrintf("# ...done.\n"); - - } else { // WILSON - QPhiX::masterPrintf("# Creating QPhiX Wilson Fermion Matrix...\n"); - FermionMatrixQPhiX = new QPhiX::EvenOddWilsonOperator( - mass, u_packed, &geom, t_boundary, coeff_s, coeff_t, use_tbc, tbc_phases); - if (solver_is_mixed(solver_flag)) { - InnerFermionMatrixQPhiX = - new QPhiX::EvenOddWilsonOperator( - mass, u_packed_inner, &geom_inner, t_boundary, coeff_s, coeff_t, use_tbc, - tbc_phases); - } - QPhiX::masterPrintf("# ...done.\n"); - } - - // Create a Linear Solver Object - QPhiX::AbstractSolver *SolverQPhiX = nullptr; - QPhiX::AbstractSolver *InnerSolverQPhiX = nullptr; - QPhiX::AbstractMultiSolver *MultiSolverQPhiX = nullptr; - if (solver_flag == DUMMYHERMTEST) { - QPhiX::masterPrintf("# QPHIX: Creating dummy solver for hermiticity test...\n"); - SolverQPhiX = - new QPhiX::InvDummyHermTest >( - *FermionMatrixQPhiX, max_iter); - } else if (solver_flag == CG) { - QPhiX::masterPrintf("# QPHIX: Creating CG solver...\n"); - SolverQPhiX = new QPhiX::InvCG(*FermionMatrixQPhiX, max_iter); - } else if (solver_flag == BICGSTAB) { - QPhiX::masterPrintf("# QPHIX: Creating BiCGStab solver...\n"); - SolverQPhiX = new QPhiX::InvBiCGStab(*FermionMatrixQPhiX, max_iter); - } else if (solver_flag == MIXEDCG) { - // TODO: probably need to adjust inner solver iterations here... 
- QPhiX::masterPrintf("# QPHIX: Creating mixed-precision CG solver...\n"); - InnerSolverQPhiX = new QPhiX::InvCG( - *InnerFermionMatrixQPhiX, max_iter); - const bool MMdag = true; - SolverQPhiX = new QPhiX::InvRichardsonMultiPrec( - *FermionMatrixQPhiX, *InnerSolverQPhiX, solver_params.mcg_delta, max_iter); - } else if (solver_flag == MIXEDBICGSTAB) { - QPhiX::masterPrintf("# QPHIX: Creating mixed-precision BICGCGSTAB solver...\n"); - InnerSolverQPhiX = new QPhiX::InvBiCGStab( - *InnerFermionMatrixQPhiX, max_iter); - const bool MMdag = false; - SolverQPhiX = new QPhiX::InvRichardsonMultiPrec( - *FermionMatrixQPhiX, *InnerSolverQPhiX, solver_params.mcg_delta, max_iter); - } else if (solver_flag == CGMMS) { - QPhiX::masterPrintf("# QPHIX: Creating multi-shift CG solver ...\n"); - MultiSolverQPhiX = - new QPhiX::MInvCG(*FermionMatrixQPhiX, max_iter, num_shifts); - } else { - QPhiX::masterPrintf(" Solver not yet supported by QPhiX!\n"); - QPhiX::masterPrintf(" Aborting...\n"); - abort(); - } - QPhiX::masterPrintf("# ...done.\n"); - - // reorder_eo_spinor_to_QPhiX(geom, reinterpret_cast(tmlqcd_odd_in[0][0]), - // qphix_in[0], cb_odd); - reorder_eo_spinor_to_QPhiX(geom, tmlqcd_odd_in[0][0], qphix_in[0], cb_odd); - QPhiX::masterPrintf("# Calling the solver...\n"); - - // Set the right precision for the QPhiX solver - // we get target_precision externally and and is given such, that it's either - // already relative or absolute - // Most QPhiX solvers allow setting absolute or relative residual - // by passing an appropriate flag, but this is not true for the multi-shift solver. - // As a result, we follow that solver and call ALL solvers with - // QPhiX::RELATIVE, which gives results consistent with tmLQCD in all cases. 
- double rhs_norm2 = 1.0; - QPhiX::norm2Spinor(rhs_norm2, qphix_in[0], geom, n_blas_simt); - const double RsdTarget = sqrt(target_precision / rhs_norm2); - - // Calling the solver - start_time = gettime(); - if (solver_flag == DUMMYHERMTEST) { - random_spinor_field_eo(tmlqcd_odd_out[0][0], 0, RN_GAUSS); - reorder_eo_spinor_to_QPhiX(geom, tmlqcd_odd_out[0][0], qphix_buffer, cb_odd); - for (int isign : {-1, 1}) { - (*SolverQPhiX)(qphix_buffer, qphix_in[0], RsdTarget, niters, rsd_final, site_flops, mv_apps, - isign, verbose, cb_odd, QPhiX::RELATIVE); - } - QPhiX::copySpinor(qphix_out[0], qphix_buffer, geom, n_blas_simt); - } else if (solver_flag == CG || solver_flag == MIXEDCG || solver_flag == RGMIXEDCG) { - // USING CG: - // We are solving - // M M^dagger qphix_buffer = qphix_in_prepared - // here, that is, isign = -1 for the QPhiX CG solver. - (*SolverQPhiX)(qphix_buffer, qphix_in[0], RsdTarget, niters, rsd_final, site_flops, mv_apps, - -1, verbose, cb_odd, QPhiX::RELATIVE); - // After that. 
if required by the solution type, multiply with M^dagger: - // qphix_out[1] = M^dagger ( M^dagger^-1 M^-1 ) qphix_in_prepared - if (solver_params.solution_type == TM_SOLUTION_M) { - (*FermionMatrixQPhiX)(qphix_out[0], qphix_buffer, /* conjugate */ -1); - mv_apps++; - } else { - QPhiX::copySpinor(qphix_out[0], qphix_buffer, geom, n_blas_simt); - } - } else if (solver_flag == CGMMS) { - // TODO: handle the residuals properly - if (g_debug_level > 2) QPhiX::masterPrintf("# QPHIX CGMMS: shifts: \n"); - for (int shift = 0; shift < num_shifts; shift++) { - RsdTargetArr[shift] = RsdTarget; - RsdFinalArr[shift] = -1.0; - shifts[shift] = - solver_params.shifts[shift] * solver_params.shifts[shift] / (4 * g_kappa * g_kappa); - if (g_debug_level > 2) - QPhiX::masterPrintf("# QPHIX CGMMS: shift[%d] = %.6e\n", shift, shifts[shift]); - } - if (g_debug_level > 2) QPhiX::masterPrintf("\n"); - (*MultiSolverQPhiX)(qphix_out.data(), qphix_in[0], num_shifts, shifts.data(), - RsdTargetArr.data(), niters, RsdFinalArr.data(), site_flops, mv_apps, -1, - verbose); - rsd_final = RsdFinalArr[0]; - } else if (solver_flag == BICGSTAB || solver_flag == MIXEDBICGSTAB) { - (*SolverQPhiX)(qphix_buffer, qphix_in[0], RsdTarget, niters, rsd_final, site_flops, mv_apps, - 1, verbose, cb_odd, QPhiX::RELATIVE); - // for M^dagger^-1 M^-1 solution type, need to call BiCGstab twice - if (solver_params.solution_type == TM_SOLUTION_M_MDAG) { - (*SolverQPhiX)(qphix_out[0], qphix_buffer, RsdTarget, niters2, rsd_final, site_flops, - mv_apps2, -1, verbose, cb_odd, QPhiX::RELATIVE); - } else { - QPhiX::copySpinor(qphix_out[0], qphix_buffer, geom, n_blas_simt); - } - } - end_time = gettime(); - - for (int shift = 0; shift < num_shifts; shift++) { - reorder_eo_spinor_from_QPhiX(geom, tmlqcd_odd_out[shift][0], qphix_out[shift], cb_odd, - rescale); - } - - QPhiX::masterPrintf("# QPHIX: ...done.\n"); - QPhiX::masterPrintf("# QPHIX: Cleaning up\n"); - delete (FermionMatrixQPhiX); - delete (InnerFermionMatrixQPhiX); - 
delete (SolverQPhiX); - delete (InnerSolverQPhiX); - delete (MultiSolverQPhiX); - // on KNL, it seems that munmap is problematic, so we check for nullptr - if (qphix_clover) geom.free(qphix_clover); - if (qphix_inv_clover) geom.free(qphix_inv_clover); - if (qphix_clover_inner) geom_inner.free(qphix_clover_inner); - if (qphix_inv_clover_inner) geom_inner.free(qphix_inv_clover_inner); - for (int fl : {0, 1}) { - if (qphix_inv_fullclover[fl]) geom.free(qphix_inv_fullclover[fl]); - if (qphix_inv_fullclover_inner[fl]) geom_inner.free(qphix_inv_fullclover_inner[fl]); - } - QPhiX::masterPrintf("# QPHIX: ...done.\n\n"); - - } else if (num_flavour == 2) { - // for explicit template arguments - constexpr int nf = 2; - - QSpinor *qphix_in[2]; - std::vector qphix_out; - qphix_out.resize(num_shifts); - for (int shift = 0; shift < num_shifts; shift++) { - qphix_out[shift] = new QSpinor *[2]; - for (int fl : {0, 1}) { - q_spinor_handles.push_back(makeFourSpinorHandle(geom)); - qphix_out[shift][fl] = q_spinor_handles.back().get(); - } - } - - QSpinor *qphix_buffer[2]; - for (int fl : {0, 1}) { - q_spinor_handles.push_back(makeFourSpinorHandle(geom)); - qphix_in[fl] = q_spinor_handles.back().get(); - q_spinor_handles.push_back(makeFourSpinorHandle(geom)); - qphix_buffer[fl] = q_spinor_handles.back().get(); - } - - QClover *qphix_clover = nullptr; - QClover_inner *qphix_clover_inner = nullptr; - - QClover *qphix_invclov_odiag = nullptr; - QClover_inner *qphix_invclov_odiag_inner = nullptr; - - QFullClover *qphix_inv_fullclover[2] = {nullptr, nullptr}; - QFullClover_inner *qphix_inv_fullclover_inner[2] = {nullptr, nullptr}; - - QPhiX::TwoFlavEvenOddLinearOperator *TwoFlavFermionMatrixQPhiX = nullptr; - QPhiX::TwoFlavEvenOddLinearOperator - *InnerTwoFlavFermionMatrixQPhiX = nullptr; - - if (g_c_sw > DBL_EPSILON) { // DBCLOVER - qphix_clover = (QClover *)geom.allocCBClov(); - qphix_invclov_odiag = (QClover *)geom.allocCBClov(); - if (solver_is_mixed(solver_flag)) { - qphix_clover_inner 
= (QClover_inner *)geom_inner.allocCBClov(); - qphix_invclov_odiag_inner = (QClover_inner *)geom_inner.allocCBClov(); - } - - for (int fl : {0, 1}) { - qphix_inv_fullclover[fl] = (QFullClover *)geom.allocCBFullClov(); - if (solver_is_mixed(solver_flag)) { - qphix_inv_fullclover_inner[fl] = (QFullClover_inner *)geom_inner.allocCBFullClov(); - } - } - - pack_nd_clover(geom, geom_inner, qphix_inv_fullclover, qphix_invclov_odiag, qphix_clover, - qphix_inv_fullclover_inner, qphix_invclov_odiag_inner, qphix_clover_inner, - cb_odd, solver_is_mixed(solver_flag)); - - QPhiX::masterPrintf( - "# QPHIX: Creating two-flavour QPhiX Wilson Twisted Clover Fermion Matrix...\n"); - TwoFlavFermionMatrixQPhiX = new QPhiX::EvenOddNDTMCloverReuseOperator( - -0.5 * g_mubar / g_kappa, 0.5 * g_epsbar / g_kappa, u_packed, qphix_clover, - qphix_invclov_odiag, qphix_inv_fullclover, &geom, t_boundary, coeff_s, coeff_t, use_tbc, - tbc_phases); - if (solver_is_mixed(solver_flag)) { - InnerTwoFlavFermionMatrixQPhiX = - new QPhiX::EvenOddNDTMCloverReuseOperator( - -0.5 * g_mubar / g_kappa, 0.5 * g_epsbar / g_kappa, u_packed_inner, - qphix_clover_inner, qphix_invclov_odiag_inner, qphix_inv_fullclover_inner, - &geom_inner, t_boundary, coeff_s, coeff_t, use_tbc, tbc_phases); - } - } else { // DBTMWILSON - QPhiX::masterPrintf( - "# QPHIX: Creating two-flavour QPhiX Wilson Twisted Mass Fermion Matrix...\n"); - TwoFlavFermionMatrixQPhiX = new QPhiX::EvenOddNDTMWilsonReuseOperator( - mass, -0.5 * g_mubar / g_kappa, 0.5 * g_epsbar / g_kappa, u_packed, &geom, t_boundary, - coeff_s, coeff_t, use_tbc, tbc_phases); - if (solver_is_mixed(solver_flag)) { - InnerTwoFlavFermionMatrixQPhiX = - new QPhiX::EvenOddNDTMWilsonReuseOperator( - mass, -0.5 * g_mubar / g_kappa, 0.5 * g_epsbar / g_kappa, u_packed_inner, - &geom_inner, t_boundary, coeff_s, coeff_t, use_tbc, tbc_phases); - } - } - - // - QPhiX::AbstractSolver *TwoFlavSolverQPhiX = nullptr; - QPhiX::AbstractSolver *InnerTwoFlavSolverQPhiX = - nullptr; - 
QPhiX::AbstractMultiSolver *TwoFlavMultiSolverQPhiX = nullptr; - if (solver_flag == DUMMYHERMTEST) { - QPhiX::masterPrintf("# QPHIX: Creating dummy solver for hermiticity test...\n"); - TwoFlavSolverQPhiX = new QPhiX::InvDummyHermTest< - FT, V, S, compress, typename QPhiX::TwoFlavEvenOddLinearOperator >( - *TwoFlavFermionMatrixQPhiX, max_iter); - } else if (solver_flag == CG) { - QPhiX::masterPrintf("# QPHIX: Creating CG solver...\n"); - TwoFlavSolverQPhiX = - new QPhiX::InvCG >( - *TwoFlavFermionMatrixQPhiX, max_iter); - } else if (solver_flag == BICGSTAB) { - QPhiX::masterPrintf("# QPHIX: Creating BiCGstab solver...\n"); - TwoFlavSolverQPhiX = - new QPhiX::InvBiCGStab >( - *TwoFlavFermionMatrixQPhiX, max_iter); - } else if (solver_flag == MIXEDCG) { - QPhiX::masterPrintf("# QPHIX: Creating mixed-precision CG solver...\n"); - InnerTwoFlavSolverQPhiX = - new QPhiX::InvCG >( - *InnerTwoFlavFermionMatrixQPhiX, max_iter); - const bool MMdag = true; - TwoFlavSolverQPhiX = new QPhiX::InvRichardsonMultiPrec< - FT, V, S, compress, FT_inner, V_inner, S_inner, compress_inner, MMdag, - typename QPhiX::TwoFlavEvenOddLinearOperator >( - *TwoFlavFermionMatrixQPhiX, *InnerTwoFlavSolverQPhiX, solver_params.mcg_delta, max_iter); - } else if (solver_flag == CGMMSND) { - QPhiX::masterPrintf("# QPHIX: Creating multi-shift CG solver...\n"); - TwoFlavMultiSolverQPhiX = - new QPhiX::MInvCG >( - *TwoFlavFermionMatrixQPhiX, max_iter, num_shifts); - } else { - QPhiX::masterPrintf(" Solver not yet supported by QPhiX!\n"); - QPhiX::masterPrintf(" Aborting...\n"); - abort(); - } - QPhiX::masterPrintf("# QPHIX: ...done.\n"); - - for (int fl : {0, 1}) { - // reorder_eo_spinor_to_QPhiX(geom, reinterpret_cast(tmlqcd_odd_in[0][fl]), - // qphix_in[fl], cb_odd); - reorder_eo_spinor_to_QPhiX(geom, tmlqcd_odd_in[0][fl], qphix_in[fl], cb_odd); - } - - QPhiX::masterPrintf("# QPHIX: Calling the solver...\n"); - - // Set the right precision for the QPhiX solver - // we get target_precision externally and 
and is given such, that it's either - // already relative or absolute - // Most QPhiX solvers allow setting absolute or relative residual - // by passing an appropriate flag, but this is not true for the multi-shift solver. - // As a result, we follow that solver and call ALL solvers with - // QPhiX::RELATIVE, which gives results consistent with tmLQCD in all cases. - double rhs_norm2 = 1.0; - QPhiX::norm2Spinor(rhs_norm2, qphix_in, geom, n_blas_simt); - const double RsdTarget = sqrt(target_precision / rhs_norm2); - - // Calling the solver - start_time = gettime(); - if (solver_flag == DUMMYHERMTEST) { - for (int fl : {0, 1}) { - random_spinor_field_eo(tmlqcd_odd_out[0][fl], 0, RN_GAUSS); - reorder_eo_spinor_to_QPhiX(geom, tmlqcd_odd_out[0][fl], qphix_buffer[fl], cb_odd); - } - for (int isign : {-1, 1}) { - (*TwoFlavSolverQPhiX)(qphix_buffer, qphix_in, RsdTarget, niters, rsd_final, site_flops, - mv_apps, isign, verbose, cb_odd, QPhiX::RELATIVE); - } - QPhiX::copySpinor(qphix_out[0], qphix_buffer, geom, n_blas_simt); - } else if (solver_flag == CG || solver_flag == MIXEDCG) { - // USING CG: - // We are solving - // M M^dagger qphix_buffer = qphix_in_prepared - // here, that is, isign = -1 for the QPhiX CG solver. - (*TwoFlavSolverQPhiX)(qphix_buffer, qphix_in, RsdTarget, niters, rsd_final, site_flops, - mv_apps, -1, verbose, cb_odd, QPhiX::RELATIVE); - // After that. 
if required by the solution type, multiply with M^dagger: - // qphix_out[1] = M^dagger M^dagger^-1 M^-1 qphix_in_prepared - if (solver_params.solution_type == TM_SOLUTION_M) { - (*TwoFlavFermionMatrixQPhiX)(qphix_out[0], qphix_buffer, /* conjugate */ -1); - mv_apps++; - } else { - QPhiX::copySpinor(qphix_out[0], qphix_buffer, geom, n_blas_simt); - } - } else if (solver_flag == BICGSTAB || solver_flag == MIXEDBICGSTAB) { - (*TwoFlavSolverQPhiX)(qphix_buffer, qphix_in, RsdTarget, niters, rsd_final, site_flops, - mv_apps, 1, verbose, cb_odd, QPhiX::RELATIVE); - // for M^dagger^-1 M^-1 solution type, need to call BiCGstab twice - if (solver_params.solution_type == TM_SOLUTION_M_MDAG) { - (*TwoFlavSolverQPhiX)(qphix_out[0], qphix_buffer, RsdTarget, niters2, rsd_final, site_flops, - mv_apps2, -1, verbose, cb_odd, QPhiX::RELATIVE); - } else { - QPhiX::copySpinor(qphix_out[0], qphix_buffer, geom, n_blas_simt); - } - } else if (solver_flag == CGMMSND) { - // TODO: handle the residuals properly - if (g_debug_level > 2) QPhiX::masterPrintf("# QPHIX CGMMSND: shifts: \n"); - // tmLQCD weights the operator with 1/maxev in the RHMC relative to the shifts - // we will do this externally on the inverse (in monomial_solve) and thus need to weight - // the shifts by maxev^2 - const double maxev_sq = (1.0 / phmc_invmaxev) * (1.0 / phmc_invmaxev); - for (int shift = 0; shift < num_shifts; shift++) { - RsdTargetArr[shift] = RsdTarget; - RsdFinalArr[shift] = -1.0; - shifts[shift] = maxev_sq * solver_params.shifts[shift] * solver_params.shifts[shift] / - (4 * g_kappa * g_kappa); - if (g_debug_level > 2) QPhiX::masterPrintf("# [%d] = %lf\n", shift, shifts[shift]); - } - if (g_debug_level > 2) QPhiX::masterPrintf("\n"); - (*TwoFlavMultiSolverQPhiX)(qphix_out.data(), qphix_in, num_shifts, shifts.data(), - RsdTargetArr.data(), niters, RsdFinalArr.data(), site_flops, - mv_apps, -1, verbose); - rsd_final = RsdFinalArr[0]; - } - end_time = gettime(); - - for (int shift = 0; shift < num_shifts; 
shift++) { - for (int fl : {0, 1}) { - reorder_eo_spinor_from_QPhiX(geom, tmlqcd_odd_out[shift][fl], qphix_out[shift][fl], cb_odd, - rescale); - } - } - - delete TwoFlavFermionMatrixQPhiX; - delete InnerTwoFlavFermionMatrixQPhiX; - delete InnerTwoFlavSolverQPhiX; - delete TwoFlavMultiSolverQPhiX; - delete TwoFlavSolverQPhiX; - for (int shift = 0; shift < num_shifts; shift++) { - delete[] qphix_out[shift]; - } - - if (qphix_clover) geom.free(qphix_clover); - if (qphix_invclov_odiag) geom.free(qphix_invclov_odiag); - if (qphix_clover_inner) geom_inner.free(qphix_clover_inner); - if (qphix_invclov_odiag_inner) geom_inner.free(qphix_invclov_odiag_inner); - for (int fl : {0, 1}) { - if (qphix_inv_fullclover[fl]) geom.free(qphix_inv_fullclover[fl]); - if (qphix_inv_fullclover_inner[fl]) geom_inner.free(qphix_inv_fullclover_inner[fl]); - } - - } else { // if(num_flavour) - // complain, this number of flavours is not valid - } // if(num_flavour) - - for (int cb : {0, 1}) { - if (u_packed[cb]) geom.free(u_packed[cb]); - if (u_packed_inner[cb]) geom_inner.free(u_packed_inner[cb]); - } - - // FIXME: This should be called properly somewhere else - _endQphix(); - - QPhiX::masterPrintf("# ...done.\n\n"); - - uint64_t num_cb_sites = lattSize[0] / 2 * lattSize[1] * lattSize[2] * lattSize[3]; - // FIXME: this needs to be adjusted depending on the operator used - uint64_t op_flops_per_site = 1320; - uint64_t total_flops = - (site_flops + site_flops2 + (2 * num_flavour * op_flops_per_site) * (mv_apps + mv_apps2)) * - num_cb_sites; - QPhiX::masterPrintf("# QPHIX: Solver Time = %g sec\n", (end_time - start_time)); - QPhiX::masterPrintf("# QPHIX: Performance in GFLOPS = %g\n\n", - 1.0e-9 * total_flops / (end_time - start_time)); - - if (solver_is_mixed(solver_flag)) { - // the mixed solver reports the outer iterations, we would like to get - // some better total - niters = mv_apps / 2; - if (solver_flag == MIXEDBICGSTAB && solver_params.solution_type == TM_SOLUTION_M_MDAG) { - niters2 = 
mv_apps2 / 2; - } - } - // solver did not converge in maximum number of iterations - // FIXME: non-convergence does not work correctly yet - if ((niters + niters2) > max_iter) { - niters = -1; - niters2 = 0; - } - return (niters + niters2); -} - -// Due to github issue #404, the helper functions to apply the full QPhiX operator -// are currently disabled because they conflict with the new interfaces in QPhiX -// itself. If required, these should be rewritten to use these interfaces -// rather than the base classes in qphix_base_classes.hpp - -// Template wrapper for the Dslash operator call-able from C code -// void Mfull_qphix(spinor *Even_out, spinor *Odd_out, const spinor *Even_in, const spinor *Odd_in, -// const op_type_t op_type) { -// tmlqcd::checkQphixInputParameters(qphix_input); -// // FIXME: two-row gauge compression and double precision hard-coded -// _initQphix(0, nullptr, qphix_input, 12, QPHIX_DOUBLE_PREC); -// -// if (qphix_precision == QPHIX_DOUBLE_PREC) { -// if (QPHIX_SOALEN > VECLEN_DP) { -// QPhiX::masterPrintf("SOALEN=%d is greater than the double prec VECLEN=%d\n", QPHIX_SOALEN, -// VECLEN_DP); -// abort(); -// } -// QPhiX::masterPrintf("TESTING IN DOUBLE PRECISION \n"); -// if (compress12) { -// Mfull_helper(Even_out, Odd_out, Even_in, Odd_in, -// op_type); -// } else { -// Mfull_helper(Even_out, Odd_out, Even_in, Odd_in, -// op_type); -// } -// } else if (qphix_precision == QPHIX_FLOAT_PREC) { -// if (QPHIX_SOALEN > VECLEN_SP) { -// QPhiX::masterPrintf("SOALEN=%d is greater than the single prec VECLEN=%d\n", QPHIX_SOALEN, -// VECLEN_SP); -// abort(); -// } -// QPhiX::masterPrintf("TESTING IN SINGLE PRECISION \n"); -// if (compress12) { -// Mfull_helper(Even_out, Odd_out, Even_in, Odd_in, -// op_type); -// } else { -// Mfull_helper(Even_out, Odd_out, Even_in, Odd_in, -// op_type); -// } -// } -// #if (defined(QPHIX_MIC_SOURCE) || defined(QPHIX_AVX512_SOURCE)) -// else if (qphix_precision == QPHIX_HALF_PREC) { -// if (QPHIX_SOALEN > VECLEN_HP) 
{ -// QPhiX::masterPrintf("SOALEN=%d is greater than the half prec VECLEN=%d\n", QPHIX_SOALEN, -// VECLEN_HP); -// abort(); -// } -// QPhiX::masterPrintf("TESTING IN HALF PRECISION \n"); -// if (compress12) { -// Mfull_helper(Even_out, Odd_out, Even_in, Odd_in, -// op_type); -// } else { -// Mfull_helper(Even_out, Odd_out, Even_in, -// Odd_in, -// op_type); -// } -// } -// #endif -//} - -// we have a unified interface for n-flavour inversions, but we need to provide wrappers -// which can be called by the tmLQCD solver drivers for one and two-flavour inversions -int invert_eo_qphix_oneflavour(spinor *Odd_out_1f, spinor *Odd_in_1f, const int max_iter, - const double precision, const int solver_flag, const int rel_prec, - const solver_params_t solver_params, const SloppyPrecision sloppy, - const CompressionType compression) { - const int num_flavour = 1; - const int num_shifts = 1; - std::vector > Odd_out; - std::vector > Odd_in; - - Odd_out.resize(num_shifts); - Odd_out[0].resize(num_flavour); - Odd_in.resize(1); - Odd_in[0].resize(num_flavour); - - Odd_in[0][0] = Odd_in_1f; - Odd_out[0][0] = Odd_out_1f; - - return invert_eo_qphix_nflavour_mshift(Odd_out, Odd_in, precision, max_iter, solver_flag, - rel_prec, solver_params, sloppy, compression, num_flavour); -} - -int invert_eo_qphix_oneflavour_mshift(spinor **Odd_out_1f, spinor *Odd_in_1f, const int max_iter, - const double precision, const int solver_flag, - const int rel_prec, const solver_params_t solver_params, - const SloppyPrecision sloppy, - const CompressionType compression) { - // even though the default is set to 1, guard against zeroes - const int num_shifts = solver_params.no_shifts == 0 ? 
1 : solver_params.no_shifts; - const int num_flavour = 1; - std::vector > Odd_out; - std::vector > Odd_in; - - Odd_out.resize(num_shifts); - Odd_in.resize(1); - Odd_in[0].resize(num_flavour); - - Odd_in[0][0] = Odd_in_1f; - for (int shift = 0; shift < num_shifts; shift++) { - Odd_out[shift].resize(num_flavour); - Odd_out[shift][0] = Odd_out_1f[shift]; - } - - return invert_eo_qphix_nflavour_mshift(Odd_out, Odd_in, precision, max_iter, solver_flag, - rel_prec, solver_params, sloppy, compression, num_flavour); -} - -// Template wrapper for QPhiX solvers callable from C code, return number of iterations -int invert_eo_qphix_twoflavour(spinor *Odd_out_s, spinor *Odd_out_c, spinor *Odd_in_s, - spinor *Odd_in_c, const int max_iter, const double precision, - const int solver_flag, const int rel_prec, - const solver_params_t solver_params, const SloppyPrecision sloppy, - const CompressionType compression) { - const int num_flavour = 2; - const int num_shifts = 1; - std::vector > Odd_out; - std::vector > Odd_in; - - Odd_out.resize(num_shifts); - Odd_out[0].resize(num_flavour); - Odd_in.resize(1); - Odd_in[0].resize(num_flavour); - - Odd_in[0][0] = Odd_in_s; - Odd_in[0][1] = Odd_in_c; - - Odd_out[0][0] = Odd_out_s; - Odd_out[0][1] = Odd_out_c; - - return invert_eo_qphix_nflavour_mshift(Odd_out, Odd_in, precision, max_iter, solver_flag, - rel_prec, solver_params, sloppy, compression, num_flavour); -} - -int invert_eo_qphix_twoflavour_mshift(spinor **Odd_out_s, spinor **Odd_out_c, spinor *Odd_in_s, - spinor *Odd_in_c, const int max_iter, const double precision, - const int solver_flag, const int rel_prec, - const solver_params_t solver_params, - const SloppyPrecision sloppy, - const CompressionType compression) { - // even though the default is set to 1, guard against zeroes - const int num_shifts = solver_params.no_shifts == 0 ? 
1 : solver_params.no_shifts; - const int num_flavour = 2; - std::vector > Odd_out; - std::vector > Odd_in; - - Odd_out.resize(num_shifts); - Odd_in.resize(1); - Odd_in[0].resize(num_flavour); - - Odd_in[0][0] = Odd_in_s; - Odd_in[0][1] = Odd_in_c; - - for (int shift = 0; shift < num_shifts; shift++) { - Odd_out[shift].resize(num_flavour); - Odd_out[shift][0] = Odd_out_s[shift]; - Odd_out[shift][1] = Odd_out_c[shift]; - } - - return invert_eo_qphix_nflavour_mshift(Odd_out, Odd_in, precision, max_iter, solver_flag, - rel_prec, solver_params, sloppy, compression, num_flavour); -} - -// Template wrapper for QPhiX solvers callable from C code, return number of iterations -// the interface is prepared for multi-rhs solves, hence the double vector for the input -int invert_eo_qphix_nflavour_mshift(std::vector > &Odd_out, - std::vector > &Odd_in, - const double precision, const int max_iter, - const int solver_flag, const int rel_prec, - solver_params_t solver_params, const SloppyPrecision sloppy, - const CompressionType compression, const int num_flavour) { - tmlqcd::checkQphixInputParameters(qphix_input); - double target_precision = precision; - double src_norm = 0.0; - for (int f = 0; f < num_flavour; ++f) { - src_norm += square_norm(Odd_in[0][f], VOLUME / 2, 1); - } - // we use "precision_lambda" to determine if a system can be solved in half or float - // precision (when a fixed-precision solver is used) - double precision_lambda = target_precision / src_norm; - if (rel_prec == 1) { - QPhiX::masterPrintf("# QPHIX: Using relative precision\n"); - target_precision = precision * src_norm; - precision_lambda = precision; - } - QPhiX::masterPrintf("# QPHIX: precision_lambda: %g, target_precision: %g\n\n", precision_lambda, - target_precision); - - // mixed solvers require inner and outer precisions, which we specify explicitly here - if (solver_is_mixed(solver_flag)) { -#if (defined(QPHIX_MIC_SOURCE) || defined(QPHIX_AVX512_SOURCE)) - if (sloppy == SLOPPY_HALF) { - if 
(QPHIX_SOALEN > VECLEN_DP || QPHIX_SOALEN > VECLEN_HP) { - QPhiX::masterPrintf( - "SOALEN=%d is greater than the half prec VECLEN=%d or the double prec VECLEN=%d\n", - QPHIX_SOALEN, VECLEN_HP, VECLEN_DP); - abort(); - } - QPhiX::masterPrintf("# INITIALIZING QPHIX MIXED SOLVER\n"); - QPhiX::masterPrintf("# USING DOUBLE-HALF PRECISION\n"); - _initQphix(0, nullptr, qphix_input, compression, QPHIX_DOUBLE_PREC, QPHIX_HALF_PREC); - if (compress12) { - return invert_eo_qphix_helper( - Odd_out, Odd_in, target_precision, max_iter, solver_flag, solver_params, num_flavour); - } else { - return invert_eo_qphix_helper( - Odd_out, Odd_in, target_precision, max_iter, solver_flag, solver_params, num_flavour); - } - } else -#else - if (sloppy == SLOPPY_HALF) { - QPhiX::masterPrintf("QPHIX interface: half precision not supported on this architecture!\n"); - abort(); - } else -#endif - if (sloppy == SLOPPY_SINGLE) { - if (QPHIX_SOALEN > VECLEN_DP || QPHIX_SOALEN > VECLEN_SP) { - QPhiX::masterPrintf( - "SOALEN=%d is greater than the single prec VECLEN=%d or the double prec VECLEN=%d\n", - QPHIX_SOALEN, VECLEN_SP, VECLEN_DP); - abort(); - } - QPhiX::masterPrintf("# INITIALIZING QPHIX MIXED SOLVER\n"); - QPhiX::masterPrintf("# USING DOUBLE-SINGLE PRECISION\n"); - _initQphix(0, nullptr, qphix_input, compression, QPHIX_DOUBLE_PREC, QPHIX_FLOAT_PREC); - if (compress12) { - return invert_eo_qphix_helper( - Odd_out, Odd_in, target_precision, max_iter, solver_flag, solver_params, num_flavour); - } else { - return invert_eo_qphix_helper( - Odd_out, Odd_in, target_precision, max_iter, solver_flag, solver_params, num_flavour); - } - } else { // if(sloppy) - if (QPHIX_SOALEN > VECLEN_DP) { - QPhiX::masterPrintf("SOALEN=%d is greater than the double prec VECLEN=%d\n", QPHIX_SOALEN, - VECLEN_DP); - abort(); - } - QPhiX::masterPrintf("# INITIALIZING QPHIX MIXED SOLVER\n"); - QPhiX::masterPrintf("# USING DOUBLE-DOUBLE PRECISION\n"); - _initQphix(0, nullptr, qphix_input, compression, 
QPHIX_DOUBLE_PREC, QPHIX_DOUBLE_PREC); - if (compress12) { - return invert_eo_qphix_helper( - Odd_out, Odd_in, target_precision, max_iter, solver_flag, solver_params, num_flavour); - } else { - return invert_eo_qphix_helper( - Odd_out, Odd_in, target_precision, max_iter, solver_flag, solver_params, num_flavour); - } - } // if( sloppy ) - } else { // if( solver_is_mixed ) -#if (defined(QPHIX_MIC_SOURCE) || defined(QPHIX_AVX512_SOURCE)) - if (sloppy == SLOPPY_HALF || precision_lambda >= rsdTarget::value) { - if (QPHIX_SOALEN > VECLEN_HP) { - QPhiX::masterPrintf("SOALEN=%d is greater than the half prec VECLEN=%d\n", QPHIX_SOALEN, - VECLEN_HP); - abort(); - } - QPhiX::masterPrintf("# INITIALIZING QPHIX SOLVER\n"); - QPhiX::masterPrintf("# USING HALF PRECISION\n"); - _initQphix(0, nullptr, qphix_input, compression, QPHIX_HALF_PREC); - - if (compress12) { - return invert_eo_qphix_helper( - Odd_out, Odd_in, target_precision, max_iter, solver_flag, solver_params, num_flavour); - } else { - return invert_eo_qphix_helper( - Odd_out, Odd_in, target_precision, max_iter, solver_flag, solver_params, num_flavour); - } - } else -#else - if (sloppy == SLOPPY_HALF) { - QPhiX::masterPrintf("QPHIX interface: half precision not supported on this architecture!\n"); - abort(); - } else -#endif - if (sloppy == SLOPPY_SINGLE || precision_lambda >= rsdTarget::value) { - if (QPHIX_SOALEN > VECLEN_SP) { - QPhiX::masterPrintf("SOALEN=%d is greater than the single prec VECLEN=%d\n", QPHIX_SOALEN, - VECLEN_SP); - abort(); - } - QPhiX::masterPrintf("# INITIALIZING QPHIX SOLVER\n"); - QPhiX::masterPrintf("# USING SINGLE PRECISION\n"); - _initQphix(0, nullptr, qphix_input, compression, QPHIX_FLOAT_PREC); - - if (compress12) { - return invert_eo_qphix_helper( - Odd_out, Odd_in, target_precision, max_iter, solver_flag, solver_params, num_flavour); - } else { - return invert_eo_qphix_helper( - Odd_out, Odd_in, target_precision, max_iter, solver_flag, solver_params, num_flavour); - } - } else { - if 
(QPHIX_SOALEN > VECLEN_DP) { - QPhiX::masterPrintf("SOALEN=%d is greater than the double prec VECLEN=%d\n", QPHIX_SOALEN, - VECLEN_DP); - abort(); - } - QPhiX::masterPrintf("# INITIALIZING QPHIX SOLVER\n"); - QPhiX::masterPrintf("# USING DOUBLE PRECISION\n"); - _initQphix(0, nullptr, qphix_input, compression, QPHIX_DOUBLE_PREC); - - if (compress12) { - return invert_eo_qphix_helper( - Odd_out, Odd_in, target_precision, max_iter, solver_flag, solver_params, num_flavour); - } else { - return invert_eo_qphix_helper( - Odd_out, Odd_in, target_precision, max_iter, solver_flag, solver_params, num_flavour); - } - } // if( sloppy || target_precision ) - } // if ( solver_flag == *MIXEDCG ) - return -1; -} - -void tmlqcd::checkQphixInputParameters(const tm_QPhiXParams_t ¶ms) { - if (params.MinCt == 0) { - QPhiX::masterPrintf("QPHIX Error: MinCt cannot be 0! Minimal value: 1. Aborting.\n"); - abort(); - } - if (params.By == 0 || params.Bz == 0) { - QPhiX::masterPrintf("QPHIX Error: By and Bz may not be 0! Minimal value: 1. Aborting.\n"); - abort(); - } - if (params.NCores * params.Sy * params.Sz != omp_num_threads) { - QPhiX::masterPrintf("QPHIX Error: NCores * Sy * Sz != ompnumthreads ! 
Aborting.\n"); - abort(); - } -} - -void tmlqcd::printQphixDiagnostics(int VECLEN, int SOALEN, bool compress, int VECLEN_inner, - int SOALEN_inner, bool compress_inner) { - QPhiX::masterPrintf("# QphiX: VECLEN=%d SOALEN=%d VECLEN_inner=%d, SOALEN_inner=%d\n", VECLEN, - SOALEN, VECLEN_inner, SOALEN_inner); - - QPhiX::masterPrintf("# QphiX: Declared QMP Topology (xyzt):"); - for (int mu = 0; mu < 4; mu++) QPhiX::masterPrintf(" %d", qmp_geom[mu]); - QPhiX::masterPrintf("\n"); - - QPhiX::masterPrintf("# QphiX: Mapping of dimensions QMP -> tmLQCD (xyzt):"); - for (int mu = 0; mu < 4; mu++) QPhiX::masterPrintf(" %d->%d", mu, qmp_tm_map[mu]); - QPhiX::masterPrintf("\n"); - - QPhiX::masterPrintf("# QphiX: Global Lattice Size (xyzt) = "); - for (int mu = 0; mu < 4; mu++) { - QPhiX::masterPrintf(" %d", lattSize[mu]); - } - QPhiX::masterPrintf("\n"); - QPhiX::masterPrintf("# QphiX: Local Lattice Size (xyzt) = "); - for (int mu = 0; mu < 4; mu++) { - QPhiX::masterPrintf(" %d", subLattSize[mu]); - } - QPhiX::masterPrintf("\n"); - QPhiX::masterPrintf("# QphiX: Block Sizes: By= %d Bz=%d\n", By, Bz); - QPhiX::masterPrintf("# QphiX: Cores = %d\n", NCores); - QPhiX::masterPrintf("# QphiX: SMT Grid: Sy=%d Sz=%d\n", Sy, Sz); - QPhiX::masterPrintf("# QphiX: Pad Factors: PadXY=%d PadXYZ=%d\n", PadXY, PadXYZ); - QPhiX::masterPrintf("# QphiX: Threads_per_core = %d\n", N_simt); - QPhiX::masterPrintf("# QphiX: MinCt = %d\n", MinCt); - if (compress) { - QPhiX::masterPrintf("# QphiX: Using two-row gauge compression (compress12)\n"); - } - if (compress_inner) { - QPhiX::masterPrintf("# QphiX: Inner solver using two-row gauge compression (compress12)\n"); - } -} - -void testSpinorPackers(spinor *Even_out, spinor *Odd_out, const spinor *const Even_in, - const spinor *const Odd_in) { - tmlqcd::checkQphixInputParameters(qphix_input); - // FIXME: two-row gauge compression and double precision hard-coded - _initQphix(0, nullptr, qphix_input, 12, QPHIX_DOUBLE_PREC); - - QPhiX::Geometry 
geom(subLattSize, By, Bz, NCores, Sy, Sz, - PadXY, PadXYZ, MinCt); - - auto qphix_cb_even = QPhiX::makeFourSpinorHandle(geom); - auto qphix_cb_odd = QPhiX::makeFourSpinorHandle(geom); - - spinor **tmp; - init_solver_field(&tmp, VOLUME / 2, 2); - - // reorder_eo_spinor_to_QPhiX(geom, reinterpret_cast(Even_in), - // qphix_cb_even.get(), cb_even); - // reorder_eo_spinor_to_QPhiX(geom, reinterpret_cast(Odd_in), - // qphix_cb_odd.get(), cb_odd); - reorder_eo_spinor_to_QPhiX(geom, Even_in, qphix_cb_even.get(), cb_even); - reorder_eo_spinor_to_QPhiX(geom, Odd_in, qphix_cb_odd.get(), cb_odd); - - reorder_eo_spinor_from_QPhiX(geom, Even_out, qphix_cb_even.get(), cb_even, 1.0); - reorder_eo_spinor_from_QPhiX(geom, Odd_out, qphix_cb_odd.get(), cb_odd, 1.0); - - diff(tmp[0], Even_out, Even_in, VOLUME / 2); - diff(tmp[1], Odd_out, Odd_in, VOLUME / 2); - double l2norm = square_norm(tmp[0], VOLUME / 2, 1) + square_norm(tmp[1], VOLUME / 2, 1); - QPhiX::masterPrintf("QPHIX eo spinor packer back and forth difference L2 norm: %lf\n", l2norm); - finalize_solver(tmp, 2); -} diff --git a/qphix_interface.hpp b/qphix_interface.hpp deleted file mode 100644 index b487eda66..000000000 --- a/qphix_interface.hpp +++ /dev/null @@ -1,51 +0,0 @@ -/*********************************************************************** - * - * Copyright (C) 2017 Bartosz Kostrzewa - * - * This file is part of tmLQCD. - * - * tmLQCD is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * tmLQCD is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with tmLQCD. 
If not, see . - * - ***********************************************************************/ - -#pragma once - -#include "global.h" -#include "qphix_types.h" - -#ifdef __cplusplus /* If this is a C++ compiler, use C linkage */ -extern "C" { -#endif - -#include "misc_types.h" -#include "operator_types.h" -#include "solver/matrix_mult_typedef.h" -#include "solver/solver_params.h" -#include "su3.h" - -#ifdef __cplusplus -} -#endif - -#include - -int invert_eo_qphix_nflavour_mshift(std::vector< std::vector< spinor* > > &Odd_out, - std::vector< std::vector< spinor* > > &Odd_in, - const double precision, - const int max_iter, - const int solver_flag, - const int rel_prec, - solver_params_t solver_params, - const SloppyPrecision sloppy, const CompressionType compression, - const int num_flavour); \ No newline at end of file diff --git a/qphix_interface_utils.hpp b/qphix_interface_utils.hpp deleted file mode 100644 index 56d8afe56..000000000 --- a/qphix_interface_utils.hpp +++ /dev/null @@ -1,33 +0,0 @@ -/*********************************************************************** - * - * Copyright (C) 2015 Mario Schroeck - * 2016 Peter Labus - * 2017 Peter Labus, Martin Ueding, Bartosz Kostrzewa - * - * This file is part of tmLQCD. - * - * tmLQCD is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * tmLQCD is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with tmLQCD. If not, see . 
- * - ***********************************************************************/ - -#pragma once - -#include "qphix_types.h" - -namespace tmlqcd { - -void checkQphixInputParameters(const tm_QPhiXParams_t ¶ms); -void printQphixDiagnostics(int VECLEN, int SOALEN, bool compress, int VECLEN_inner, int SOALEN_inner, bool compress_inner); - -} // namespace tmlqcd diff --git a/src/bin/LapH_ev.c b/src/bin/LapH_ev.c index dd96133fb..08e810b36 100644 --- a/src/bin/LapH_ev.c +++ b/src/bin/LapH_ev.c @@ -63,20 +63,20 @@ int main(int argc, char *argv[]) { tmlqcd_mpi_init(argc, argv); if (g_proc_id == 0) { -#ifdef _GAUGE_COPY - printf("# The code was compiled with -D_GAUGE_COPY\n"); +#ifdef TM_GAUGE_COPY + printf("# The code was compiled with -DTM_GAUGE_COPY\n"); #endif -#ifdef _USE_HALFSPINOR - printf("# The code was compiled with -D_USE_HALFSPINOR\n"); +#ifdef TM_USE_HALFSPINOR + printf("# The code was compiled with -DTM_USE_HALFSPINOR\n"); #endif -#ifdef _USE_SHMEM - printf("# the code was compiled with -D_USE_SHMEM\n"); -#ifdef _PERSISTENT +#ifdef TM_USE_SHMEM + printf("# the code was compiled with -DTM_USE_SHMEM\n"); +#ifdef TM_PERSISTENT printf("# the code was compiled for persistent MPI calls (halfspinor only)\n"); #endif #endif #ifdef TM_USE_MPI -#ifdef _NON_BLOCKING +#ifdef TM_NON_BLOCKING printf("# the code was compiled for non-blocking MPI calls (spinor and gauge)\n"); #endif #endif @@ -98,8 +98,8 @@ int main(int argc, char *argv[]) { exit(0); #endif #endif -#ifdef FIXEDVOLUME - printf(" Error: FIXEDVOLUME not allowed"); +#ifdef TM_FIXEDVOLUME + printf(" Error: TM_FIXEDVOLUME not allowed"); exit(0); #endif diff --git a/src/bin/benchmark.c b/src/bin/benchmark.c index 3dd70a86b..72d8c8f4d 100644 --- a/src/bin/benchmark.c +++ b/src/bin/benchmark.c @@ -33,7 +33,7 @@ #include #ifdef TM_USE_MPI #include -#ifdef HAVE_LIBLEMON +#ifdef TM_USE_LEMON #include #include #endif @@ -61,19 +61,19 @@ #include "test/check_geometry.h" #include "xchange/xchange.h" -#ifdef PARALLELT +#ifdef 
TM_PARALLELT #define SLICE (LX * LY * LZ / 2) -#elif defined PARALLELXT +#elif defined TM_PARALLELXT #define SLICE ((LX * LY * LZ / 2) + (T * LY * LZ / 2)) -#elif defined PARALLELXYT +#elif defined TM_PARALLELXYT #define SLICE ((LX * LY * LZ / 2) + (T * LY * LZ / 2) + (T * LX * LZ / 2)) -#elif defined PARALLELXYZT +#elif defined TM_PARALLELXYZT #define SLICE ((LX * LY * LZ / 2) + (T * LY * LZ / 2) + (T * LX * LZ / 2) + (T * LX * LY / 2)) -#elif defined PARALLELX +#elif defined TM_PARALLELX #define SLICE ((LY * LZ * T / 2)) -#elif defined PARALLELXY +#elif defined TM_PARALLELXY #define SLICE ((LY * LZ * T / 2) + (LX * LZ * T / 2)) -#elif defined PARALLELXYZ +#elif defined TM_PARALLELXYZ #define SLICE ((LY * LZ * T / 2) + (LX * LZ * T / 2) + (LX * LY * T / 2)) #endif @@ -81,7 +81,7 @@ int check_xchange(); int main(int argc, char *argv[]) { int j, j_max, k, k_max = 1; -#ifdef HAVE_LIBLEMON +#ifdef TM_USE_LEMON paramsXlfInfo *xlfInfo; #endif int status = 0; @@ -123,20 +123,20 @@ int main(int argc, char *argv[]) { tmlqcd_mpi_init(argc, argv); if (g_proc_id == 0) { -#ifdef _GAUGE_COPY - printf("# The code was compiled with -D_GAUGE_COPY\n"); +#ifdef TM_GAUGE_COPY + printf("# The code was compiled with -DTM_GAUGE_COPY\n"); #endif -#ifdef _USE_HALFSPINOR - printf("# The code was compiled with -D_USE_HALFSPINOR\n"); +#ifdef TM_USE_HALFSPINOR + printf("# The code was compiled with -DTM_USE_HALFSPINOR\n"); #endif -#ifdef _USE_SHMEM - printf("# The code was compiled with -D_USE_SHMEM\n"); -#ifdef _PERSISTENT +#ifdef TM_USE_SHMEM + printf("# The code was compiled with -DTM_USE_SHMEM\n"); +#ifdef TM_PERSISTENT printf("# The code was compiled for persistent MPI calls (halfspinor only)\n"); #endif #endif #ifdef TM_USE_MPI -#ifdef _NON_BLOCKING +#ifdef TM_NON_BLOCKING printf("# The code was compiled for non-blocking MPI calls (spinor and gauge)\n"); #endif #endif @@ -144,7 +144,7 @@ int main(int argc, char *argv[]) { fflush(stdout); } -#ifdef _GAUGE_COPY +#ifdef TM_GAUGE_COPY 
init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); #else init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0); @@ -186,7 +186,7 @@ int main(int argc, char *argv[]) { /* define the boundary conditions for the fermion fields */ boundary(g_kappa); -#ifdef _USE_HALFSPINOR +#ifdef TM_USE_HALFSPINOR j = init_dirac_halfspinor(); if (j != 0) { fprintf(stderr, "Not enough memory for halfspinor fields! Aborting...\n"); @@ -200,7 +200,7 @@ int main(int argc, char *argv[]) { exit(0); } } -#if (defined _PERSISTENT) +#if (defined TM_PERSISTENT) init_xchange_halffield(); #endif #endif @@ -210,7 +210,7 @@ int main(int argc, char *argv[]) { fprintf(stderr, "Checking of geometry failed. Unable to proceed.\nAborting....\n"); exit(1); } -#if (defined TM_USE_MPI && !(defined _USE_SHMEM)) +#if (defined TM_USE_MPI && !(defined TM_USE_SHMEM)) check_xchange(); #endif @@ -344,7 +344,7 @@ int main(int argc, char *argv[]) { sdt = sdt / ((double)(2 * SLICE)); if (g_proc_id == 0) { printf("# The size of the package is %d bytes.\n", (SLICE) * 192); -#ifdef _USE_HALFSPINOR +#ifdef TM_USE_HALFSPINOR printf("# The bandwidth is %5.2f + %5.2f MB/sec\n", 192. / sdt / 1024 / 1024, 192. / sdt / 1024. 
/ 1024); #else @@ -431,7 +431,7 @@ int main(int argc, char *argv[]) { } } -#ifdef HAVE_LIBLEMON +#ifdef TM_USE_LEMON if (g_proc_id == 0) { printf("# Performing parallel IO test ...\n"); } diff --git a/src/bin/deriv_mg_tune.c b/src/bin/deriv_mg_tune.c index d3abb66ee..75595bc60 100644 --- a/src/bin/deriv_mg_tune.c +++ b/src/bin/deriv_mg_tune.c @@ -64,7 +64,7 @@ #include "solver/solver.h" #include "test/check_geometry.h" #include "update_tm.h" -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG #include "DDalphaAMG_interface.h" #endif #ifdef TM_USE_QUDA @@ -98,7 +98,7 @@ int main(int argc, char *argv[]) { init_critical_globals(TM_PROGRAM_DERIV_MG_TUNE); -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst init #pragma pomp inst begin(main) #endif @@ -136,7 +136,7 @@ int main(int argc, char *argv[]) { g_mu = g_mu1; -#ifdef _GAUGE_COPY +#ifdef TM_GAUGE_COPY status = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); status += init_gauge_field_32(VOLUMEPLUSRAND + g_dbw2rand, 1); #else @@ -204,7 +204,7 @@ int main(int argc, char *argv[]) { exit(1); } -#ifdef _USE_HALFSPINOR +#ifdef TM_USE_HALFSPINOR j = init_dirac_halfspinor(); if (j != 0) { fprintf(stderr, "Not enough memory for halffield! 
Aborting...\n"); @@ -217,7 +217,7 @@ int main(int argc, char *argv[]) { exit(-1); } -#if (defined _PERSISTENT) +#if (defined TM_PERSISTENT) init_xchange_halffield(); #endif #endif @@ -367,7 +367,7 @@ int main(int argc, char *argv[]) { #endif return (0); -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst end(main) #endif } diff --git a/src/bin/hmc_tm.c b/src/bin/hmc_tm.c index 2db6f8c1b..0d95a3b3c 100644 --- a/src/bin/hmc_tm.c +++ b/src/bin/hmc_tm.c @@ -67,7 +67,7 @@ #include "solver/solver.h" #include "test/check_geometry.h" #include "update_tm.h" -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG #include "DDalphaAMG_interface.h" #endif #ifdef TM_USE_QUDA @@ -113,7 +113,7 @@ int main(int argc, char *argv[]) { init_critical_globals(TM_PROGRAM_HMC_TM); -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst init #pragma pomp inst begin(main) #endif @@ -168,7 +168,7 @@ int main(int argc, char *argv[]) { g_mu = g_mu1; -#ifdef _GAUGE_COPY +#ifdef TM_GAUGE_COPY status = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); status += init_gauge_field_32(VOLUMEPLUSRAND + g_dbw2rand, 1); #else @@ -257,7 +257,7 @@ int main(int argc, char *argv[]) { exit(1); } -#ifdef _USE_HALFSPINOR +#ifdef TM_USE_HALFSPINOR j = init_dirac_halfspinor(); if (j != 0) { fprintf(stderr, "Not enough memory for halffield! Aborting...\n"); @@ -270,7 +270,7 @@ int main(int argc, char *argv[]) { exit(-1); } -#if (defined _PERSISTENT) +#if (defined TM_PERSISTENT) init_xchange_halffield(); #endif #endif @@ -504,7 +504,7 @@ int main(int argc, char *argv[]) { } /* online measurements */ -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG // When the configuration is rejected, we have to update it in the MG and redo the setup. int mg_update = accept ? 
0 : 1; #endif @@ -514,7 +514,7 @@ int main(int argc, char *argv[]) { if (g_proc_id == 0) { fprintf(stdout, "#\n# Beginning online measurement.\n"); } -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG if (mg_update) { mg_update = 0; MG_reset(); @@ -591,7 +591,7 @@ int main(int argc, char *argv[]) { #endif return (0); -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst end(main) #endif } diff --git a/src/bin/invert.c b/src/bin/invert.c index 007e0ea41..c3111decb 100644 --- a/src/bin/invert.c +++ b/src/bin/invert.c @@ -84,7 +84,7 @@ #ifdef TM_USE_QPHIX #include "qphix_interface.h" #endif -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG #include "DDalphaAMG_interface.h" #endif #include "expo.h" @@ -114,7 +114,7 @@ int main(int argc, char *argv[]) { init_critical_globals(TM_PROGRAM_INVERT); -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst init #pragma pomp inst begin(main) #endif @@ -165,7 +165,7 @@ int main(int argc, char *argv[]) { g_dbw2rand = 0; #endif -#ifdef _GAUGE_COPY +#ifdef TM_GAUGE_COPY j = init_gauge_field(VOLUMEPLUSRAND, 1); j += init_gauge_field_32(VOLUMEPLUSRAND, 1); #else @@ -246,7 +246,7 @@ int main(int argc, char *argv[]) { init_measurements(); /* this could be maybe moved to init_operators */ -#ifdef _USE_HALFSPINOR +#ifdef TM_USE_HALFSPINOR j = init_dirac_halfspinor(); if (j != 0) { fprintf(stderr, "Not enough memory for halffield! Aborting...\n"); @@ -258,7 +258,7 @@ int main(int argc, char *argv[]) { fprintf(stderr, "Not enough memory for 32-bit halffield! 
Aborting...\n"); exit(-1); } -#if (defined _PERSISTENT) +#if (defined TM_PERSISTENT) if (even_odd_flag) init_xchange_halffield(); #endif #endif @@ -362,7 +362,7 @@ int main(int argc, char *argv[]) { g_precWS = NULL; if (use_preconditioning == 1) { /* todo load fftw wisdom */ -#if (defined HAVE_FFTW) && !(defined TM_USE_MPI) +#if (defined TM_USE_FFTW) && !(defined TM_USE_MPI) loadFFTWWisdom(g_spinor_field[0], g_spinor_field[1], T, LX); #else use_preconditioning = 0; @@ -457,7 +457,7 @@ int main(int argc, char *argv[]) { MPI_Finalize(); #endif return (0); -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst end(main) #endif } diff --git a/src/bin/offline_measurement.c b/src/bin/offline_measurement.c index c2ae72f9c..72a828fb7 100644 --- a/src/bin/offline_measurement.c +++ b/src/bin/offline_measurement.c @@ -83,7 +83,7 @@ int main(int argc, char *argv[]) { init_critical_globals(TM_PROGRAM_OFFLINE_MEASUREMENT); -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst init #pragma pomp inst begin(main) #endif @@ -127,7 +127,7 @@ int main(int argc, char *argv[]) { g_dbw2rand = 0; #endif -#ifdef _GAUGE_COPY +#ifdef TM_GAUGE_COPY j = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); #else j = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0); @@ -210,7 +210,7 @@ int main(int argc, char *argv[]) { init_measurements(); /* this could be maybe moved to init_operators */ -#ifdef _USE_HALFSPINOR +#ifdef TM_USE_HALFSPINOR j = init_dirac_halfspinor(); if (j != 0) { fprintf(stderr, "Not enough memory for halffield! 
Aborting...\n"); @@ -223,7 +223,7 @@ int main(int argc, char *argv[]) { exit(-1); } } -#if (defined _PERSISTENT) +#if (defined TM_PERSISTENT) if (even_odd_flag) init_xchange_halffield(); #endif #endif @@ -307,7 +307,7 @@ int main(int argc, char *argv[]) { #endif return (0); -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst end(main) #endif } diff --git a/src/bin/check_locallity.c b/src/bin/tests/check_locallity.c similarity index 98% rename from src/bin/check_locallity.c rename to src/bin/tests/check_locallity.c index 52ea21209..f03806f21 100644 --- a/src/bin/check_locallity.c +++ b/src/bin/tests/check_locallity.c @@ -77,13 +77,13 @@ int main(int argc, char *argv[]) { double *norm; struct stout_parameters params_smear; -#ifdef _GAUGE_COPY +#ifdef TM_GAUGE_COPY int kb = 0; #endif #ifdef TM_USE_MPI double atime = 0., etime = 0.; #endif -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst init #pragma pomp inst begin(main) #endif @@ -144,7 +144,7 @@ int main(int argc, char *argv[]) { g_dbw2rand = 0; #endif -#ifdef _GAUGE_COPY +#ifdef TM_GAUGE_COPY j = init_gauge_field(VOLUMEPLUSRAND, 1); #else j = init_gauge_field(VOLUMEPLUSRAND, 0); @@ -186,7 +186,7 @@ int main(int argc, char *argv[]) { /* define the boundary conditions for the fermion fields */ boundary(); -#ifdef _USE_HALFSPINOR +#ifdef TM_USE_HALFSPINOR j = init_dirac_halfspinor(); if (j != 0) { fprintf(stderr, "Not enough memory for halffield! 
Aborting...\n"); @@ -199,7 +199,7 @@ int main(int argc, char *argv[]) { exit(-1); } } -#if (defined _PERSISTENT) +#if (defined TM_PERSISTENT) init_xchange_halffield(); #endif #endif @@ -312,7 +312,7 @@ int main(int argc, char *argv[]) { free_spinor_field(); free_moment_field(); return (0); -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst end(main) #endif } diff --git a/src/bin/hopping_test.c b/src/bin/tests/hopping_test.c similarity index 94% rename from src/bin/hopping_test.c rename to src/bin/tests/hopping_test.c index 04df878e5..da60c83ba 100644 --- a/src/bin/hopping_test.c +++ b/src/bin/tests/hopping_test.c @@ -34,7 +34,7 @@ #include #ifdef TM_USE_MPI #include -#ifdef HAVE_LIBLEMON +#ifdef TM_USE_LEMON #include #include #endif @@ -59,19 +59,19 @@ #include "test/check_geometry.h" #include "xchange/xchange.h" -#ifdef PARALLELT +#ifdef TM_PARALLELT #define SLICE (LX * LY * LZ / 2) -#elif defined PARALLELXT +#elif defined TM_PARALLELXT #define SLICE ((LX * LY * LZ / 2) + (T * LY * LZ / 2)) -#elif defined PARALLELXYT +#elif defined TM_PARALLELXYT #define SLICE ((LX * LY * LZ / 2) + (T * LY * LZ / 2) + (T * LX * LZ / 2)) -#elif defined PARALLELXYZT +#elif defined TM_PARALLELXYZT #define SLICE ((LX * LY * LZ / 2) + (T * LY * LZ / 2) + (T * LX * LZ / 2) + (T * LX * LY / 2)) -#elif defined PARALLELX +#elif defined TM_PARALLELX #define SLICE ((LY * LZ * T / 2)) -#elif defined PARALLELXY +#elif defined TM_PARALLELXY #define SLICE ((LY * LZ * T / 2) + (LX * LZ * T / 2)) -#elif defined PARALLELXYZ +#elif defined TM_PARALLELXYZ #define SLICE ((LY * LZ * T / 2) + (LX * LZ * T / 2) + (LX * LY * T / 2)) #endif @@ -102,20 +102,20 @@ int main(int argc, char *argv[]) { tmlqcd_mpi_init(argc, argv); if (g_proc_id == 0) { -#ifdef _GAUGE_COPY - printf("# The code was compiled with -D_GAUGE_COPY\n"); +#ifdef TM_GAUGE_COPY + printf("# The code was compiled with -DTM_GAUGE_COPY\n"); #endif -#ifdef _USE_HALFSPINOR - printf("# The code was compiled with -D_USE_HALFSPINOR\n"); 
+#ifdef TM_USE_HALFSPINOR + printf("# The code was compiled with -DTM_USE_HALFSPINOR\n"); #endif -#ifdef _USE_SHMEM - printf("# the code was compiled with -D_USE_SHMEM\n"); -#ifdef _PERSISTENT +#ifdef TM_USE_SHMEM + printf("# the code was compiled with -DTM_USE_SHMEM\n"); +#ifdef TM_PERSISTENT printf("# the code was compiled for persistent MPI calls (halfspinor only)\n"); #endif #endif #ifdef TM_USE_MPI -#ifdef _NON_BLOCKING +#ifdef TM_NON_BLOCKING printf("# the code was compiled for non-blocking MPI calls (spinor and gauge)\n"); #endif #endif @@ -123,7 +123,7 @@ int main(int argc, char *argv[]) { fflush(stdout); } -#ifdef _GAUGE_COPY +#ifdef TM_GAUGE_COPY init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); #else init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0); @@ -165,7 +165,7 @@ int main(int argc, char *argv[]) { /* define the boundary conditions for the fermion fields */ boundary(g_kappa); -#ifdef _USE_HALFSPINOR +#ifdef TM_USE_HALFSPINOR j = init_dirac_halfspinor(); if (j != 0) { fprintf(stderr, "Not enough memory for halfspinor fields! 
Aborting...\n"); @@ -179,7 +179,7 @@ int main(int argc, char *argv[]) { exit(0); } } -#if (defined _PERSISTENT) +#if (defined TM_PERSISTENT) init_xchange_halffield(); #endif #endif @@ -190,7 +190,7 @@ int main(int argc, char *argv[]) { exit(1); } -#if (defined TM_USE_MPI && !(defined _USE_SHMEM)) +#if (defined TM_USE_MPI && !(defined TM_USE_SHMEM)) check_xchange(); #endif diff --git a/src/bin/qphix_test_Dslash.c b/src/bin/tests/qphix_test_Dslash.c similarity index 99% rename from src/bin/qphix_test_Dslash.c rename to src/bin/tests/qphix_test_Dslash.c index 56250bc5a..b4218d3e6 100644 --- a/src/bin/qphix_test_Dslash.c +++ b/src/bin/tests/qphix_test_Dslash.c @@ -35,7 +35,7 @@ #include #ifdef TM_USE_MPI #include -#ifdef HAVE_LIBLEMON +#ifdef TM_USE_LEMON #include #include #endif @@ -86,7 +86,7 @@ double compare_spinors(spinor* s1, spinor* s2); int main(int argc, char* argv[]) { int j; -#ifdef HAVE_LIBLEMON +#ifdef TM_USE_LEMON paramsXlfInfo* xlfInfo; #endif int status = 0; @@ -105,7 +105,7 @@ int main(int argc, char* argv[]) { tmlqcd_mpi_init(argc, argv); g_dbw2rand = 0; -#ifdef _GAUGE_COPY +#ifdef TM_GAUGE_COPY init_gauge_field(VOLUMEPLUSRAND, 1); #else init_gauge_field(VOLUMEPLUSRAND, 0); @@ -135,7 +135,7 @@ int main(int argc, char* argv[]) { /* define the geometry */ geometry(); -#ifdef _USE_HALFSPINOR +#ifdef TM_USE_HALFSPINOR j = init_dirac_halfspinor(); if (j != 0) { fprintf(stderr, "Not enough memory for halfspinor fields! Aborting...\n"); @@ -146,7 +146,7 @@ int main(int argc, char* argv[]) { fprintf(stderr, "Not enough memory for 32-Bit halfspinor fields! 
Aborting...\n"); exit(0); } -#if (defined _PERSISTENT) +#if (defined TM_PERSISTENT) init_xchange_halffield(); #endif #endif @@ -180,7 +180,7 @@ int main(int argc, char* argv[]) { #endif g_update_gauge_copy = 1; -#ifdef _GAUGE_COPY +#ifdef TM_GAUGE_COPY update_backward_gauge(g_gauge_field); #endif diff --git a/src/bin/scalar_prod_r_test.c b/src/bin/tests/scalar_prod_r_test.c similarity index 100% rename from src/bin/scalar_prod_r_test.c rename to src/bin/tests/scalar_prod_r_test.c diff --git a/src/bin/test_eigenvalues.c b/src/bin/tests/test_eigenvalues.c similarity index 98% rename from src/bin/test_eigenvalues.c rename to src/bin/tests/test_eigenvalues.c index 053944698..759d8dd2f 100644 --- a/src/bin/test_eigenvalues.c +++ b/src/bin/tests/test_eigenvalues.c @@ -227,7 +227,7 @@ int main(int argc, char *argv[]) { g_eps_sq_acc = g_eps_sq_acc1; g_eps_sq_force = g_eps_sq_force1; -#ifdef _GAUGE_COPY +#ifdef TM_GAUGE_COPY j = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); #else j = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0); @@ -274,11 +274,11 @@ int main(int argc, char *argv[]) { parameterfile = fopen(parameterfilename, "w"); printf("# This is the hmc code for twisted Mass Wilson QCD\n\nVersion %s\n", Version); -#ifdef _NEW_GEOMETRY - printf("# The code was compiled with -D_NEW_GEOMETRY\n"); +#ifdef TM_NEW_GEOMETRY + printf("# The code was compiled with -DTM_NEW_GEOMETRY\n"); #endif -#ifdef _GAUGE_COPY - printf("# The code was compiled with -D_GAUGE_COPY\n"); +#ifdef TM_GAUGE_COPY + printf("# The code was compiled with -DTM_GAUGE_COPY\n"); #endif printf("# The lattice size is %d x %d x %d x %d\n", (int)(T * g_nproc_t), (int)(LX * g_nproc_x), (int)(LY), (int)(LZ)); @@ -430,7 +430,7 @@ int main(int argc, char *argv[]) { #ifdef TM_USE_MPI xchange_gauge(g_gauge_field); #endif -#ifdef _GAUGE_COPY +#ifdef TM_GAUGE_COPY update_backward_gauge(); #endif diff --git a/src/bin/test_lemon.c b/src/bin/tests/test_lemon.c similarity index 99% rename from src/bin/test_lemon.c 
rename to src/bin/tests/test_lemon.c index f2147ad3f..3cef7689c 100644 --- a/src/bin/test_lemon.c +++ b/src/bin/tests/test_lemon.c @@ -66,7 +66,7 @@ int main(int argc, char *argv[]) { tmlqcd_mpi_init(argc, argv); -#ifdef _GAUGE_COPY +#ifdef TM_GAUGE_COPY init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); #else init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0); @@ -108,7 +108,7 @@ int main(int argc, char *argv[]) { xlfInfo = construct_paramsXlfInfo(plaquette_energy, 0); write_lime_gauge_field("conf.lime", 64, xlfInfo); -#ifdef HAVE_LIBLEMON +#ifdef TM_USE_LEMON if (g_proc_id == 0) { printf("Now we do write with lemon to conf.lemon...\n"); } diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index 4ace6c997..746b40c0d 100644 --- a/src/lib/CMakeLists.txt +++ b/src/lib/CMakeLists.txt @@ -369,11 +369,11 @@ list( list(APPEND TEST_SRC_C test/check_xchange.c test/check_geometry.c test/overlaptests.c) -if(TMLQCD_USE_QPHIX) +if(TM_USE_QPHIX) list(APPEND MAIN_SRC_C QphiX/qphix_interface.cpp) endif() -if(TMLQCD_USE_QUDA) +if(TM_USE_QUDA) list(APPEND MAIN_SRC_C quda_interface.c) endif() @@ -392,7 +392,8 @@ list( ${INIT_SRC_C} ${SOLVER_SRC_C} ${TEST_SRC_C} - ${MEAS_SRC_C}) + ${MEAS_SRC_C} + ${PROJECT_BINARY_DIR}/git_hash.c) include_directories( $ @@ -424,13 +425,10 @@ set_target_properties(hmc PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION 1) # define a library and add the dependencies target_link_libraries( hmc - PUBLIC $<$:rt> - $<$:tmlqcd::clime> - $<$:clemon::lemon> + PUBLIC $<$:rt> + $<$:tmlqcd::DDalphaAMG> $<$:tmlqcd::qphix> $<$:tmlqcd::fftw3> - $<$:MPI::MPI_C - MPI::MPI_CXX> $<$:QUDA::quda> $<$:CUDA::cufft CUDA::cufftw @@ -440,9 +438,13 @@ target_link_libraries( $<$:hip::hipfft roc::hipblas hip::host> + tmlqcd::clime + $<$:clemon::lemon> ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} - $<$:OpenMP::OpenMP_C + $<$:MPI::MPI_C + MPI::MPI_CXX> + $<$:OpenMP::OpenMP_C OpenMP::OpenMP_CXX> m) diff --git a/src/lib/DDalphaAMG_interface.c b/src/lib/DDalphaAMG_interface.c index 
029d2f76f..80bff4fcc 100644 --- a/src/lib/DDalphaAMG_interface.c +++ b/src/lib/DDalphaAMG_interface.c @@ -17,13 +17,13 @@ * You should have received a copy of the GNU General Public License * along with tmLQCD. If not, see . * - * Interface for DDalphaAMG + * Interface for TM_USE_DDalphaAMG * *******************************************************************************/ #include "DDalphaAMG_interface.h" -#ifndef DDalphaAMG +#ifndef TM_USE_DDalphaAMG int mg_setup_iter; int mg_coarse_setup_iter; @@ -43,47 +43,47 @@ double mg_dtau_update; double mg_rho_update; void MG_init(void) { - printf("ERROR: MG_init called but DDalphaAMG library not included.\n"); + printf("ERROR: MG_init called but TM_USE_DDalphaAMG library not included.\n"); exit(1); } void MG_update_gauge(double step) { - printf("ERROR: MG_update_gauge called but DDalphaAMG library not included.\n"); + printf("ERROR: MG_update_gauge called but TM_USE_DDalphaAMG library not included.\n"); exit(1); } void MG_update_mu(double mu_tmLQCD, double odd_tmLQCD) { - printf("ERROR: MG_update_mu called but DDalphaAMG library not included.\n"); + printf("ERROR: MG_update_mu called but TM_USE_DDalphaAMG library not included.\n"); exit(1); } void MG_reset(void) { - printf("ERROR: MG_reset called but DDalphaAMG library not included.\n"); + printf("ERROR: MG_reset called but TM_USE_DDalphaAMG library not included.\n"); exit(1); } void MG_finalize(void) { - printf("ERROR: MG_finalize called but DDalphaAMG library not included.\n"); + printf("ERROR: MG_finalize called but TM_USE_DDalphaAMG library not included.\n"); exit(1); } int MG_solver(spinor *const phi_new, spinor *const phi_old, const double precision, const int max_iter, const int rel_prec, const int N, su3 **gf, matrix_mult f) { - printf("ERROR: MG_solver called but DDalphaAMG library not included.\n"); + printf("ERROR: MG_solver called but TM_USE_DDalphaAMG library not included.\n"); exit(1); } int MG_solver_eo(spinor *const Even_new, spinor *const Odd_new, spinor 
*const Even, spinor *const Odd, const double precision, const int max_iter, const int rel_prec, const int N, su3 **gf, matrix_mult_full f_full) { - printf("ERROR: MG_solver_eo called but DDalphaAMG library not included.\n"); + printf("ERROR: MG_solver_eo called but TM_USE_DDalphaAMG library not included.\n"); exit(1); } int MG_solver_nd(spinor *const up_new, spinor *const dn_new, spinor *const up_old, spinor *const dn_old, const double precision, const int max_iter, const int rel_prec, const int N, su3 **gf, matrix_mult_nd f) { - printf("ERROR: MG_solver_nd called but DDalphaAMG library not included.\n"); + printf("ERROR: MG_solver_nd called but TM_USE_DDalphaAMG library not included.\n"); exit(1); } @@ -207,7 +207,7 @@ static inline int MG_check(spinor *const phi_new, spinor *const phi_old, const i "ERROR: something bad happened... MG converged giving the wrong solution!! Trying to " "restart... \n"); printf( - "ERROR contd: || s - f_{tmLQC} * f_{DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e > %e " + "ERROR contd: || s - f_{tmLQC} * f_{TM_USE_DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e > %e " "\n", differ[0], differ[1], differ[0] / differ[1], precision); } @@ -215,7 +215,7 @@ static inline int MG_check(spinor *const phi_new, spinor *const phi_old, const i } if (g_debug_level > 0 && g_proc_id == 0) - printf("MGTEST: || s - f_{tmLQC} * f_{DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e \n", + printf("MGTEST: || s - f_{tmLQC} * f_{TM_USE_DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e \n", differ[0], differ[1], differ[0] / differ[1]); return 1; @@ -257,7 +257,7 @@ static inline int MG_check_nd(spinor *const up_new, spinor *const dn_new, spinor "ERROR: something bad happened... MG converged giving the wrong solution!! Trying to " "restart... 
\n"); printf( - "ERROR contd: || s - f_{tmLQC} * f_{DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e > %e " + "ERROR contd: || s - f_{tmLQC} * f_{TM_USE_DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e > %e " "\n", differ[0], differ[1], differ[0] / differ[1], precision); } @@ -265,7 +265,7 @@ static inline int MG_check_nd(spinor *const up_new, spinor *const dn_new, spinor } if (g_debug_level > 0 && g_proc_id == 0) - printf("MGTEST: || s - f_{tmLQC} * f_{DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e \n", + printf("MGTEST: || s - f_{tmLQC} * f_{TM_USE_DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e \n", differ[0], differ[1], differ[0] / differ[1]); return 1; @@ -304,7 +304,7 @@ static inline int MG_mms_check_nd(spinor **const up_new, spinor **const dn_new, "ERROR: something bad happened... MG converged giving the wrong solution!! Trying to " "restart... \n"); printf( - "ERROR contd: || s - f_{tmLQC} * f_{DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e > " + "ERROR contd: || s - f_{tmLQC} * f_{TM_USE_DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e > " "%e \n", differ[0], differ[1], differ[0] / differ[1], precision[i]); } @@ -313,7 +313,7 @@ static inline int MG_mms_check_nd(spinor **const up_new, spinor **const dn_new, } if (g_debug_level > 0 && g_proc_id == 0) - printf("MGTEST: || s - f_{tmLQC} * f_{DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e \n", + printf("MGTEST: || s - f_{tmLQC} * f_{TM_USE_DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e \n", differ[0], differ[1], differ[0] / differ[1]); } @@ -343,7 +343,7 @@ static int MG_pre_solve(su3 **gf) { if (mg_initialized == 0) { MG_init(); mg_initialized = 1; - if (g_proc_id == 0) printf("DDalphaAMG initialized\n"); + if (g_proc_id == 0) printf("TM_USE_DDalphaAMG initialized\n"); MPI_Barrier(MPI_COMM_WORLD); } @@ -351,23 +351,23 @@ static int MG_pre_solve(su3 **gf) { DDalphaAMG_set_configuration((double *)&(gf[0][0]), &mg_status); mg_update_gauge = 0; if (mg_status.success && g_proc_id == 0) - printf("DDalphaAMG cnfg set, 
plaquette %e\n", mg_status.info); + printf("TM_USE_DDalphaAMG cnfg set, plaquette %e\n", mg_status.info); else if (g_proc_id == 0) printf("ERROR: configuration updating did not run correctly"); } if (mg_do_setup == 1) { if (mg_setup_mu_set) { - if (g_proc_id == 0) printf("DDalphaAMG using mu=%f during setup\n", mg_setup_mu); + if (g_proc_id == 0) printf("TM_USE_DDalphaAMG using mu=%f during setup\n", mg_setup_mu); MG_update_mu(mg_setup_mu, 0); } else MG_update_mu(g_mu, 0); - if (g_proc_id == 0) printf("DDalphaAMG running setup\n"); + if (g_proc_id == 0) printf("TM_USE_DDalphaAMG running setup\n"); DDalphaAMG_setup(&mg_status); mg_do_setup = 0; mg_tau = gauge_tau; if (mg_status.success && g_proc_id == 0) - printf("DDalphaAMG setup ran, time %.2f sec (%.2f %% on coarse grid)\n", mg_status.time, + printf("TM_USE_DDalphaAMG setup ran, time %.2f sec (%.2f %% on coarse grid)\n", mg_status.time, 100. * (mg_status.coarse_time / mg_status.time)); else if (g_proc_id == 0) printf("ERROR: setup procedure did not run correctly"); @@ -375,16 +375,16 @@ static int MG_pre_solve(su3 **gf) { if (mg_update_setup > 0) { if (mg_setup_mu_set) { - if (g_proc_id == 0) printf("DDalphaAMG using mu=%f during setup\n", mg_setup_mu); + if (g_proc_id == 0) printf("TM_USE_DDalphaAMG using mu=%f during setup\n", mg_setup_mu); MG_update_mu(mg_setup_mu, 0); } else MG_update_mu(g_mu, 0); - if (g_proc_id == 0) printf("DDalphaAMG updating setup\n"); + if (g_proc_id == 0) printf("TM_USE_DDalphaAMG updating setup\n"); DDalphaAMG_update_setup(mg_update_setup, &mg_status); mg_update_setup = 0; mg_tau = gauge_tau; if (mg_status.success && g_proc_id == 0) - printf("DDalphaAMG setup ran, time %.2f sec (%.2f %% on coarse grid)\n", mg_status.time, + printf("TM_USE_DDalphaAMG setup ran, time %.2f sec (%.2f %% on coarse grid)\n", mg_status.time, 100. 
* (mg_status.coarse_time / mg_status.time)); else if (g_proc_id == 0) printf("ERROR: setup updating did not run correctly"); @@ -395,7 +395,7 @@ static int MG_pre_solve(su3 **gf) { static int MG_solve(spinor *const phi_new, spinor *const phi_old, const double precision, const int N, matrix_mult f) { - // for rescaling convention in DDalphaAMG: (4+m)*\delta_{x,y} in tmLQCD: 1*\delta_{x,y} -> + // for rescaling convention in TM_USE_DDalphaAMG: (4+m)*\delta_{x,y} in tmLQCD: 1*\delta_{x,y} -> // rescale by 1/4+m double mg_scale = 0.5 / g_kappa; double *old = (double *)phi_old; @@ -529,7 +529,7 @@ static int MG_solve(spinor *const phi_new, spinor *const phi_old, const double p static int MG_solve_nd(spinor *up_new, spinor *dn_new, spinor *const up_old, spinor *const dn_old, const double precision, const int N, matrix_mult_nd f) { - // for rescaling convention in DDalphaAMG: (4+m)*\delta_{x,y} in tmLQCD: 1*\delta_{x,y} -> + // for rescaling convention in TM_USE_DDalphaAMG: (4+m)*\delta_{x,y} in tmLQCD: 1*\delta_{x,y} -> // rescale by 1/4+m moreover in the nd case, the tmLQCD is multiplied by phmc_invmaxev double mg_scale = 0.5 / g_kappa / phmc_invmaxev; double sqnorm; @@ -803,7 +803,7 @@ static int MG_solve_nd(spinor *up_new, spinor *dn_new, spinor *const up_old, spi // 0 and shift f == Qsw_pm_ndpsi || // (Gamma5 Dh tau1)^2 - Schur complement squared f == Qsw_pm_ndpsi_shift) { // (Gamma5 Dh tau1)^2 - Schur complement squared with shift - // DDalphaAMG: tau1 gamma5 Dh tau1 gamma5 Dh + // TM_USE_DDalphaAMG: tau1 gamma5 Dh tau1 gamma5 Dh // tmLQCD: gamma5 Dh tau1 gamma5 Dh tau1 if (init_guess) { mul_gamma5(old1, VOLUME); @@ -900,7 +900,7 @@ static int MG_solve_nd(spinor *up_new, spinor *dn_new, spinor *const up_old, spi static int MG_mms_solve_nd(spinor **const up_new, spinor **const dn_new, spinor *const up_old, spinor *const dn_old, const double *shifts, const int no_shifts, double *precision, const int N, matrix_mult_nd f) { - // for rescaling convention in DDalphaAMG: 
(4+m)*\delta_{x,y} in tmLQCD: 1*\delta_{x,y} -> + // for rescaling convention in TM_USE_DDalphaAMG: (4+m)*\delta_{x,y} in tmLQCD: 1*\delta_{x,y} -> // rescale by 1/4+m moreover in the nd case, the tmLQCD is multiplied by phmc_invmaxev double mg_scale = 0.5 / g_kappa / phmc_invmaxev; double *old1 = (double *)up_old; @@ -1001,7 +1001,7 @@ static int MG_mms_solve_nd(spinor **const up_new, spinor **const dn_new, spinor // 0 and shift f == Qsw_pm_ndpsi_shift) { // (Gamma5 Dh tau1)^2 - Schur complement squared with shift mg_scale *= mg_scale; - // DDalphaAMG: tau1 gamma5 Dh tau1 gamma5 Dh + // TM_USE_DDalphaAMG: tau1 gamma5 Dh tau1 gamma5 Dh // tmLQCD: gamma5 Dh tau1 gamma5 Dh tau1 DDalphaAMG_solve_ms_doublet_squared_odd(new2, old2, new1, old1, mg_even_shifts, mg_odd_shifts, no_shifts, precision, &mg_status); @@ -1110,7 +1110,7 @@ void MG_init() { mg_params.conf_index_fct = conf_index_fct; mg_params.vector_index_fct = vector_index_fct; - /* in DDalphaAMG + /* in TM_USE_DDalphaAMG * Printing level: * -1: silent (errors or warnings) * 0: minimal //default diff --git a/src/lib/DDalphaAMG_interface.h b/src/lib/DDalphaAMG_interface.h index 96f59c31e..cc7ae1678 100644 --- a/src/lib/DDalphaAMG_interface.h +++ b/src/lib/DDalphaAMG_interface.h @@ -17,7 +17,7 @@ * You should have received a copy of the GNU General Public License * along with tmLQCD. If not, see . 
* - * Interface for DDalphaAMG + * Interface for TM_USE_DDalphaAMG * *******************************************************************************/ diff --git a/src/lib/buffers/utils_generic_exchange.blocking.inc b/src/lib/buffers/utils_generic_exchange.blocking.inc index e6e5f975c..71b44900c 100644 --- a/src/lib/buffers/utils_generic_exchange.blocking.inc +++ b/src/lib/buffers/utils_generic_exchange.blocking.inc @@ -26,7 +26,7 @@ g_cart_grid, &status); } -# if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +# if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in x direction */ /* recieve the data from the neighbour on the right in x direction */ MPI_Sendrecv(buffer[0], 1, slice_X_gath_type, g_nb_x_dn, 93, @@ -108,10 +108,10 @@ 1, edge_XT_cont_type, g_nb_t_dn, 98, g_cart_grid, &status); } - /* end of if defined PARALLELXT || PARALLELXYT || PARALLELXYZT*/ + /* end of if defined TM_PARALLELXT || TM_PARALLELXYT || TM_PARALLELXYZT*/ # endif -# if (defined PARALLELXYT || defined PARALLELXYZT) +# if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in y direction */ /* recieve the data from the neighbour on the right in y direction */ MPI_Sendrecv(buffer[0], 1, slice_Y_gath_type, g_nb_y_dn, 103, @@ -247,9 +247,9 @@ g_cart_grid, &status); } - /* end of if defined PARALLELXYT || PARALLELXYZT */ + /* end of if defined TM_PARALLELXYT || TM_PARALLELXYZT */ # endif -# if defined PARALLELXYZT +# if defined TM_PARALLELXYZT /* z-Rand */ /* send the data to the neighbour on the left in z direction */ /* recieve the data from the neighbour on the right in z direction */ @@ -454,4 +454,4 @@ } -#endif /* PARALLELXYZT */ +#endif /* TM_PARALLELXYZT */ diff --git a/src/lib/buffers/utils_generic_exchange.c b/src/lib/buffers/utils_generic_exchange.c index c1c3c844a..474c738ad 100644 --- a/src/lib/buffers/utils_generic_exchange.c +++ 
b/src/lib/buffers/utils_generic_exchange.c @@ -4,13 +4,13 @@ void generic_exchange(void *field_in, int bytes_per_site) {} #else /* MPI */ void generic_exchange(void *field_in, int bytes_per_site) { -#if defined _NON_BLOCKING +#if defined TM_NON_BLOCKING int cntr = 0; MPI_Request request[108]; MPI_Status status[108]; -#else /* _NON_BLOCKING */ +#else /* TM_NON_BLOCKING */ MPI_Status status; -#endif /* _NON_BLOCKING */ +#endif /* TM_NON_BLOCKING */ static int initialized = 0; /* We start by defining all the MPI datatypes required */ @@ -125,11 +125,11 @@ void generic_exchange(void *field_in, int bytes_per_site) { } /* Following are implementations using different compile time flags */ -#if defined _NON_BLOCKING +#if defined TM_NON_BLOCKING #include "utils_generic_exchange.nonblocking.inc" -#else /* _NON_BLOCKING */ +#else /* TM_NON_BLOCKING */ #include "utils_generic_exchange.blocking.inc" -#endif /* _NON_BLOCKING */ +#endif /* TM_NON_BLOCKING */ } #endif /* MPI */ diff --git a/src/lib/buffers/utils_generic_exchange.nonblocking.inc b/src/lib/buffers/utils_generic_exchange.nonblocking.inc index 0789a490f..71409008f 100644 --- a/src/lib/buffers/utils_generic_exchange.nonblocking.inc +++ b/src/lib/buffers/utils_generic_exchange.nonblocking.inc @@ -32,7 +32,7 @@ cntr=cntr+2; } -# if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +# if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in x direction */ /* recieve the data from the neighbour on the right in x direction */ MPI_Isend(buffer[0], 1, slice_X_gath_type, g_nb_x_dn, 87, @@ -71,7 +71,7 @@ # endif MPI_Waitall(cntr, request, status); cntr=0; -# if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +# if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* The edges */ /* send the data to the neighbour on the left in t direction */ @@ -137,10 +137,10 @@ g_cart_grid, 
&request[cntr+1]); cntr=cntr+2; } - /* end of if defined PARALLELXT || PARALLELXYT || PARALLELXYZT*/ + /* end of if defined TM_PARALLELXT || TM_PARALLELXYT || TM_PARALLELXYZT*/ # endif -# if (defined PARALLELXYT || defined PARALLELXYZT) +# if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in y direction */ /* recieve the data from the neighbour on the right in y direction */ MPI_Isend(buffer[0], 1, slice_Y_gath_type, g_nb_y_dn, 106, @@ -177,7 +177,7 @@ # endif MPI_Waitall(cntr, request, status); cntr=0; -# if (defined PARALLELXYT || defined PARALLELXYZT) +# if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* jetzt wirds richtig eklig ... */ @@ -312,9 +312,9 @@ cntr=cntr+2; } - /* end of if defined PARALLELXYT || PARALLELXYZT */ + /* end of if defined TM_PARALLELXYT || TM_PARALLELXYZT */ # endif -# if defined PARALLELXYZT +# if defined TM_PARALLELXYZT /* z-Rand */ /* send the data to the neighbour on the left in z direction */ /* recieve the data from the neighbour on the right in z direction */ @@ -359,7 +359,7 @@ } # endif MPI_Waitall(cntr, request, status); -# if defined PARALLELXYZT +# if defined TM_PARALLELXYZT cntr=0; /* edges */ diff --git a/src/lib/deriv_Sb.c b/src/lib/deriv_Sb.c index 4303c80d5..7b55eb170 100644 --- a/src/lib/deriv_Sb.c +++ b/src/lib/deriv_Sb.c @@ -56,7 +56,7 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field_t* const hf, const double factor) { tm_stopwatch_push(&g_timers, __func__, ""); -#ifdef _GAUGE_COPY +#ifdef TM_GAUGE_COPY if (g_update_gauge_copy) { update_backward_gauge(hf->gaugefield); } @@ -85,7 +85,7 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field #undef static #endif -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst begin(derivSb) #endif @@ -114,7 +114,7 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field icy = g_lexic2eosub[iy]; sp = k + icy; -#if (defined 
_GAUGE_COPY && !defined _USE_HALFSPINOR) +#if (defined TM_GAUGE_COPY && !defined TM_USE_HALFSPINOR) up = &g_gauge_field_copy[icx][0]; #else up = &hf->gaugefield[ix][0]; @@ -136,7 +136,7 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field icy = g_lexic2eosub[iy]; sm = k + icy; -#if (defined _GAUGE_COPY && !defined _USE_HALFSPINOR) +#if (defined TM_GAUGE_COPY && !defined TM_USE_HALFSPINOR) um = up + 1; #else um = &hf->gaugefield[iy][0]; @@ -159,7 +159,7 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field icy = g_lexic2eosub[iy]; sp = k + icy; -#if (defined _GAUGE_COPY && !defined _USE_HALFSPINOR) +#if (defined TM_GAUGE_COPY && !defined TM_USE_HALFSPINOR) up = um + 1; #else up = &hf->gaugefield[ix][1]; @@ -181,7 +181,7 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field icy = g_lexic2eosub[iy]; sm = k + icy; -#if (defined _GAUGE_COPY && !defined _USE_HALFSPINOR) +#if (defined TM_GAUGE_COPY && !defined TM_USE_HALFSPINOR) um = up + 1; #else um = &hf->gaugefield[iy][1]; @@ -203,7 +203,7 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field icy = g_lexic2eosub[iy]; sp = k + icy; -#if (defined _GAUGE_COPY && !defined _USE_HALFSPINOR) +#if (defined TM_GAUGE_COPY && !defined TM_USE_HALFSPINOR) up = um + 1; #else up = &hf->gaugefield[ix][2]; @@ -225,7 +225,7 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field icy = g_lexic2eosub[iy]; sm = k + icy; -#if (defined _GAUGE_COPY && !defined _USE_HALFSPINOR) +#if (defined TM_GAUGE_COPY && !defined TM_USE_HALFSPINOR) um = up + 1; #else um = &hf->gaugefield[iy][2]; @@ -247,7 +247,7 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field icy = g_lexic2eosub[iy]; sp = k + icy; -#if (defined _GAUGE_COPY && !defined _USE_HALFSPINOR) +#if (defined TM_GAUGE_COPY && !defined TM_USE_HALFSPINOR) up = um + 1; #else up = &hf->gaugefield[ix][3]; @@ -269,7 +269,7 
@@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field icy = g_lexic2eosub[iy]; sm = k + icy; -#if (defined _GAUGE_COPY && !defined _USE_HALFSPINOR) +#if (defined TM_GAUGE_COPY && !defined TM_USE_HALFSPINOR) um = up + 1; #else um = &hf->gaugefield[iy][3]; @@ -292,7 +292,7 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field } /* OpenMP closing brace */ #endif tm_stopwatch_pop(&g_timers, 0, 1, ""); -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst end(derivSb) #endif } diff --git a/src/lib/deriv_Sb_D_psi.c b/src/lib/deriv_Sb_D_psi.c index 6ba15d490..61da4b9d2 100644 --- a/src/lib/deriv_Sb_D_psi.c +++ b/src/lib/deriv_Sb_D_psi.c @@ -63,7 +63,7 @@ void deriv_Sb_D_psi(spinor* const l, spinor* const k, hamiltonian_field_t* const #undef static #endif -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst begin(derivSb) #endif @@ -225,7 +225,7 @@ void deriv_Sb_D_psi(spinor* const l, spinor* const k, hamiltonian_field_t* const /****************** end of loop ************************/ } -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst end(derivSb) #endif diff --git a/fixed_volume.h.in b/src/lib/fixed_volume.h.in similarity index 100% rename from fixed_volume.h.in rename to src/lib/fixed_volume.h.in diff --git a/src/lib/geometry_eo.c b/src/lib/geometry_eo.c index 8622131e9..ceb348e1a 100644 --- a/src/lib/geometry_eo.c +++ b/src/lib/geometry_eo.c @@ -45,7 +45,7 @@ void Hopping_Matrix_Indices(void); -#if ((defined PARALLELX) || (defined PARALLELXY) || (defined PARALLELXYZ)) +#if ((defined TM_PARALLELX) || (defined TM_PARALLELXY) || (defined TM_PARALLELXYZ)) /* This is the version of the function Index introduced for Aurora-like parallelizations (mainly * xyz) */ @@ -72,7 +72,7 @@ int Index(const int x0, const int x1, const int x2, const int x3) { ix = VOLUME + T * LY * LZ + y0 * LY * LZ + y2 * LZ + y3; } -#if (defined PARALLELXY || defined PARALLELXYZ) +#if (defined TM_PARALLELXY || defined 
TM_PARALLELXYZ) /* y-Rand */ if (x2 == LY) { ix = VOLUME + 2 * T * LY * LZ + y0 * LX * LZ + y1 * LZ + y3; @@ -97,9 +97,9 @@ int Index(const int x0, const int x1, const int x2, const int x3) { ix = VOLUME + RAND + 3 * T * LZ + y0 * LZ + y3; } } -#endif /* endif of PARALLELXY || PARALLELXYZ */ +#endif /* endif of TM_PARALLELXY || TM_PARALLELXYZ */ -#if defined PARALLELXYZ +#if defined TM_PARALLELXYZ /* z-Rand */ if (x3 == LZ) { ix = VOLUME + 2 * T * LY * LZ + 2 * T * LX * LZ + y0 * LX * LY + y1 * LY + y2; @@ -142,7 +142,7 @@ int Index(const int x0, const int x1, const int x2, const int x3) { } } -#endif /* endif of PARALLELXYZ */ +#endif /* endif of TM_PARALLELXYZ */ /* The DBW2 stuff --> second boundary slice */ /* This we put a the very end. */ @@ -150,44 +150,44 @@ int Index(const int x0, const int x1, const int x2, const int x3) { /* x2-rand+ */ if (x1 == LX + 1) { ix = VOLUMEPLUSRAND + y0 * LY * LZ + y2 * LZ + y3; -#if (defined PARALLELXY || defined PARALLELXYZ) +#if (defined TM_PARALLELXY || defined TM_PARALLELXYZ) /* x2y */ if (x2 == LY) { ix = VOLUMEPLUSRAND + RAND + y0 * LZ + y3; } else if (x2 == -1) { ix = VOLUMEPLUSRAND + RAND + 1 * T * LZ + y0 * LZ + y3; } -#endif /* endif of PARALLELXY || PARALLELXYZ */ -#if defined PARALLELXYZ +#endif /* endif of TM_PARALLELXY || TM_PARALLELXYZ */ +#if defined TM_PARALLELXYZ /* x2z */ else if (x3 == LZ) { ix = VOLUMEPLUSRAND + RAND + 8 * T * LZ + 4 * T * LY + y0 * LY + y2; } else if (x3 == -1) { ix = VOLUMEPLUSRAND + RAND + 8 * T * LZ + 5 * T * LY + y0 * LY + y2; } -#endif /* endif of PARALLELXYZ */ +#endif /* endif of TM_PARALLELXYZ */ } /* x2-rand- */ if (x1 == -2) { ix = VOLUMEPLUSRAND + T * LY * LZ + y0 * LY * LZ + y2 * LZ + y3; -#if (defined PARALLELXY || defined PARALLELXYZ) +#if (defined TM_PARALLELXY || defined TM_PARALLELXYZ) /* x2y */ if (x2 == LY) { ix = VOLUMEPLUSRAND + RAND + 2 * T * LZ + y0 * LZ + y3; } else if (x2 == -1) { ix = VOLUMEPLUSRAND + RAND + 3 * T * LZ + y0 * LZ + y3; } -#endif /* endif of 
PARALLELXY || PARALLELXYZ */ -#if defined PARALLELXYZ +#endif /* endif of TM_PARALLELXY || TM_PARALLELXYZ */ +#if defined TM_PARALLELXYZ /* x2z */ else if (x3 == LZ) { ix = VOLUMEPLUSRAND + RAND + 8 * T * LZ + 6 * T * LY + y0 * LY + y2; } else if (x3 == -1) { ix = VOLUMEPLUSRAND + RAND + 8 * T * LZ + 7 * T * LY + y0 * LY + y2; } -#endif /* endif of PARALLELXYZ */ +#endif /* endif of TM_PARALLELXYZ */ } -#if (defined PARALLELXY || defined PARALLELXYZ) +#if (defined TM_PARALLELXY || defined TM_PARALLELXYZ) /* y2-rand+ */ if (x2 == LY + 1) { ix = VOLUMEPLUSRAND + 2 * T * LY * LZ + y0 * LX * LZ + y1 * LZ + y3; @@ -197,14 +197,14 @@ int Index(const int x0, const int x1, const int x2, const int x3) { } else if (x1 == -1) { ix = VOLUMEPLUSRAND + RAND + 6 * T * LZ + y0 * LZ + y3; } -#if defined PARALLELXYZ +#if defined TM_PARALLELXYZ /* y2z */ else if (x3 == LZ) { ix = VOLUMEPLUSRAND + RAND + 8 * T * LZ + 8 * T * LY + 4 * T * LX + y0 * LX + y1; } else if (x3 == -1) { ix = VOLUMEPLUSRAND + RAND + 8 * T * LZ + 8 * T * LY + 5 * T * LX + y0 * LX + y1; } -#endif /* endif of PARALLELXYZ */ +#endif /* endif of TM_PARALLELXYZ */ } /* y2-rand- */ if (x2 == -2) { @@ -215,17 +215,17 @@ int Index(const int x0, const int x1, const int x2, const int x3) { } else if (x1 == -1) { ix = VOLUMEPLUSRAND + RAND + 7 * T * LZ + y0 * LZ + y3; } -#if defined PARALLELXYZ +#if defined TM_PARALLELXYZ /* y2z */ else if (x3 == LZ) { ix = VOLUMEPLUSRAND + RAND + 8 * T * LZ + 8 * T * LY + 6 * T * LX + y0 * LX + y1; } else if (x3 == -1) { ix = VOLUMEPLUSRAND + RAND + 8 * T * LZ + 8 * T * LY + 7 * T * LX + y0 * LX + y1; } -#endif /* endif of PARALLELXYZ */ +#endif /* endif of TM_PARALLELXYZ */ } -#endif /* endif of PARALLELXY || PARALLELXYZ */ -#if defined PARALLELXYZ +#endif /* endif of TM_PARALLELXY || TM_PARALLELXYZ */ +#if defined TM_PARALLELXYZ /* z2-rand+ */ if (x3 == LZ + 1) { ix = VOLUMEPLUSRAND + 2 * T * LY * LZ + 2 * T * LX * LZ + y0 * LX * LY + y1 * LY + y2; @@ -259,12 +259,12 @@ int Index(const 
int x0, const int x1, const int x2, const int x3) { ix = VOLUMEPLUSRAND + RAND + 8 * T * LZ + 8 * T * LY + 3 * T * LX + y0 * LX + y1; } } -#endif /* endif of PARALLELXYZ */ +#endif /* endif of TM_PARALLELXYZ */ return (ix); } -#else /* original version of Index(): used for no parallelization or PARALLEL*T */ +#else /* original version of Index(): used for no parallelization or TM_PARALLEL*T */ int Index(const int x0, const int x1, const int x2, const int x3) { int y0, y1, y2, y3, ix; @@ -274,7 +274,7 @@ int Index(const int x0, const int x1, const int x2, const int x3) { y3 = (x3 + LZ) % LZ; ix = ((y0 * LX + y1) * LY + y2) * LZ + y3; -#if ((defined PARALLELT) || (defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT)) +#if ((defined TM_PARALLELT) || (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT)) if (x0 == T) { ix = VOLUME + y3 + LZ * y2 + LZ * LY * y1; } @@ -283,7 +283,7 @@ int Index(const int x0, const int x1, const int x2, const int x3) { ix = VOLUME + LX * LY * LZ + y3 + LZ * y2 + LZ * LY * y1; } #endif -#if ((defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT)) +#if ((defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT)) if (x1 == LX) { ix = VOLUME + 2 * LX * LY * LZ + y0 * LY * LZ + y2 * LZ + y3; } @@ -309,9 +309,9 @@ int Index(const int x0, const int x1, const int x2, const int x3) { } } -#endif /* endif of PARALLELXT || PARALLELXYT || PARALLELXYZT */ +#endif /* endif of TM_PARALLELXT || TM_PARALLELXYT || TM_PARALLELXYZT */ -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* y-Rand */ if (x2 == LY) { ix = VOLUME + 2 * LX * LY * LZ + 2 * T * LY * LZ + y0 * LX * LZ + y1 * LZ + y3; @@ -358,8 +358,8 @@ int Index(const int x0, const int x1, const int x2, const int x3) { } } -#endif /* endif of PARALLELXYT || PARALLELXYZT */ -#if defined PARALLELXYZT +#endif /* endif of TM_PARALLELXYT || TM_PARALLELXYZT */ +#if 
defined TM_PARALLELXYZT /* z-Rand */ if (x3 == LZ) { ix = @@ -429,30 +429,30 @@ int Index(const int x0, const int x1, const int x2, const int x3) { } } -#endif /* endif of PARALLELXYZT */ +#endif /* endif of TM_PARALLELXYZT */ /* The DBW2 stuff --> second boundary slice */ /* This we put a the very end. */ -#if ((defined PARALLELT) || (defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT)) +#if ((defined TM_PARALLELT) || (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT)) if (x0 == T + 1) { ix = VOLUMEPLUSRAND + y3 + LZ * y2 + LZ * LY * y1; -#if ((defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT)) +#if ((defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT)) /* t2x */ if (x1 == LX) { ix = VOLUMEPLUSRAND + RAND + y2 * LZ + y3; } else if (x1 == -1) { ix = VOLUMEPLUSRAND + RAND + 1 * LY * LZ + y2 * LZ + y3; } -#endif /* endif of PARALLELXT || PARALLELXYT || PARALLELXYZT */ -#if (defined PARALLELXYT || defined PARALLELXYZT) +#endif /* endif of TM_PARALLELXT || TM_PARALLELXYT || TM_PARALLELXYZT */ +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* t2y */ else if (x2 == LY) { ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + y1 * LZ + y3; } else if (x2 == -1) { ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 2 * LX * LZ + y1 * LZ + y3; } -#endif /* endif of PARALLELXYT || PARALLELXYZT */ -#if defined PARALLELXYZT +#endif /* endif of TM_PARALLELXYT || TM_PARALLELXYZT */ +#if defined TM_PARALLELXYZT /* t2z */ else if (x3 == LZ) { ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 8 * LX * LZ + y1 * LY + y2; @@ -460,28 +460,28 @@ int Index(const int x0, const int x1, const int x2, const int x3) { ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 8 * LX * LZ + 2 * LX * LY + y1 * LY + y2; } -#endif /* endif of PARALLELXYZT */ +#endif /* endif of TM_PARALLELXYZT */ } /* the slice at time -2 is put behind the one at time T+1 */ else if (x0 == -2) { ix 
= VOLUMEPLUSRAND + LX * LY * LZ + y3 + LZ * y2 + LZ * LY * y1; -#if ((defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT)) +#if ((defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT)) /* t2x */ if (x1 == LX) { ix = VOLUMEPLUSRAND + RAND + 2 * LY * LZ + y2 * LZ + y3; } else if (x1 == -1) { ix = VOLUMEPLUSRAND + RAND + 3 * LY * LZ + y2 * LZ + y3; } -#endif /* endif of PARALLELXT || PARALLELXYT || PARALLELXYZT */ -#if (defined PARALLELXYT || defined PARALLELXYZT) +#endif /* endif of TM_PARALLELXT || TM_PARALLELXYT || TM_PARALLELXYZT */ +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* t2y */ else if (x2 == LY) { ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + LX * LZ + y1 * LZ + y3; } else if (x2 == -1) { ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 3 * LX * LZ + y1 * LZ + y3; } -#endif /* endif of PARALLELXYT || PARALLELXYZT */ -#if defined PARALLELXYZT +#endif /* endif of TM_PARALLELXYT || TM_PARALLELXYZT */ +#if defined TM_PARALLELXYZT /* t2z */ else if (x3 == LZ) { ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 8 * LX * LZ + LX * LY + y1 * LY + y2; @@ -489,10 +489,10 @@ int Index(const int x0, const int x1, const int x2, const int x3) { ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 8 * LX * LZ + 3 * LX * LY + y1 * LY + y2; } -#endif /* endif of PARALLELXYZT */ +#endif /* endif of TM_PARALLELXYZT */ } -#endif /* endif of PARALLELT || PARALLELXT || PARALLELXYT || PARALLELXYZT */ -#if ((defined PARALLELXT) || (defined PARALLELXYT) || defined PARALLELXYZT) +#endif /* endif of TM_PARALLELT || TM_PARALLELXT || TM_PARALLELXYT || TM_PARALLELXYZT */ +#if ((defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || defined TM_PARALLELXYZT) if (x1 == LX + 1) { ix = VOLUMEPLUSRAND + 2 * LX * LY * LZ + y0 * LY * LZ + y2 * LZ + y3; /* x2t */ @@ -501,15 +501,15 @@ int Index(const int x0, const int x1, const int x2, const int x3) { } else if (x0 == -1) { ix = VOLUMEPLUSRAND + RAND + 6 * LY * 
LZ + y2 * LZ + y3; } -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* x2y */ else if (x2 == LY) { ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + y0 * LZ + y3; } else if (x2 == -1) { ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 1 * T * LZ + y0 * LZ + y3; } -#endif /* endif of PARALLELXYT || PARALLELXYZT */ -#if defined PARALLELXYZT +#endif /* endif of TM_PARALLELXYT || TM_PARALLELXYZT */ +#if defined TM_PARALLELXYZT /* x2z */ else if (x3 == LZ) { ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 8 * LX * LZ + 8 * LX * LY + @@ -518,7 +518,7 @@ int Index(const int x0, const int x1, const int x2, const int x3) { ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 8 * LX * LZ + 8 * LX * LY + 5 * T * LY + y0 * LY + y2; } -#endif /* endif of PARALLELXYZT */ +#endif /* endif of TM_PARALLELXYZT */ } if (x1 == -2) { ix = VOLUMEPLUSRAND + 2 * LX * LY * LZ + T * LY * LZ + y0 * LY * LZ + y2 * LZ + y3; @@ -528,15 +528,15 @@ int Index(const int x0, const int x1, const int x2, const int x3) { } else if (x0 == -1) { ix = VOLUMEPLUSRAND + RAND + 7 * LY * LZ + y2 * LZ + y3; } -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* x2y */ else if (x2 == LY) { ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 2 * T * LZ + y0 * LZ + y3; } else if (x2 == -1) { ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 3 * T * LZ + y0 * LZ + y3; } -#endif /* endif of PARALLELXYT || PARALLELXYZT */ -#if defined PARALLELXYZT +#endif /* endif of TM_PARALLELXYT || TM_PARALLELXYZT */ +#if defined TM_PARALLELXYZT /* x2z */ else if (x3 == LZ) { ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 8 * LX * LZ + 8 * LX * LY + @@ -545,10 +545,10 @@ int Index(const int x0, const int x1, const int x2, const int x3) { ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 8 * LX * LZ + 8 * LX * LY + 7 * T * LY + y0 * LY + y2; } -#endif /* endif of PARALLELXYZT */ +#endif /* endif of TM_PARALLELXYZT */ 
} -#endif /* endif of PARALLELXT || PARALLELXYT || PARALLELXYZT */ -#if (defined PARALLELXYT || defined PARALLELXYZT) +#endif /* endif of TM_PARALLELXT || TM_PARALLELXYT || TM_PARALLELXYZT */ +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) if (x2 == LY + 1) { ix = VOLUMEPLUSRAND + 2 * LX * LY * LZ + 2 * T * LY * LZ + y0 * LX * LZ + y1 * LZ + y3; /* y2x */ @@ -563,7 +563,7 @@ int Index(const int x0, const int x1, const int x2, const int x3) { } else if (x0 == -1) { ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 5 * LX * LZ + y1 * LZ + y3; } -#if defined PARALLELXYZT +#if defined TM_PARALLELXYZT /* y2z */ else if (x3 == LZ) { ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 8 * LX * LZ + 8 * LX * LY + @@ -572,7 +572,7 @@ int Index(const int x0, const int x1, const int x2, const int x3) { ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 8 * LX * LZ + 8 * LX * LY + 8 * T * LY + 5 * T * LX + y0 * LX + y1; } -#endif /* endif of PARALLELXYZT */ +#endif /* endif of TM_PARALLELXYZT */ } if (x2 == -2) { ix = VOLUMEPLUSRAND + 2 * LX * LY * LZ + 2 * T * LY * LZ + T * LX * LZ + y0 * LX * LZ + @@ -589,7 +589,7 @@ int Index(const int x0, const int x1, const int x2, const int x3) { } else if (x0 == -1) { ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 7 * LX * LZ + y1 * LZ + y3; } -#if defined PARALLELXYZT +#if defined TM_PARALLELXYZT /* y2z */ else if (x3 == LZ) { ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 8 * LX * LZ + 8 * LX * LY + @@ -598,10 +598,10 @@ int Index(const int x0, const int x1, const int x2, const int x3) { ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 8 * LX * LZ + 8 * LX * LY + 8 * T * LY + 7 * T * LX + y0 * LX + y1; } -#endif /* endif of PARALLELXYZT */ +#endif /* endif of TM_PARALLELXYZT */ } -#endif /* endif of PARALLELXYT || PARALLELXYZT */ -#if defined PARALLELXYZT +#endif /* endif of TM_PARALLELXYT || TM_PARALLELXYZT */ +#if defined TM_PARALLELXYZT /* z2-Rand */ if (x3 == LZ + 1) { if ((x0 < T) 
&& (x0 > -1) && (x1 < LX) && (x1 > -1) && (x2 > -1) && (x2 < LY)) { @@ -663,14 +663,14 @@ int Index(const int x0, const int x1, const int x2, const int x3) { 8 * T * LY + 3 * T * LX + y0 * LX + y1; } } -#endif /* endif of PARALLELXYZT */ +#endif /* endif of TM_PARALLELXYZT */ /* if(ix == 372) { */ /* printf("## %d %d %d %d ix = %d, %d %d %d %d\n", x0, x1, x2, x3, ix, T, LX, LY, LZ); */ /* } */ return (ix); } -#endif /* PARALLEL??? */ +#endif /* TM_PARALLEL??? */ void geometry() { int x0, x1, x2, x3, ix; @@ -685,17 +685,17 @@ void geometry() { xeven = malloc(VOLUMEPLUSRAND * sizeof(int)); -#if (defined PARALLELT || defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELT || defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) startvaluet = 1; #endif -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT || defined PARALLELX || \ - defined PARALLELXY || defined PARALLELXYZ) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT || defined TM_PARALLELX || \ + defined TM_PARALLELXY || defined TM_PARALLELXYZ) startvaluex = 1; #endif -#if (defined PARALLELXYT || defined PARALLELXYZT || defined PARALLELXY || defined PARALLELXYZ) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT || defined TM_PARALLELXY || defined TM_PARALLELXYZ) startvaluey = 1; #endif -#if (defined PARALLELXYZT || defined PARALLELXYZ) +#if (defined TM_PARALLELXYZT || defined TM_PARALLELXYZ) startvaluez = 1; #endif @@ -795,7 +795,7 @@ void geometry() { } } -#if (defined PARALLELXYZT || defined PARALLELXYZ) +#if (defined TM_PARALLELXYZT || defined TM_PARALLELXYZ) ix = 0; for (x0 = 0; x0 < T; x0++) { for (x1 = 0; x1 < LX; x1++) { @@ -852,7 +852,7 @@ void geometry() { } -#endif /* PARALLELXYZ || PARALLELXYZT*/ +#endif /* TM_PARALLELXYZ || TM_PARALLELXYZT*/ /* The rectangular gauge action part */ /* Everything is stored behind VOLUMEPLUSRAND-1 !*/ @@ -861,7 +861,7 @@ void geometry() { printf("# 
Initialising rectangular gauge action stuff\n"); fflush(stdout); } -#if (defined PARALLELT || defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELT || defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) for (x1 = -startvaluex; x1 < (LX + startvaluex); x1++) { for (x2 = -startvaluey; x2 < (LY + startvaluey); x2++) { for (x3 = -startvaluez; x3 < (LZ + startvaluez); x3++) { @@ -910,8 +910,8 @@ void geometry() { } } #endif -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT || defined PARALLELX || \ - defined PARALLELXY || defined PARALLELXYZ) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT || defined TM_PARALLELX || \ + defined TM_PARALLELXY || defined TM_PARALLELXYZ) for (x0 = -startvaluet; x0 < (T + startvaluet); x0++) { for (x2 = -startvaluey; x2 < (LY + startvaluey); x2++) { for (x3 = -startvaluez; x3 < (LZ + startvaluez); x3++) { @@ -959,7 +959,7 @@ void geometry() { } } #endif -#if (defined PARALLELXYT || defined PARALLELXYZT || defined PARALLELXY || defined PARALLELXYZ) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT || defined TM_PARALLELXY || defined TM_PARALLELXYZ) for (x0 = -startvaluet; x0 < (T + startvaluet); x0++) { for (x1 = -startvaluex; x1 < (LX + startvaluex); x1++) { for (x3 = -startvaluez; x3 < (LZ + startvaluez); x3++) { @@ -1007,7 +1007,7 @@ void geometry() { } } #endif -#if (defined PARALLELXYZT || defined PARALLELXYZ) +#if (defined TM_PARALLELXYZT || defined TM_PARALLELXYZ) for (x0 = -startvaluet; x0 < (T + startvaluet); x0++) { for (x1 = -startvaluex; x1 < (LX + startvaluex); x1++) { for (x2 = -startvaluey; x2 < (LY + startvaluey); x2++) { diff --git a/src/lib/get_rectangle_staples.c b/src/lib/get_rectangle_staples.c index eb2a7db9f..eab6b9d9e 100644 --- a/src/lib/get_rectangle_staples.c +++ b/src/lib/get_rectangle_staples.c @@ -34,7 +34,7 @@ void get_rectangle_staples_general(su3 *const v, const int x, const int mu, 
const su3 *const *const gf) { su3 ALIGN tmp1, tmp2; const su3 *a, *b, *c, *d, *e; -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst begin(rectstaples) #endif _su3_zero((*v)); @@ -178,7 +178,7 @@ void get_rectangle_staples_general(su3 *const v, const int x, const int mu, _su3_times_su3_acc((*v), tmp2, tmp1); } } -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst end(rectstaples) #endif } diff --git a/src/lib/get_staples.c b/src/lib/get_staples.c index e80648382..b33010f2c 100644 --- a/src/lib/get_staples.c +++ b/src/lib/get_staples.c @@ -35,7 +35,7 @@ void get_staples(su3* const staple, const int x, const int mu, const su3** in_ga su3 ALIGN st; const su3 *w1, *w2, *w3; -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst begin(staples) #endif @@ -61,7 +61,7 @@ void get_staples(su3* const staple, const int x, const int mu, const su3** in_ga _su3d_times_su3_acc(*staple, *w1, st); } } -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst end(staples) #endif } @@ -72,7 +72,7 @@ void get_spacelike_staples(su3* const staple, const int x, const int mu, su3 ALIGN st; const su3 *w1, *w2, *w3; -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst begin(staples) #endif @@ -98,7 +98,7 @@ void get_spacelike_staples(su3* const staple, const int x, const int mu, _su3d_times_su3_acc(*staple, *w1, st); } } -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst end(staples) #endif } @@ -109,7 +109,7 @@ void get_timelike_staples(su3* const staple, const int x, const int mu, su3 ALIGN st; const su3 *w1, *w2, *w3; -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst begin(staples) #endif @@ -134,7 +134,7 @@ void get_timelike_staples(su3* const staple, const int x, const int mu, /* v = v + w1^d * st */ _su3d_times_su3_acc(*staple, *w1, st); } -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst end(staples) #endif } diff --git a/src/lib/gettime.c b/src/lib/gettime.c index 68c123ae4..adae6dcb3 100644 --- a/src/lib/gettime.c +++ 
b/src/lib/gettime.c @@ -21,7 +21,7 @@ #ifdef HAVE_CONFIG_H #include #endif -#ifdef HAVE_CLOCK_GETTIME +#ifdef TM_CLOCK_GETTIME #ifndef _POSIX_C_SOURCE #define _POSIX_C_SOURCE 199309L #endif @@ -45,7 +45,7 @@ double gettime(void) { t = MPI_Wtime(); -#elif (defined HAVE_CLOCK_GETTIME) +#elif (defined TM_CLOCK_GETTIME) struct timespec ts; diff --git a/src/lib/git_hash.h b/src/lib/git_hash.h new file mode 100644 index 000000000..a3a22b48d --- /dev/null +++ b/src/lib/git_hash.h @@ -0,0 +1,6 @@ +#ifndef TM_GIT_HASH_H +#define TM_GIT_HASH_H + +extern const char git_hash[]; + +#endif diff --git a/src/lib/global.h b/src/lib/global.h index 1fc644d3e..b0d3b1ac2 100644 --- a/src/lib/global.h +++ b/src/lib/global.h @@ -38,7 +38,7 @@ #ifdef TM_USE_MPI #include #endif -#ifdef FIXEDVOLUME +#ifdef TM_FIXEDVOLUME #include "fixed_volume.h" #endif #include "su3.h" @@ -79,7 +79,7 @@ EXTERN tm_mpi_thread_level_t g_mpi_thread_level; EXTERN tm_timers_t g_timers; EXTERN int T_global; -#ifndef FIXEDVOLUME +#ifndef TM_FIXEDVOLUME EXTERN int T, L, LX, LY, LZ, VOLUME; EXTERN int N_PROC_T, N_PROC_X, N_PROC_Y, N_PROC_Z; EXTERN int RAND, EDGES, VOLUMEPLUSRAND; @@ -130,7 +130,7 @@ EXTERN int g_running_phmc; EXTERN su3 **g_gauge_field; EXTERN su3_32 **g_gauge_field_32; -#ifdef _USE_HALFSPINOR +#ifdef TM_USE_HALFSPINOR EXTERN su3 ***g_gauge_field_copy; EXTERN su3_32 ***g_gauge_field_copy_32; #else diff --git a/src/lib/init/init_dirac_halfspinor.c b/src/lib/init/init_dirac_halfspinor.c index f5939d9cc..891a703e2 100644 --- a/src/lib/init/init_dirac_halfspinor.c +++ b/src/lib/init/init_dirac_halfspinor.c @@ -94,7 +94,7 @@ int init_dirac_halfspinor() { NBPointer[ieo][8 * i + 2 * mu + 1] = &HalfSpinor[8 * g_lexic2eosub[g_iup[j][mu]] + 2 * mu + 1]; } -#if ((defined PARALLELT) || (defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT)) +#if ((defined TM_PARALLELT) || (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT)) if (t == 0) { k = 
(g_lexic2eosub[g_idn[j][0]] - VOLUME / 2); NBPointer[ieo][8 * i] = &sendBuffer[k]; @@ -104,8 +104,8 @@ int init_dirac_halfspinor() { NBPointer[ieo][8 * i + 1] = &sendBuffer[k]; } #endif -#if ((defined PARALLELX) || (defined PARALLELXY) || (defined PARALLELXYZ) || \ - (defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT)) +#if ((defined TM_PARALLELX) || (defined TM_PARALLELXY) || (defined TM_PARALLELXYZ) || \ + (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT)) if (x == 0) { k = (g_lexic2eosub[g_idn[j][1]] - VOLUME / 2); NBPointer[ieo][8 * i + 2] = &sendBuffer[k]; @@ -115,8 +115,8 @@ int init_dirac_halfspinor() { NBPointer[ieo][8 * i + 3] = &sendBuffer[k]; } #endif -#if ((defined PARALLELXY) || (defined PARALLELXYZ) || (defined PARALLELXYT) || \ - (defined PARALLELXYZT)) +#if ((defined TM_PARALLELXY) || (defined TM_PARALLELXYZ) || (defined TM_PARALLELXYT) || \ + (defined TM_PARALLELXYZT)) if (y == 0) { k = (g_lexic2eosub[g_idn[j][2]] - VOLUME / 2); NBPointer[ieo][8 * i + 4] = &sendBuffer[k]; @@ -126,7 +126,7 @@ int init_dirac_halfspinor() { NBPointer[ieo][8 * i + 5] = &sendBuffer[k]; } #endif -#if ((defined PARALLELXYZ) || (defined PARALLELXYZT)) +#if ((defined TM_PARALLELXYZ) || (defined TM_PARALLELXYZT)) if (z == 0) { k = (g_lexic2eosub[g_idn[j][3]] - VOLUME / 2); NBPointer[ieo][8 * i + 6] = &sendBuffer[k]; @@ -154,7 +154,7 @@ int init_dirac_halfspinor() { for (int mu = 0; mu < 8; mu++) { NBPointer[ieo][8 * i + mu] = &HalfSpinor[8 * i + mu]; } -#if ((defined PARALLELT) || (defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT)) +#if ((defined TM_PARALLELT) || (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT)) if (t == T - 1) { NBPointer[ieo][8 * i] = &recvBuffer[(g_lexic2eosub[g_iup[j][0]] - VOLUME / 2)]; } @@ -162,8 +162,8 @@ int init_dirac_halfspinor() { NBPointer[ieo][8 * i + 1] = &recvBuffer[(g_lexic2eosub[g_idn[j][0]] - VOLUME / 2)]; } #endif -#if ((defined 
PARALLELX) || (defined PARALLELXY) || (defined PARALLELXYZ) || \ - (defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT)) +#if ((defined TM_PARALLELX) || (defined TM_PARALLELXY) || (defined TM_PARALLELXYZ) || \ + (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT)) if (x == LX - 1) { NBPointer[ieo][8 * i + 2] = &recvBuffer[(g_lexic2eosub[g_iup[j][1]] - VOLUME / 2)]; } @@ -171,8 +171,8 @@ int init_dirac_halfspinor() { NBPointer[ieo][8 * i + 3] = &recvBuffer[(g_lexic2eosub[g_idn[j][1]] - VOLUME / 2)]; } #endif -#if ((defined PARALLELXY) || (defined PARALLELXYZ) || (defined PARALLELXYT) || \ - (defined PARALLELXYZT)) +#if ((defined TM_PARALLELXY) || (defined TM_PARALLELXYZ) || (defined TM_PARALLELXYT) || \ + (defined TM_PARALLELXYZT)) if (y == LY - 1) { NBPointer[ieo][8 * i + 4] = &recvBuffer[(g_lexic2eosub[g_iup[j][2]] - VOLUME / 2)]; } @@ -180,7 +180,7 @@ int init_dirac_halfspinor() { NBPointer[ieo][8 * i + 5] = &recvBuffer[(g_lexic2eosub[g_idn[j][2]] - VOLUME / 2)]; } #endif -#if ((defined PARALLELXYZ) || (defined PARALLELXYZT)) +#if ((defined TM_PARALLELXYZ) || (defined TM_PARALLELXYZT)) if (z == LZ - 1) { NBPointer[ieo][8 * i + 6] = &recvBuffer[(g_lexic2eosub[g_iup[j][3]] - VOLUME / 2)]; } @@ -240,7 +240,7 @@ int init_dirac_halfspinor32() { NBPointer32[ieo][8 * i + 2 * mu + 1] = &HalfSpinor32[8 * g_lexic2eosub[g_iup[j][mu]] + 2 * mu + 1]; } -#if ((defined PARALLELT) || (defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT)) +#if ((defined TM_PARALLELT) || (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT)) if (t == 0) { k = (g_lexic2eosub[g_idn[j][0]] - VOLUME / 2); NBPointer32[ieo][8 * i] = &sendBuffer32[k]; @@ -250,8 +250,8 @@ int init_dirac_halfspinor32() { NBPointer32[ieo][8 * i + 1] = &sendBuffer32[k]; } #endif -#if ((defined PARALLELX) || (defined PARALLELXY) || (defined PARALLELXYZ) || \ - (defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT)) 
+#if ((defined TM_PARALLELX) || (defined TM_PARALLELXY) || (defined TM_PARALLELXYZ) || \ + (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT)) if (x == 0) { k = (g_lexic2eosub[g_idn[j][1]] - VOLUME / 2); NBPointer32[ieo][8 * i + 2] = &sendBuffer32[k]; @@ -261,8 +261,8 @@ int init_dirac_halfspinor32() { NBPointer32[ieo][8 * i + 3] = &sendBuffer32[k]; } #endif -#if ((defined PARALLELXY) || (defined PARALLELXYZ) || (defined PARALLELXYT) || \ - (defined PARALLELXYZT)) +#if ((defined TM_PARALLELXY) || (defined TM_PARALLELXYZ) || (defined TM_PARALLELXYT) || \ + (defined TM_PARALLELXYZT)) if (y == 0) { k = (g_lexic2eosub[g_idn[j][2]] - VOLUME / 2); NBPointer32[ieo][8 * i + 4] = &sendBuffer32[k]; @@ -272,7 +272,7 @@ int init_dirac_halfspinor32() { NBPointer32[ieo][8 * i + 5] = &sendBuffer32[k]; } #endif -#if ((defined PARALLELXYZ) || (defined PARALLELXYZT)) +#if ((defined TM_PARALLELXYZ) || (defined TM_PARALLELXYZT)) if (z == 0) { k = (g_lexic2eosub[g_idn[j][3]] - VOLUME / 2); NBPointer32[ieo][8 * i + 6] = &sendBuffer32[k]; @@ -300,7 +300,7 @@ int init_dirac_halfspinor32() { for (mu = 0; mu < 8; mu++) { NBPointer32[ieo][8 * i + mu] = &HalfSpinor32[8 * i + mu]; } -#if ((defined PARALLELT) || (defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT)) +#if ((defined TM_PARALLELT) || (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT)) if (t == T - 1) { NBPointer32[ieo][8 * i] = &recvBuffer32[(g_lexic2eosub[g_iup[j][0]] - VOLUME / 2)]; } @@ -308,8 +308,8 @@ int init_dirac_halfspinor32() { NBPointer32[ieo][8 * i + 1] = &recvBuffer32[(g_lexic2eosub[g_idn[j][0]] - VOLUME / 2)]; } #endif -#if ((defined PARALLELX) || (defined PARALLELXY) || (defined PARALLELXYZ) || \ - (defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT)) +#if ((defined TM_PARALLELX) || (defined TM_PARALLELXY) || (defined TM_PARALLELXYZ) || \ + (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT)) 
if (x == LX - 1) { NBPointer32[ieo][8 * i + 2] = &recvBuffer32[(g_lexic2eosub[g_iup[j][1]] - VOLUME / 2)]; } @@ -317,8 +317,8 @@ int init_dirac_halfspinor32() { NBPointer32[ieo][8 * i + 3] = &recvBuffer32[(g_lexic2eosub[g_idn[j][1]] - VOLUME / 2)]; } #endif -#if ((defined PARALLELXY) || (defined PARALLELXYZ) || (defined PARALLELXYT) || \ - (defined PARALLELXYZT)) +#if ((defined TM_PARALLELXY) || (defined TM_PARALLELXYZ) || (defined TM_PARALLELXYT) || \ + (defined TM_PARALLELXYZT)) if (y == LY - 1) { NBPointer32[ieo][8 * i + 4] = &recvBuffer32[(g_lexic2eosub[g_iup[j][2]] - VOLUME / 2)]; } @@ -326,7 +326,7 @@ int init_dirac_halfspinor32() { NBPointer32[ieo][8 * i + 5] = &recvBuffer32[(g_lexic2eosub[g_idn[j][2]] - VOLUME / 2)]; } #endif -#if ((defined PARALLELXYZ) || (defined PARALLELXYZT)) +#if ((defined TM_PARALLELXYZ) || (defined TM_PARALLELXYZT)) if (z == LZ - 1) { NBPointer32[ieo][8 * i + 6] = &recvBuffer32[(g_lexic2eosub[g_iup[j][3]] - VOLUME / 2)]; } diff --git a/src/lib/init/init_gauge_field.c b/src/lib/init/init_gauge_field.c index e30e040bf..1ad4463a8 100644 --- a/src/lib/init/init_gauge_field.c +++ b/src/lib/init/init_gauge_field.c @@ -54,7 +54,7 @@ int init_gauge_field(const int V, const int back) { g_gauge_field[i] = g_gauge_field[i - 1] + 4; } -#if defined _USE_HALFSPINOR +#if defined TM_USE_HALFSPINOR if (back == 1) { /* g_gauge_field_copy[ieo][PM][sites/2][mu] @@ -134,7 +134,7 @@ int init_gauge_field_32(const int V, const int back) { g_gauge_field_32[i] = g_gauge_field_32[i - 1] + 4; } -#if defined _USE_HALFSPINOR +#if defined TM_USE_HALFSPINOR if (back == 1) { /* g_gauge_field_copy[ieo][PM][sites/2][mu] @@ -167,7 +167,7 @@ int init_gauge_field_32(const int V, const int back) { g_gauge_field_copy_32[1][i] = g_gauge_field_copy_32[1][i - 1] + 4; } } -#else /* than _USE_HALFSPINOR */ +#else /* than TM_USE_HALFSPINOR */ if (back == 1) { if ((void*)(g_gauge_field_copy_32 = (su3_32**)calloc((VOLUME + RAND), sizeof(su3_32*))) == NULL) { @@ -217,7 +217,7 @@ 
void convert_32_gauge_field(su3_32** gf32, su3** gf, int V) { gf32[i][mu].c22 = (_Complex float)gf[i][mu].c22; } } -#if defined _USE_HALFSPINOR +#if defined TM_USE_HALFSPINOR #endif } diff --git a/src/lib/init/init_geometry_indices.c b/src/lib/init/init_geometry_indices.c index ef54c45de..6b75fc83a 100644 --- a/src/lib/init/init_geometry_indices.c +++ b/src/lib/init/init_geometry_indices.c @@ -58,7 +58,7 @@ int init_geometry_indices(const int V) { g_eo2lexic = (int *)calloc(V, sizeof(int)); if ((void *)g_eo2lexic == NULL) return (11); -#if (defined PARALLELXYZT || defined PARALLELXYZ) +#if (defined TM_PARALLELXYZT || defined TM_PARALLELXYZ) g_field_z_ipt_even = (int *)calloc(T * LX * LY, sizeof(int)); if ((void *)g_field_z_ipt_even == NULL) return (12); g_field_z_ipt_odd = (int *)calloc(T * LX * LY, sizeof(int)); @@ -136,7 +136,7 @@ void free_geometry_indices() { free(g_eo2lexic); free(g_lexic2eosub); free(g_lexic2eo); -#if (defined PARALLELXYZT || defined PARALLELXYZ) +#if (defined TM_PARALLELXYZT || defined TM_PARALLELXYZ) free(g_field_z_ipt_odd); free(g_field_z_ipt_even); #endif diff --git a/src/lib/init/init_parallel.h b/src/lib/init/init_parallel.h index f88ebe1b4..553da6765 100644 --- a/src/lib/init/init_parallel.h +++ b/src/lib/init/init_parallel.h @@ -19,8 +19,8 @@ * *******************************************************************************/ -#ifndef _INIT_PARALLEL_H -#define _INIT_PARALLEL_H +#ifndef _INIT_TM_PARALLEL_H +#define _INIT_TM_PARALLEL_H void init_parallel_and_read_input(int argc, char *argv[], const char input_filename[]); diff --git a/src/lib/init/init_spinor_field.c b/src/lib/init/init_spinor_field.c index c70945634..6fea95cd8 100644 --- a/src/lib/init/init_spinor_field.c +++ b/src/lib/init/init_spinor_field.c @@ -23,7 +23,7 @@ #include #include #include -#ifdef _USE_SHMEM +#ifdef TM_USE_SHMEM #include #endif #include "global.h" @@ -37,7 +37,7 @@ spinor *sp_tbuff = NULL; int init_spinor_field(const int V, const int nr) { int i = 0; -#if 
(defined _USE_SHMEM && !(defined _USE_HALFSPINOR)) +#if (defined TM_USE_SHMEM && !(defined TM_USE_HALFSPINOR)) if ((void *)(sp = (spinor *)shmalloc((nr * V + 1) * sizeof(spinor))) == NULL) { printf("malloc errno : %d\n", errno); errno = 0; @@ -65,7 +65,7 @@ int init_spinor_field(const int V, const int nr) { } void free_spinor_field() { -#if (defined _USE_SHMEM && !(defined _USE_HALFSPINOR)) +#if (defined TM_USE_SHMEM && !(defined TM_USE_HALFSPINOR)) shfree(sp); shfree(sp_csg); #else @@ -78,7 +78,7 @@ spinor32 *sp32 = NULL; int init_spinor_field_32(const int V, const int nr) { int i = 0; -#if (defined _USE_SHMEM && !(defined _USE_HALFSPINOR)) +#if (defined TM_USE_SHMEM && !(defined TM_USE_HALFSPINOR)) if ((void *)(sp32 = (spinor32 *)shmalloc((nr * V + 1) * sizeof(spinor32))) == NULL) { printf("malloc errno : %d\n", errno); errno = 0; @@ -106,7 +106,7 @@ int init_spinor_field_32(const int V, const int nr) { } void free_spinor_field_32() { -#if (defined _USE_SHMEM && !(defined _USE_HALFSPINOR)) +#if (defined TM_USE_SHMEM && !(defined TM_USE_HALFSPINOR)) shfree(sp32); #else free(sp32); @@ -119,7 +119,7 @@ void free_spinor_field_32() { int allocate_spinor_field_array(spinor ***spinors, spinor **sp, const int V, const int nr) { int i = 0; -#if (defined _USE_SHMEM && !(defined _USE_HALFSPINOR)) +#if (defined TM_USE_SHMEM && !(defined TM_USE_HALFSPINOR)) if ((void *)((*sp) = (spinor *)shmalloc((nr * V + 1) * sizeof(spinor))) == NULL) { printf("malloc errno : %d\n", errno); errno = 0; @@ -147,7 +147,7 @@ int allocate_spinor_field_array(spinor ***spinors, spinor **sp, const int V, con } void free_spinor_field_array(spinor **sp) { -#if (defined _USE_SHMEM && !(defined _USE_HALFSPINOR)) +#if (defined TM_USE_SHMEM && !(defined TM_USE_HALFSPINOR)) shfree(*sp); #else free(*sp); @@ -165,7 +165,7 @@ int init_csg_field(const int V) { /* if all histories are zero, we do not need initialisation */ if (sum != 0) { -#if (defined _USE_SHMEM && !(defined _USE_HALFSPINOR)) +#if (defined 
TM_USE_SHMEM && !(defined TM_USE_HALFSPINOR)) sp_csg = (spinor *)shmalloc((sum * V + 1) * sizeof(spinor)); #else sp_csg = (spinor *)calloc(sum * V + 1, sizeof(spinor)); diff --git a/src/lib/invert_clover_eo.c b/src/lib/invert_clover_eo.c index e3b6cad31..63e512819 100644 --- a/src/lib/invert_clover_eo.c +++ b/src/lib/invert_clover_eo.c @@ -53,7 +53,7 @@ #ifdef TM_USE_QUDA #include "quda_interface.h" #endif -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG #include "DDalphaAMG_interface.h" #endif #ifdef TM_USE_QPHIX @@ -81,7 +81,7 @@ int invert_clover_eo(spinor* const Even_new, spinor* const Odd_new, spinor* cons } #endif -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG if (solver_flag == MG) { return MG_solver_eo(Even_new, Odd_new, Even, Odd, precision, max_iter, rel_prec, VOLUME / 2, gf[0], &Msw_full); @@ -197,7 +197,7 @@ int invert_clover_eo(spinor* const Even_new, spinor* const Odd_new, spinor* cons rel_prec, VOLUME, Qsq); Qm(g_spinor_field[DUM_DERI + 1], g_spinor_field[DUM_DERI]); } -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG else if (solver_flag == MG) { return MG_solver_eo(Even_new, Odd_new, Even, Odd, precision, max_iter, rel_prec, VOLUME / 2, gf[0], &Msw_full); diff --git a/src/lib/invert_doublet_eo.c b/src/lib/invert_doublet_eo.c index 5be48415e..8d5a7dd82 100644 --- a/src/lib/invert_doublet_eo.c +++ b/src/lib/invert_doublet_eo.c @@ -50,7 +50,7 @@ #ifdef TM_USE_QUDA #include "quda_interface.h" #endif -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG #include "DDalphaAMG_interface.h" #endif #ifdef TM_USE_QPHIX @@ -75,7 +75,7 @@ int invert_doublet_eo(spinor* const Even_new_s, spinor* const Odd_new_s, spinor* } #endif -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG if (solver_flag == MG) { return MG_solver_nd_eo(Even_new_s, Odd_new_s, Even_new_c, Odd_new_c, Even_s, Odd_s, Even_c, Odd_c, precision, max_iter, rel_prec, VOLUME / 2, g_gauge_field, @@ -162,7 +162,7 @@ int invert_cloverdoublet_eo(spinor* const Even_new_s, spinor* const Odd_new_s, } #endif -#ifdef DDalphaAMG 
+#ifdef TM_USE_DDalphaAMG if (solver_flag == MG) { return MG_solver_nd_eo(Even_new_s, Odd_new_s, Even_new_c, Odd_new_c, Even_s, Odd_s, Even_c, Odd_c, precision, max_iter, rel_prec, VOLUME / 2, g_gauge_field, diff --git a/src/lib/invert_eo.c b/src/lib/invert_eo.c index 997cab021..3b7625d48 100644 --- a/src/lib/invert_eo.c +++ b/src/lib/invert_eo.c @@ -61,7 +61,7 @@ #ifdef TM_USE_QPHIX #include "qphix_interface.h" #endif -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG #include "DDalphaAMG_interface.h" #endif @@ -84,7 +84,7 @@ int invert_eo(spinor *const Even_new, spinor *const Odd_new, spinor *const Even, } #endif -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG if (solver_flag == MG) return MG_solver_eo(Even_new, Odd_new, Even, Odd, precision, max_iter, rel_prec, VOLUME / 2, g_gauge_field, &M_full); diff --git a/src/lib/io/gauge_read.c b/src/lib/io/gauge_read.c index b7be10928..de53d9c28 100644 --- a/src/lib/io/gauge_read.c +++ b/src/lib/io/gauge_read.c @@ -19,7 +19,7 @@ ***********************************************************************/ #include "gauge.ih" -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG #include "DDalphaAMG_interface.h" #endif @@ -209,7 +209,7 @@ int read_gauge_field(char *filename, su3 **const gf) { // reading a new gauge configuration moves the gauge_id a long way // to guarantee that the change is propagated update_tm_gauge_id(&g_gauge_state, TM_GAUGE_PROPAGATE_THRESHOLD); -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG MG_reset(); #endif } diff --git a/src/lib/io/gauge_read_binary.c b/src/lib/io/gauge_read_binary.c index b61284cab..473e4d9c7 100644 --- a/src/lib/io/gauge_read_binary.c +++ b/src/lib/io/gauge_read_binary.c @@ -22,7 +22,7 @@ /* FIXME I will first fix this function by using referral. Probably should be done better in the future. AD. 
*/ -#ifdef HAVE_LIBLEMON +#ifdef TM_USE_LEMON int read_binary_gauge_data(LemonReader* lemonreader, DML_Checksum* checksum, paramsIldgFormat* input, su3** const gf) { int t, x, y, z, status = 0; @@ -144,7 +144,7 @@ int read_binary_gauge_data(LemonReader* lemonreader, DML_Checksum* checksum, free(filebuffer); return (0); } -#else /* HAVE_LIBLEMON */ +#else /* TM_USE_LEMON */ int read_binary_gauge_data(LimeReader *limereader, DML_Checksum *checksum, paramsIldgFormat *input, su3 **const gf) { int t, x, y, z, status = 0; @@ -273,4 +273,4 @@ int read_binary_gauge_data(LimeReader *limereader, DML_Checksum *checksum, param #endif return (0); } -#endif /* HAVE_LIBLEMON */ +#endif /* TM_USE_LEMON */ diff --git a/src/lib/io/gauge_write_binary.c b/src/lib/io/gauge_write_binary.c index 668b53a17..ad3c7882e 100644 --- a/src/lib/io/gauge_write_binary.c +++ b/src/lib/io/gauge_write_binary.c @@ -22,7 +22,7 @@ /* FIXME I will first fix this function by using referral. Probably should be done better in the future. AD. 
*/ -#ifdef HAVE_LIBLEMON +#ifdef TM_USE_LEMON int write_binary_gauge_data(LemonWriter* lemonwriter, const int prec, DML_Checksum* checksum) { int x, xG, y, yG, z, zG, t, tG, status = 0; su3 tmp3[4]; @@ -133,7 +133,7 @@ int write_binary_gauge_data(LemonWriter* lemonwriter, const int prec, DML_Checks return 0; } -#else /* HAVE_LIBLEMON */ +#else /* TM_USE_LEMON */ int write_binary_gauge_data(LimeWriter* limewriter, const int prec, DML_Checksum* checksum) { int x, X, y, Y, z, Z, tt, t0, tag = 0, id = 0, status = 0; @@ -281,4 +281,4 @@ int write_binary_gauge_data(LimeWriter* limewriter, const int prec, DML_Checksum return (0); } -#endif /* HAVE_LIBLEMON */ +#endif /* TM_USE_LEMON */ diff --git a/src/lib/io/selector.h b/src/lib/io/selector.h index 806178bff..236be8d32 100644 --- a/src/lib/io/selector.h +++ b/src/lib/io/selector.h @@ -21,11 +21,11 @@ #define _IO_SELECTOR_H #include -#ifdef HAVE_LIBLEMON +#ifdef TM_USE_LEMON #include -#endif /* HAVE_LIBLEMON */ +#endif /* TM_USE_LEMON */ -#ifdef HAVE_LIBLEMON +#ifdef TM_USE_LEMON #define LIME_FILE MPI_File #define WRITER LemonWriter #define READER LemonReader @@ -42,7 +42,7 @@ #define WriterCloseRecord lemonWriterCloseRecord #define DestroyReader lemonDestroyReader #define DestroyHeader lemonDestroyHeader -#else /* HAVE_LIBLEMON */ +#else /* TM_USE_LEMON */ #define LIME_FILE FILE #define WRITER LimeWriter #define READER LimeReader diff --git a/src/lib/io/spinor_read_binary.c b/src/lib/io/spinor_read_binary.c index 6d459fd2c..81607a700 100644 --- a/src/lib/io/spinor_read_binary.c +++ b/src/lib/io/spinor_read_binary.c @@ -19,7 +19,7 @@ #include "spinor.ih" -#ifdef HAVE_LIBLEMON +#ifdef TM_USE_LEMON int read_binary_spinor_data(spinor *const s, spinor *const r, LemonReader *lemonreader, DML_Checksum *checksum) { int t, x, y, z, i = 0, status = 0; @@ -126,7 +126,7 @@ int read_binary_spinor_data(spinor *const s, spinor *const r, LemonReader *lemon free(filebuffer); return 0; } -#else /* HAVE_LIBLEMON */ +#else /* TM_USE_LEMON 
*/ int read_binary_spinor_data(spinor *const s, spinor *const r, LimeReader *limereader, DML_Checksum *checksum) { int t, x, y, z, i = 0, status = 0; @@ -212,9 +212,9 @@ int read_binary_spinor_data(spinor *const s, spinor *const r, LimeReader *limere #endif return (0); } -#endif /* HAVE_LIBLEMON */ +#endif /* TM_USE_LEMON */ -#ifdef HAVE_LIBLEMON +#ifdef TM_USE_LEMON int read_binary_spinor_data_l(spinor *const s, LemonReader *lemonreader, DML_Checksum *checksum) { int t, x, y, z, i = 0, status = 0; int latticeSize[] = {T_global, g_nproc_x * LX, g_nproc_y * LY, g_nproc_z * LZ}; @@ -314,7 +314,7 @@ int read_binary_spinor_data_l(spinor *const s, LemonReader *lemonreader, DML_Che free(filebuffer); return 0; } -#else /* HAVE_LIBLEMON */ +#else /* TM_USE_LEMON */ int read_binary_spinor_data_l(spinor *const s, LimeReader *limereader, DML_Checksum *checksum) { int t, x, y, z, i = 0, status = 0; n_uint64_t bytes; @@ -390,4 +390,4 @@ int read_binary_spinor_data_l(spinor *const s, LimeReader *limereader, DML_Check #endif return (0); } -#endif /* HAVE_LIBLEMON */ +#endif /* TM_USE_LEMON */ diff --git a/src/lib/io/spinor_write_binary.c b/src/lib/io/spinor_write_binary.c index a2bc0cd68..560b5ce65 100644 --- a/src/lib/io/spinor_write_binary.c +++ b/src/lib/io/spinor_write_binary.c @@ -19,7 +19,7 @@ #include "spinor.ih" -#ifdef HAVE_LIBLEMON +#ifdef TM_USE_LEMON int write_binary_spinor_data(spinor *const s, spinor *const r, LemonWriter *lemonwriter, DML_Checksum *checksum, int const prec) { int x, y, z, t, i = 0, xG, yG, zG, tG, status = 0; @@ -124,7 +124,7 @@ int write_binary_spinor_data(spinor *const s, spinor *const r, LemonWriter *lemo return 0; } -#else /* HAVE_LIBLEMON */ +#else /* TM_USE_LEMON */ int write_binary_spinor_data(spinor *const s, spinor *const r, LimeWriter *limewriter, DML_Checksum *checksum, const int prec) { int x, X, y, Y, z, Z, t, t0, tag = 0, id = 0, i = 0, status = 0; @@ -272,9 +272,9 @@ int write_binary_spinor_data(spinor *const s, spinor *const r, 
LimeWriter *limew } return (0); } -#endif /* HAVE_LIBLEMON */ +#endif /* TM_USE_LEMON */ -#ifdef HAVE_LIBLEMON +#ifdef TM_USE_LEMON int write_binary_spinor_data_l(spinor *const s, LemonWriter *lemonwriter, DML_Checksum *checksum, int const prec) { int x, y, z, t, i = 0, xG, yG, zG, tG, status = 0; @@ -374,7 +374,7 @@ int write_binary_spinor_data_l(spinor *const s, LemonWriter *lemonwriter, DML_Ch return 0; } -#else /* HAVE_LIBLEMON */ +#else /* TM_USE_LEMON */ int write_binary_spinor_data_l(spinor *const s, LimeWriter *limewriter, DML_Checksum *checksum, const int prec) { int x, X, y, Y, z, Z, t, t0, tag = 0, id = 0, i = 0, status = 0; @@ -514,4 +514,4 @@ int write_binary_spinor_data_l(spinor *const s, LimeWriter *limewriter, DML_Chec } return (0); } -#endif /* HAVE_LIBLEMON */ +#endif /* TM_USE_LEMON */ diff --git a/src/lib/io/spinor_write_propagator_type.c b/src/lib/io/spinor_write_propagator_type.c index 67356b8f1..77eb17728 100644 --- a/src/lib/io/spinor_write_propagator_type.c +++ b/src/lib/io/spinor_write_propagator_type.c @@ -4,9 +4,9 @@ void write_propagator_type(WRITER *writer, const int type) { uint64_t bytes; char *message; -#ifndef HAVE_LIBLEMON +#ifndef TM_USE_LEMON if (g_cart_id == 0) { -#endif /* ! HAVE_LIBLEMON */ +#endif /* ! TM_USE_LEMON */ message = (char *)malloc(128); @@ -34,7 +34,7 @@ void write_propagator_type(WRITER *writer, const int type) { close_writer_record(writer); free(message); -#ifndef HAVE_LIBLEMON +#ifndef TM_USE_LEMON } -#endif /* ! HAVE_LIBLEMON */ +#endif /* ! TM_USE_LEMON */ } diff --git a/src/lib/io/spinor_write_source_format.c b/src/lib/io/spinor_write_source_format.c index a501ae5d3..e6cf0e782 100644 --- a/src/lib/io/spinor_write_source_format.c +++ b/src/lib/io/spinor_write_source_format.c @@ -22,9 +22,9 @@ void write_source_format(WRITER *writer, paramsSourceFormat const *format) { uint64_t bytes; char *buf = NULL; -#ifndef HAVE_LIBLEMON +#ifndef TM_USE_LEMON if (g_cart_id == 0) { -#endif /* ! 
HAVE_LIBLEMON */ +#endif /* ! TM_USE_LEMON */ buf = (char *)malloc(512); sprintf(buf, "\n" @@ -49,7 +49,7 @@ void write_source_format(WRITER *writer, paramsSourceFormat const *format) { close_writer_record(writer); free(buf); -#ifndef HAVE_LIBLEMON +#ifndef TM_USE_LEMON } -#endif /* ! HAVE_LIBLEMON */ +#endif /* ! TM_USE_LEMON */ } diff --git a/src/lib/io/utils_construct_reader.c b/src/lib/io/utils_construct_reader.c index 2714455b2..832ede73d 100644 --- a/src/lib/io/utils_construct_reader.c +++ b/src/lib/io/utils_construct_reader.c @@ -7,22 +7,22 @@ void construct_reader(READER **reader, char *filename) { int status = 0; if (g_debug_level > 0 && g_cart_id == 0) { -#ifdef HAVE_LIBLEMON +#ifdef TM_USE_LEMON printf("# Constructing LEMON reader for file %s ...\n", filename); #else printf("# Constructing LIME reader for file %s ...\n", filename); #endif } -#ifdef HAVE_LIBLEMON +#ifdef TM_USE_LEMON fh = (MPI_File *)malloc(sizeof(MPI_File)); status = MPI_File_open(g_cart_grid, filename, MPI_MODE_RDONLY, MPI_INFO_NULL, fh); status = (status == MPI_SUCCESS) ? 0 : 1; -#else /* HAVE_LIBLEMON */ +#else /* TM_USE_LEMON */ fh = fopen(filename, "r"); status = (fh == NULL) ? 
1 : 0; fflush(stderr); -#endif /* HAVE_LIBLEMON */ +#endif /* TM_USE_LEMON */ if (status) { kill_with_error(fh, g_cart_id, @@ -30,11 +30,11 @@ void construct_reader(READER **reader, char *filename) { "rights.\nUnable to continue.\n"); } -#ifdef HAVE_LIBLEMON +#ifdef TM_USE_LEMON *reader = lemonCreateReader(fh, g_cart_grid); -#else /* HAVE_LIBLEMON */ +#else /* TM_USE_LEMON */ *reader = limeCreateReader(fh); -#endif /* HAVE_LIBLEMON */ +#endif /* TM_USE_LEMON */ if (*reader == (READER *)NULL) { kill_with_error(fh, g_cart_id, "\nCould not create reader, unable to continue.\n"); diff --git a/src/lib/io/utils_construct_writer.c b/src/lib/io/utils_construct_writer.c index 4f13900fe..f2fe58bb7 100644 --- a/src/lib/io/utils_construct_writer.c +++ b/src/lib/io/utils_construct_writer.c @@ -4,14 +4,14 @@ void construct_writer(WRITER **writer, char *filename, const int append) { LIME_FILE *fh = NULL; int status = 0; if (g_debug_level > 0 && g_cart_id == 0) { -#ifdef HAVE_LIBLEMON +#ifdef TM_USE_LEMON printf("# Constructing LEMON writer for file %s for append = %d\n", filename, append); #else printf("# Constructing LIME writer for file %s for append = %d\n", filename, append); #endif } -#ifdef HAVE_LIBLEMON +#ifdef TM_USE_LEMON fh = (MPI_File *)malloc(sizeof(MPI_File)); if (append) { status = MPI_File_open(g_cart_grid, filename, @@ -24,7 +24,7 @@ void construct_writer(WRITER **writer, char *filename, const int append) { status = (status == MPI_SUCCESS) ? 0 : 1; *writer = lemonCreateWriter(fh, g_cart_grid); status = status || (writer == NULL); -#else /* HAVE_LIBLEMON */ +#else /* TM_USE_LEMON */ if (g_cart_id == 0) { if (append) { fh = fopen(filename, "a"); @@ -35,7 +35,7 @@ void construct_writer(WRITER **writer, char *filename, const int append) { *writer = limeCreateWriter(fh); status = status || (writer == NULL); } -#endif /* HAVE_LIBLEMON */ +#endif /* TM_USE_LEMON */ if (status) kill_with_error(fh, g_cart_id, "Failed to create writer. 
Aborting...\n"); } diff --git a/src/lib/io/utils_destruct_reader.c b/src/lib/io/utils_destruct_reader.c index 4ee23d595..2ed391c47 100644 --- a/src/lib/io/utils_destruct_reader.c +++ b/src/lib/io/utils_destruct_reader.c @@ -5,10 +5,10 @@ void destruct_reader(READER *reader) { fh = reader->fp; DestroyReader(reader); -#ifdef HAVE_LIBLEMON +#ifdef TM_USE_LEMON MPI_File_close(fh); free(fh); /* NB This assumes construct_writer was used to malloc memory! */ -#else /* HAVE_LIBLEMON */ +#else /* TM_USE_LEMON */ fclose(fh); -#endif /* HAVE_LIBLEMON */ +#endif /* TM_USE_LEMON */ } diff --git a/src/lib/io/utils_destruct_writer.c b/src/lib/io/utils_destruct_writer.c index 840c06b4e..1f6216167 100644 --- a/src/lib/io/utils_destruct_writer.c +++ b/src/lib/io/utils_destruct_writer.c @@ -3,16 +3,16 @@ void destruct_writer(WRITER *writer) { LIME_FILE *fh = NULL; -#ifdef HAVE_LIBLEMON +#ifdef TM_USE_LEMON fh = writer->fp; lemonDestroyWriter(writer); MPI_File_close(fh); free(fh); /* NB This assumes construct_writer was used to malloc memory! 
*/ -#else /* HAVE_LIBLEMON */ +#else /* TM_USE_LEMON */ if (g_cart_id == 0) { fh = writer->fp; limeDestroyWriter(writer); fclose(fh); } -#endif /* HAVE_LIBLEMON */ +#endif /* TM_USE_LEMON */ } diff --git a/src/lib/io/utils_kill_with_error.c b/src/lib/io/utils_kill_with_error.c index bd697220d..322536bd7 100644 --- a/src/lib/io/utils_kill_with_error.c +++ b/src/lib/io/utils_kill_with_error.c @@ -7,11 +7,11 @@ void kill_with_error(LIME_FILE *fh, int const rank, char const *error) { } if (fh != NULL) -#ifdef HAVE_LIBLEMON +#ifdef TM_USE_LEMON MPI_File_close(fh); #else fclose(fh); -#endif /* HAVE_LIBLEMON */ +#endif /* TM_USE_LEMON */ #ifdef TM_USE_MPI MPI_Abort(MPI_COMM_WORLD, 1); diff --git a/src/lib/io/utils_write_first_message.c b/src/lib/io/utils_write_first_message.c index 983b92b0a..287d67c37 100644 --- a/src/lib/io/utils_write_first_message.c +++ b/src/lib/io/utils_write_first_message.c @@ -30,28 +30,28 @@ int write_first_messages(FILE* parameterfile, char const* const executable, TMLQCD_PACKAGE_VERSION, git_hash); printf("%s", message); fprintf(parameterfile, "%s", message); -#ifdef _GAUGE_COPY - printf("# The code is compiled with -D_GAUGE_COPY\n"); - fprintf(parameterfile, "# The code is compiled with -D_GAUGE_COPY\n"); +#ifdef TM_GAUGE_COPY + printf("# The code is compiled with -DTM_GAUGE_COPY\n"); + fprintf(parameterfile, "# The code is compiled with -DTM_GAUGE_COPY\n"); #endif -#ifdef _USE_HALFSPINOR - printf("# The code is compiled with -D_USE_HALFSPINOR\n"); - fprintf(parameterfile, "# The code is compiled with -D_USE_HALFSPINOR\n"); +#ifdef TM_USE_HALFSPINOR + printf("# The code is compiled with -DTM_USE_HALFSPINOR\n"); + fprintf(parameterfile, "# The code is compiled with -DTM_USE_HALFSPINOR\n"); #endif -#ifdef _USE_SHMEM - printf("# the code is compiled with -D_USE_SHMEM\n"); - fprintf(parameterfile, "# the code is compiled with -D_USE_SHMEM\n"); -#ifdef _PERSISTENT +#ifdef TM_USE_SHMEM + printf("# the code is compiled with -DTM_USE_SHMEM\n"); + 
fprintf(parameterfile, "# the code is compiled with -DTM_USE_SHMEM\n"); +#ifdef TM_PERSISTENT printf("# the code is compiled for persistent MPI calls (halfspinor only)\n"); fprintf(parameterfile, "# the code is compiled for persistent MPI calls (halfspinor only)\n"); #endif #endif #ifdef TM_USE_MPI -#ifdef _NON_BLOCKING +#ifdef TM_NON_BLOCKING printf("# the code is compiled for non-blocking MPI calls (spinor and gauge)\n"); fprintf(parameterfile, "# the code is compiled for non-blocking MPI calls (spinor and gauge)\n"); #endif -#ifdef HAVE_LIBLEMON +#ifdef TM_USE_LEMON printf("# the code is compiled with MPI IO / Lemon\n"); fprintf(parameterfile, "# the code is compiled with MPI IO / Lemon\n"); #endif diff --git a/src/lib/io/utils_write_header.c b/src/lib/io/utils_write_header.c index 7f5f85c83..be8ae4ade 100644 --- a/src/lib/io/utils_write_header.c +++ b/src/lib/io/utils_write_header.c @@ -23,9 +23,9 @@ void write_header(WRITER *writer, int MB, int ME, char const *type, uint64_t byt int status; RECORD_HEADER *header; -#ifndef HAVE_LIBLEMON +#ifndef TM_USE_LEMON if (g_cart_id == 0) { -#endif /* ! HAVE_LIBLEMON */ +#endif /* ! TM_USE_LEMON */ /* Nasty (but probably harmless) hack to get rid of const qualifier - the original c-lime was * sloppy here. */ header = CreateHeader(MB, ME, (char *)type, bytes); @@ -35,8 +35,8 @@ void write_header(WRITER *writer, int MB, int ME, char const *type, uint64_t byt if (status != LIME_SUCCESS) { kill_with_error(writer->fp, g_cart_id, "Header writing error. Aborting\n"); } -#ifndef HAVE_LIBLEMON +#ifndef TM_USE_LEMON } -#endif /* ! HAVE_LIBLEMON */ +#endif /* ! 
TM_USE_LEMON */ return; } diff --git a/src/lib/io/utils_write_message.c b/src/lib/io/utils_write_message.c index b71cdbbce..d346c9a9c 100644 --- a/src/lib/io/utils_write_message.c +++ b/src/lib/io/utils_write_message.c @@ -23,9 +23,9 @@ int write_message(WRITER *writer, char const *buffer, uint64_t bytes) { int status; n_uint64_t bytesWritten = bytes; -#ifndef HAVE_LIBLEMON +#ifndef TM_USE_LEMON if (g_cart_id == 0) { -#endif /* ! HAVE_LIBLEMON */ +#endif /* ! TM_USE_LEMON */ if (buffer == (char *)NULL) return (0); #ifdef TM_USE_MPI @@ -35,8 +35,8 @@ int write_message(WRITER *writer, char const *buffer, uint64_t bytes) { #endif if (status != LIME_SUCCESS || bytes != bytesWritten) kill_with_error(writer->fp, g_cart_id, "I/O error on writing message. Aborting...\n"); -#ifndef HAVE_LIBLEMON +#ifndef TM_USE_LEMON } -#endif /* ! HAVE_LIBLEMON */ +#endif /* ! TM_USE_LEMON */ return (0); } diff --git a/src/lib/linalg/blas.h b/src/lib/linalg/blas.h index a972e5029..110afb01f 100644 --- a/src/lib/linalg/blas.h +++ b/src/lib/linalg/blas.h @@ -23,8 +23,8 @@ #include #include "linalg/fortran.h" -#if defined CRAY || defined HITACHI -/* On the CRAY is all different, of course... */ +#if defined TM_CRAY || defined HITACHI +/* On the CRAY everything is different, of course... 
*/ #include "fortran.h" #define zgemm ZGEMM #define zgemv ZGEMV diff --git a/src/lib/linalg/lapack.h b/src/lib/linalg/lapack.h index 1c7f4ce7a..a651b07ae 100644 --- a/src/lib/linalg/lapack.h +++ b/src/lib/linalg/lapack.h @@ -23,7 +23,7 @@ #include #include "linalg/fortran.h" -#if defined CRAY || defined HITACHI +#if defined TM_CRAY || defined HITACHI #define zgels CGELS #define zgesv CGESV #define zgeevx CGEEVX diff --git a/src/lib/little_D.c b/src/lib/little_D.c index 370e7583a..2bee49824 100644 --- a/src/lib/little_D.c +++ b/src/lib/little_D.c @@ -276,11 +276,11 @@ extern int waitcount; void init_little_field_exchange(_Complex double *w) { #ifdef TM_USE_MPI int i = 0; -#if (defined PARALLELT || defined PARALLELX) +#if (defined TM_PARALLELT || defined TM_PARALLELX) int no_dirs = 2; -#elif (defined PARALLELXT || defined PARALLELXY || defined PARALLELXYZ) +#elif (defined TM_PARALLELXT || defined TM_PARALLELXY || defined TM_PARALLELXYZ) int no_dirs = 4; -#elif (defined PARALLELXYT || defined PARALLELXYZT) +#elif (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) int no_dirs = 6; #endif if (waitcount != 0) { @@ -304,7 +304,7 @@ void init_little_field_exchange(_Complex double *w) { g_nb_list[i], i + 1, g_cart_grid, &lrequests[2 * i + 3]); waitcount += 4; } -#ifdef PARALLELXYZT +#ifdef TM_PARALLELXYZT /* send to the right, receive from the left */ i = 6; MPI_Isend((void *)(w + g_N_s), g_N_s, MPI_DOUBLE_COMPLEX, g_nb_list[i], i, g_cart_grid, diff --git a/src/lib/meas/polyakov_loop.c b/src/lib/meas/polyakov_loop.c index 9108bcb99..25deea402 100644 --- a/src/lib/meas/polyakov_loop.c +++ b/src/lib/meas/polyakov_loop.c @@ -446,7 +446,7 @@ int polyakov_loop_dir(const int nstore /* in */, const int dir /* in */) { /* (1) collect contributions from different time/z slices to nodes with rank=0 in spatial volume/space-time slices */ -#ifndef PARALLELXYZT +#ifndef TM_PARALLELXYZT if (dir == 0) { #endif tmp_ray = (su3 *)calloc(VOL3, sizeof(su3)); /* */ @@ -456,7 +456,7 @@ int 
polyakov_loop_dir(const int nstore /* in */, const int dir /* in */) { } MPI_Reduce(tmp_loc, tmp_ray, VOL3, mpi_su3, mpi_reduce_su3_ray, 0, ray); -#ifndef PARALLELXYZT +#ifndef TM_PARALLELXYZT } #endif @@ -475,7 +475,7 @@ int polyakov_loop_dir(const int nstore /* in */, const int dir /* in */) { ks = 0.0; #ifdef TM_USE_MPI -#ifdef PARALLELXYZT +#ifdef TM_PARALLELXYZT u = tmp_ray; #else if (dir == 0) { @@ -502,11 +502,11 @@ int polyakov_loop_dir(const int nstore /* in */, const int dir /* in */) { #ifdef TM_USE_MPI MPI_Reduce(&pl_tmp, &pl, 1, MPI_DOUBLE_COMPLEX, MPI_SUM, 0, slice); } -#ifndef PARALLELXYZT +#ifndef TM_PARALLELXYZT if (dir == 0) { #endif free(tmp_ray); -#ifndef PARALLELXYZT +#ifndef TM_PARALLELXYZT } #endif diff --git a/src/lib/measure_gauge_action.c b/src/lib/measure_gauge_action.c index 6a558a51b..1f7cb6ad5 100644 --- a/src/lib/measure_gauge_action.c +++ b/src/lib/measure_gauge_action.c @@ -26,9 +26,7 @@ * Returns the value of the action ************************************************************************/ -#ifdef HAVE_CONFIG_H #include -#endif #include #include #include diff --git a/src/lib/mpi_init.c b/src/lib/mpi_init.c index 2bbbde315..cc09fd4cd 100644 --- a/src/lib/mpi_init.c +++ b/src/lib/mpi_init.c @@ -25,7 +25,7 @@ #ifdef TM_USE_MPI #include #endif -#ifdef _USE_SHMEM +#ifdef TM_USE_SHMEM #include #endif #include "global.h" @@ -134,7 +134,7 @@ MPI_Datatype halffield_y_slice_gath; MPI_Datatype halffield_z_slice_cont; -#if (defined PARALLELXYZT || defined PARALLELXYZ) +#if (defined TM_PARALLELXYZT || defined TM_PARALLELXYZ) MPI_Datatype field_z_slice_even_dn; MPI_Datatype field_z_slice_even_up; MPI_Datatype field_z_slice_odd_dn; @@ -188,60 +188,60 @@ void tmlqcd_mpi_init(int argc, char *argv[]) { } #ifdef TM_USE_MPI -#ifdef _USE_SHMEM +#ifdef TM_USE_SHMEM /* we need that the PE number in MPI_COMM_WORL */ /* exactly correspond to the one in g_cart_grid */ reorder = 0; #endif -#ifndef FIXEDVOLUME +#ifndef TM_FIXEDVOLUME N_PROC_T = 0; /* the 
other N_PROC_? are read from input, if not constraint below */ /* N_PROC_T will be set by MPI_Dims_create, if not constraint below */ #endif -#if defined PARALLELT +#if defined TM_PARALLELT ndims = 1; -#ifndef FIXEDVOLUME +#ifndef TM_FIXEDVOLUME N_PROC_X = 1; N_PROC_Y = 1; N_PROC_Z = 1; #endif #endif -#if defined PARALLELX +#if defined TM_PARALLELX ndims = 1; -#ifndef FIXEDVOLUME +#ifndef TM_FIXEDVOLUME N_PROC_T = 1; N_PROC_Y = 1; N_PROC_Z = 1; #endif #endif -#if defined PARALLELXT +#if defined TM_PARALLELXT ndims = 2; -#ifndef FIXEDVOLUME +#ifndef TM_FIXEDVOLUME N_PROC_Y = 1; N_PROC_Z = 1; #endif #endif -#if defined PARALLELXY +#if defined TM_PARALLELXY ndims = 2; -#ifndef FIXEDVOLUME +#ifndef TM_FIXEDVOLUME N_PROC_T = 1; N_PROC_Z = 1; #endif #endif -#if defined PARALLELXYT +#if defined TM_PARALLELXYT ndims = 3; -#ifndef FIXEDVOLUME +#ifndef TM_FIXEDVOLUME N_PROC_Z = 1; #endif #endif -#if defined PARALLELXYZ +#if defined TM_PARALLELXYZ ndims = 3; -#ifndef FIXEDVOLUME +#ifndef TM_FIXEDVOLUME N_PROC_T = 1; #endif #endif -#if defined PARALLELXYZT +#if defined TM_PARALLELXYZT ndims = 4; #endif dims[0] = N_PROC_T; @@ -278,7 +278,7 @@ void tmlqcd_mpi_init(int argc, char *argv[]) { exit(-1); } -#ifndef FIXEDVOLUME +#ifndef TM_FIXEDVOLUME N_PROC_T = g_nproc_t; N_PROC_X = g_nproc_x; N_PROC_Y = g_nproc_y; @@ -289,42 +289,42 @@ void tmlqcd_mpi_init(int argc, char *argv[]) { LZ = LZ / g_nproc_z; VOLUME = (T * LX * LY * LZ); SPACEVOLUME = VOLUME / T; -#ifdef PARALLELT +#ifdef TM_PARALLELT RAND = (2 * LX * LY * LZ); EDGES = 0; -#elif defined PARALLELX +#elif defined TM_PARALLELX RAND = (2 * T * LY * LZ); EDGES = 0; -#elif defined PARALLELXT +#elif defined TM_PARALLELXT RAND = 2 * LZ * (LY * LX + T * LY); EDGES = 4 * LZ * LY; -#elif defined PARALLELXY +#elif defined TM_PARALLELXY RAND = 2 * LZ * T * (LX + LY); EDGES = 4 * LZ * T; -#elif defined PARALLELXYT +#elif defined TM_PARALLELXYT RAND = 2 * LZ * (LY * LX + T * LY + T * LX); EDGES = 4 * LZ * (LY + T + LX); -#elif defined 
PARALLELXYZ +#elif defined TM_PARALLELXYZ RAND = 2 * T * (LY * LZ + LX * LZ + LX * LY); EDGES = 4 * T * (LX + LY + LZ); -#elif defined PARALLELXYZT +#elif defined TM_PARALLELXYZT RAND = 2 * LZ * LY * LX + 2 * LZ * T * LY + 2 * LZ * T * LX + 2 * T * LX * LY; EDGES = 4 * LZ * LY + 4 * LZ * T + 4 * LZ * LX + 4 * LY * T + 4 * LY * LX + 4 * T * LX; -#else /* ifdef PARALLELT */ +#else /* ifdef TM_PARALLELT */ RAND = 0; EDGES = 0; -#endif /* ifdef PARALLELT */ +#endif /* ifdef TM_PARALLELT */ /* Note that VOLUMEPLUSRAND is not always equal to VOLUME+RAND */ /* VOLUMEPLUSRAND rather includes the edges */ VOLUMEPLUSRAND = VOLUME + RAND + EDGES; SPACERAND = RAND / T; -#endif /* ifndef FIXEDVOLUME */ +#endif /* ifndef TM_FIXEDVOLUME */ g_dbw2rand = (RAND + 2 * EDGES); -#if (defined PARALLELXYZT || defined PARALLELXYZ) +#if (defined TM_PARALLELXYZT || defined TM_PARALLELXYZ) field_buffer_z = (spinor *)malloc(T * LX * LY / 2 * sizeof(spinor)); field_buffer_z2 = (spinor *)malloc(T * LX * LY / 2 * sizeof(spinor)); -#ifdef _NON_BLOCKING +#ifdef TM_NON_BLOCKING field_buffer_z3 = (spinor *)malloc(T * LX * LY / 2 * sizeof(spinor)); field_buffer_z4 = (spinor *)malloc(T * LX * LY / 2 * sizeof(spinor)); #endif @@ -347,23 +347,23 @@ void tmlqcd_mpi_init(int argc, char *argv[]) { for (i = 0; i < 8; i++) { g_nb_list[i] = g_cart_id; } -#if (defined PARALLELT || defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELT || defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) MPI_Cart_shift(g_cart_grid, 0, 1, &g_nb_t_dn, &g_nb_t_up); g_nb_list[0] = g_nb_t_up; g_nb_list[1] = g_nb_t_dn; #endif -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT || defined PARALLELX || \ - defined PARALLELXY || defined PARALLELXYZ) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT || defined TM_PARALLELX || \ + defined TM_PARALLELXY || defined TM_PARALLELXYZ) MPI_Cart_shift(g_cart_grid, 1, 1, &g_nb_x_dn, 
&g_nb_x_up); g_nb_list[2] = g_nb_x_up; g_nb_list[3] = g_nb_x_dn; #endif -#if (defined PARALLELXYT || defined PARALLELXYZT || defined PARALLELXY || defined PARALLELXYZ) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT || defined TM_PARALLELXY || defined TM_PARALLELXYZ) MPI_Cart_shift(g_cart_grid, 2, 1, &g_nb_y_dn, &g_nb_y_up); g_nb_list[4] = g_nb_y_up; g_nb_list[5] = g_nb_y_dn; #endif -#if (defined PARALLELXYZT || defined PARALLELXYZ) +#if (defined TM_PARALLELXYZT || defined TM_PARALLELXYZ) MPI_Cart_shift(g_cart_grid, 3, 1, &g_nb_z_dn, &g_nb_z_up); g_nb_list[6] = g_nb_z_up; g_nb_list[7] = g_nb_z_dn; @@ -669,7 +669,7 @@ void tmlqcd_mpi_init(int argc, char *argv[]) { g_mpi_ST_rank = 0; g_stdio_proc = 0; -#ifndef FIXEDVOLUME +#ifndef TM_FIXEDVOLUME T = T_global; VOLUME = (T * LX * LY * LZ); SPACEVOLUME = VOLUME / T; @@ -687,7 +687,7 @@ void tmlqcd_mpi_init(int argc, char *argv[]) { /* Here we perform some checks in order not to */ /* run into trouble later */ -#if (defined PARALLELXYZT || defined PARALLELXYZ) +#if (defined TM_PARALLELXYZT || defined TM_PARALLELXYZ) if ((T * LX * LY) % 2 != 0 && even_odd_flag == 1) { fprintf(stderr, "T*LX*LY must be even!\nAborting prgram...\n"); #ifdef TM_USE_MPI diff --git a/src/lib/mpi_init.h b/src/lib/mpi_init.h index dce6dfad7..d9476e662 100644 --- a/src/lib/mpi_init.h +++ b/src/lib/mpi_init.h @@ -108,8 +108,8 @@ extern MPI_Datatype halffield_y_slice_gath; extern MPI_Datatype halffield_z_slice_cont; -#if (defined PARALLELT || defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT || \ - defined PARALLELXYZ) +#if (defined TM_PARALLELT || defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT || \ + defined TM_PARALLELXYZ) extern MPI_Datatype field_z_slice_even_dn; extern MPI_Datatype field_z_slice_even_up; extern MPI_Datatype field_z_slice_odd_dn; diff --git a/src/lib/operator.c b/src/lib/operator.c index 6b6a94df2..e15a97701 100644 --- a/src/lib/operator.c +++ b/src/lib/operator.c @@ -63,7 
+63,7 @@ #ifdef TM_USE_QUDA #include "quda_interface.h" #endif -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG #include "DDalphaAMG_interface.h" #endif diff --git a/src/lib/operator/D_psi_body.c b/src/lib/operator/D_psi_body.c index 06bde0fc5..b5acd1158 100644 --- a/src/lib/operator/D_psi_body.c +++ b/src/lib/operator/D_psi_body.c @@ -283,7 +283,7 @@ void _PSWITCH(D_psi)(_PTSWITCH(spinor) *const P, _PTSWITCH(spinor) *const Q) { _C_TYPE ALIGN32 phase_2l = (_C_TYPE)phase_2; _C_TYPE ALIGN32 phase_3l = (_C_TYPE)phase_3; -#ifdef _GAUGE_COPY +#ifdef TM_GAUGE_COPY if (_PSWITCH(g_update_gauge_copy)) { _PSWITCH(update_backward_gauge)(_PSWITCH(g_gauge_field)); } diff --git a/src/lib/operator/Hopping_Matrix.c b/src/lib/operator/Hopping_Matrix.c index a8da9e810..8b106e10a 100644 --- a/src/lib/operator/Hopping_Matrix.c +++ b/src/lib/operator/Hopping_Matrix.c @@ -38,11 +38,11 @@ * * Structure of top level precompiler directives * - * - defining _USE_HALFSPINOR implies that we also use + * - defining TM_USE_HALFSPINOR implies that we also use * a "gauge copy" * * - such that we are checking for the _USE_GAUGECOPY feature seperatly in the - * ELSE branch of the "if defined _USE_HALFSPINOR" statement + * ELSE branch of the "if defined TM_USE_HALFSPINOR" statement * ****************************************************************/ @@ -64,11 +64,11 @@ #include "operator/Hopping_Matrix.h" #include "update_backward_gauge.h" -#if defined _USE_HALFSPINOR +#if defined TM_USE_HALFSPINOR #include "operator/halfspinor_hopping.h" void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) { -#ifdef _GAUGE_COPY +#ifdef TM_GAUGE_COPY if (g_update_gauge_copy) { update_backward_gauge(g_gauge_field); } @@ -88,10 +88,10 @@ void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) { return; } -#else /* thats _USE_HALFSPINOR */ +#else /* thats TM_USE_HALFSPINOR */ void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) { -#ifdef _GAUGE_COPY +#ifdef TM_GAUGE_COPY if 
(g_update_gauge_copy) { update_backward_gauge(g_gauge_field); } @@ -114,4 +114,4 @@ void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) { return; } -#endif /* thats _USE_HALFSPINOR */ +#endif /* thats TM_USE_HALFSPINOR */ diff --git a/src/lib/operator/Hopping_Matrix_32.c b/src/lib/operator/Hopping_Matrix_32.c index d1fbe78c7..1198d52bb 100644 --- a/src/lib/operator/Hopping_Matrix_32.c +++ b/src/lib/operator/Hopping_Matrix_32.c @@ -39,11 +39,11 @@ * * Structure of top level precompiler directives * - * - defining _USE_HALFSPINOR implies that we also use + * - defining TM_USE_HALFSPINOR implies that we also use * a "gauge copy" * * - such that we are checking for the _USE_GAUGECOPY feature seperatly in the - * ELSE branch of the "if defined _USE_HALFSPINOR" statement + * ELSE branch of the "if defined TM_USE_HALFSPINOR" statement * ****************************************************************/ @@ -66,13 +66,13 @@ #include "update_backward_gauge.h" #include "operator/Hopping_Matrix_32.h" -#if defined _USE_HALFSPINOR +#if defined TM_USE_HALFSPINOR #include "operator/halfspinor_hopping_32.h" #endif void Hopping_Matrix_32_orphaned(const int ieo, spinor32* const l, spinor32* const k) { -#if defined _USE_HALFSPINOR -#ifdef _GAUGE_COPY +#if defined TM_USE_HALFSPINOR +#ifdef TM_GAUGE_COPY if (g_update_gauge_copy_32) { update_backward_gauge_32_orphaned(g_gauge_field_32); } diff --git a/src/lib/operator/Hopping_Matrix_nocom.c b/src/lib/operator/Hopping_Matrix_nocom.c index dce8ad591..c7814bbb0 100644 --- a/src/lib/operator/Hopping_Matrix_nocom.c +++ b/src/lib/operator/Hopping_Matrix_nocom.c @@ -48,8 +48,8 @@ #define Hopping_Matrix Hopping_Matrix_nocom #define _NO_COMM 1 -#ifdef _KOJAK_INST -#undef _KOJAK_INST +#ifdef TM_KOJAK_INST +#undef TM_KOJAK_INST #endif #include "Hopping_Matrix.c" diff --git a/src/lib/operator/halfspinor_body.c b/src/lib/operator/halfspinor_body.c index 542292b1d..a2c54c7e4 100644 --- a/src/lib/operator/halfspinor_body.c +++ 
b/src/lib/operator/halfspinor_body.c @@ -30,7 +30,7 @@ halfspinor* restrict* phi ALIGN; halfspinor32* restrict* phi32 ALIGN; _declare_hregs(); -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst begin(hoppingmatrix) #endif @@ -320,6 +320,6 @@ if (g_sloppy_precision == 1 && g_sloppy_precision_flag == 1) { #endif } } -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst end(hoppingmatrix) #endif diff --git a/src/lib/operator/hopping_bg_dbl.c b/src/lib/operator/hopping_bg_dbl.c index 02e6b5c04..93af99e24 100644 --- a/src/lib/operator/hopping_bg_dbl.c +++ b/src/lib/operator/hopping_bg_dbl.c @@ -41,7 +41,7 @@ void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) { __alignx(16, l); __alignx(16, k); -#ifdef _GAUGE_COPY +#ifdef TM_GAUGE_COPY if (g_update_gauge_copy) { update_backward_gauge(g_gauge_field); } @@ -64,7 +64,7 @@ void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) { sp = k + icy; -#if ((defined _GAUGE_COPY)) +#if ((defined TM_GAUGE_COPY)) up = &g_gauge_field_copy[ioff][0]; #else up = &g_gauge_field[ix][0]; @@ -76,7 +76,7 @@ void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) { /*********************** direction +0 ************************/ iy = g_idn[ix][0]; icy = g_lexic2eosub[iy]; -#if (!defined _GAUGE_COPY) +#if (!defined TM_GAUGE_COPY) um = &g_gauge_field[iy][0]; #else um = up + 1; @@ -90,7 +90,7 @@ void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) { iy = g_iup[ix][1]; icy = g_lexic2eosub[iy]; -#if ((defined _GAUGE_COPY)) +#if ((defined TM_GAUGE_COPY)) up = um + 1; #else up += 1; @@ -104,7 +104,7 @@ void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) { iy = g_idn[ix][1]; icy = g_lexic2eosub[iy]; -#ifndef _GAUGE_COPY +#ifndef TM_GAUGE_COPY um = &g_gauge_field[iy][1]; #else um = up + 1; @@ -117,7 +117,7 @@ void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) { iy = g_iup[ix][2]; icy = g_lexic2eosub[iy]; -#if ((defined _GAUGE_COPY)) +#if ((defined 
TM_GAUGE_COPY)) up = um + 1; #else up += 1; @@ -131,7 +131,7 @@ void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) { iy = g_idn[ix][2]; icy = g_lexic2eosub[iy]; -#ifndef _GAUGE_COPY +#ifndef TM_GAUGE_COPY um = &g_gauge_field[iy][2]; #else um = up + 1; @@ -145,7 +145,7 @@ void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) { iy = g_iup[ix][3]; icy = g_lexic2eosub[iy]; -#if ((defined _GAUGE_COPY)) +#if ((defined TM_GAUGE_COPY)) up = um + 1; #else up += 1; @@ -158,7 +158,7 @@ void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) { iy = g_idn[ix][3]; icy = g_lexic2eosub[iy]; -#ifndef _GAUGE_COPY +#ifndef TM_GAUGE_COPY um = &g_gauge_field[iy][3]; #else um = up + 1; @@ -174,7 +174,7 @@ void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) { iy = g_iup[iz][0]; icy = g_lexic2eosub[iy]; -#if ((defined _GAUGE_COPY)) +#if ((defined TM_GAUGE_COPY)) up = um + 1; #else up = &g_gauge_field[iz][0]; diff --git a/src/lib/operator/hopping_body_dbl.c b/src/lib/operator/hopping_body_dbl.c index 02df9c943..c3eefb74a 100644 --- a/src/lib/operator/hopping_body_dbl.c +++ b/src/lib/operator/hopping_body_dbl.c @@ -43,7 +43,7 @@ if (ieo == 0) { #ifndef TM_USE_OMP hi = &g_hi[16 * ioff]; -#if ((defined _GAUGE_COPY)) +#if ((defined TM_GAUGE_COPY)) up = &g_gauge_field_copy[ioff][0]; #else up = &g_gauge_field[(*hi)][0]; @@ -60,7 +60,7 @@ hi++; for (int icx = ioff; icx < (VOLUME / 2 + ioff); icx++) { #ifdef TM_USE_OMP hi = &g_hi[16 * icx]; -#if ((defined _GAUGE_COPY)) +#if ((defined TM_GAUGE_COPY)) up = &g_gauge_field_copy[icx][0]; #else up = &g_gauge_field[(*hi)][0]; @@ -74,7 +74,7 @@ for (int icx = ioff; icx < (VOLUME / 2 + ioff); icx++) { pn = p + (icx - ioff); #endif /*********************** direction +t ************************/ -#if (!defined _GAUGE_COPY) +#if (!defined TM_GAUGE_COPY) um = &g_gauge_field[(*hi)][0]; #else um = up + 1; @@ -86,7 +86,7 @@ for (int icx = ioff; icx < (VOLUME / 2 + ioff); icx++) { _hop_t_p(); 
/*********************** direction -t ************************/ -#if ((defined _GAUGE_COPY)) +#if ((defined TM_GAUGE_COPY)) up = um + 1; #else up += 1; @@ -97,7 +97,7 @@ for (int icx = ioff; icx < (VOLUME / 2 + ioff); icx++) { _hop_t_m(); /*********************** direction +1 ************************/ -#ifndef _GAUGE_COPY +#ifndef TM_GAUGE_COPY um = &g_gauge_field[(*hi)][1]; #else um = up + 1; @@ -109,7 +109,7 @@ for (int icx = ioff; icx < (VOLUME / 2 + ioff); icx++) { _hop_x_p(); /*********************** direction -1 ************************/ -#if ((defined _GAUGE_COPY)) +#if ((defined TM_GAUGE_COPY)) up = um + 1; #else up += 1; @@ -120,7 +120,7 @@ for (int icx = ioff; icx < (VOLUME / 2 + ioff); icx++) { _hop_x_m(); /*********************** direction +2 ************************/ -#ifndef _GAUGE_COPY +#ifndef TM_GAUGE_COPY um = &g_gauge_field[(*hi)][2]; #else um = up + 1; @@ -132,7 +132,7 @@ for (int icx = ioff; icx < (VOLUME / 2 + ioff); icx++) { _hop_y_p(); /*********************** direction -2 ************************/ -#if ((defined _GAUGE_COPY)) +#if ((defined TM_GAUGE_COPY)) up = um + 1; #else up += 1; @@ -143,7 +143,7 @@ for (int icx = ioff; icx < (VOLUME / 2 + ioff); icx++) { _hop_y_m(); /*********************** direction +3 ************************/ -#ifndef _GAUGE_COPY +#ifndef TM_GAUGE_COPY um = &g_gauge_field[(*hi)][3]; #else um = up + 1; @@ -156,7 +156,7 @@ for (int icx = ioff; icx < (VOLUME / 2 + ioff); icx++) { /*********************** direction -3 ************************/ #ifndef TM_USE_OMP -#if ((defined _GAUGE_COPY)) +#if ((defined TM_GAUGE_COPY)) up = um + 1; #else up = &g_gauge_field[(*hi)][0]; diff --git a/src/lib/operator/hopping_sgl.c b/src/lib/operator/hopping_sgl.c index 5067ab13d..062507158 100644 --- a/src/lib/operator/hopping_sgl.c +++ b/src/lib/operator/hopping_sgl.c @@ -37,7 +37,7 @@ void Hopping_Matrix(int ieo, spinor32* const l, spinor32* const k) { spinor32* restrict r, * restrict sp, * restrict sm; spinor32 temp; -#ifdef 
_GAUGE_COPY +#ifdef TM_GAUGE_COPY if (g_update_gauge_copy) { update_backward_gauge(); } @@ -72,7 +72,7 @@ void Hopping_Matrix(int ieo, spinor32* const l, spinor32* const k) { icy = g_lexic2eosub[iy]; sp = k + icy; -#if ((defined _GAUGE_COPY)) +#if ((defined TM_GAUGE_COPY)) up = &g_gauge_field_copy[icx][0]; #else up = &g_gauge_field[ix][0]; @@ -100,7 +100,7 @@ void Hopping_Matrix(int ieo, spinor32* const l, spinor32* const k) { icy = g_lexic2eosub[iy]; sm = k + icy; -#if ((defined _GAUGE_COPY)) +#if ((defined TM_GAUGE_COPY)) um = up + 1; #else um = &g_gauge_field[iy][0]; @@ -129,7 +129,7 @@ void Hopping_Matrix(int ieo, spinor32* const l, spinor32* const k) { sp = k + icy; -#if ((defined _GAUGE_COPY)) +#if ((defined TM_GAUGE_COPY)) up = um + 1; #else up += 1; @@ -157,7 +157,7 @@ void Hopping_Matrix(int ieo, spinor32* const l, spinor32* const k) { icy = g_lexic2eosub[iy]; sm = k + icy; -#ifndef _GAUGE_COPY +#ifndef TM_GAUGE_COPY um = &g_gauge_field[iy][1]; #else um = up + 1; @@ -185,7 +185,7 @@ void Hopping_Matrix(int ieo, spinor32* const l, spinor32* const k) { icy = g_lexic2eosub[iy]; sp = k + icy; -#if ((defined _GAUGE_COPY)) +#if ((defined TM_GAUGE_COPY)) up = um + 1; #else up += 1; @@ -212,7 +212,7 @@ void Hopping_Matrix(int ieo, spinor32* const l, spinor32* const k) { icy = g_lexic2eosub[iy]; sm = k + icy; -#ifndef _GAUGE_COPY +#ifndef TM_GAUGE_COPY um = &g_gauge_field[iy][2]; #else um = up + 1; @@ -240,7 +240,7 @@ void Hopping_Matrix(int ieo, spinor32* const l, spinor32* const k) { icy = g_lexic2eosub[iy]; sp = k + icy; -#if ((defined _GAUGE_COPY)) +#if ((defined TM_GAUGE_COPY)) up = um + 1; #else up += 1; @@ -267,7 +267,7 @@ void Hopping_Matrix(int ieo, spinor32* const l, spinor32* const k) { icy = g_lexic2eosub[iy]; sm = k + icy; -#ifndef _GAUGE_COPY +#ifndef TM_GAUGE_COPY um = &g_gauge_field[iy][3]; #else um = up + 1; diff --git a/src/lib/operator/tm_sub_Hopping_Matrix.c b/src/lib/operator/tm_sub_Hopping_Matrix.c index fd2aef9db..857404088 100644 --- 
a/src/lib/operator/tm_sub_Hopping_Matrix.c +++ b/src/lib/operator/tm_sub_Hopping_Matrix.c @@ -51,12 +51,12 @@ // where cfactor = a + i b // -#if (defined _USE_HALFSPINOR) +#if (defined TM_USE_HALFSPINOR) #include "operator/halfspinor_hopping.h" void tm_sub_Hopping_Matrix(const int ieo, spinor* const l, spinor* const p, spinor* const k, complex double const cfactor) { -#ifdef _GAUGE_COPY +#ifdef TM_GAUGE_COPY if (g_update_gauge_copy) { update_backward_gauge(g_gauge_field); } @@ -78,10 +78,10 @@ void tm_sub_Hopping_Matrix(const int ieo, spinor* const l, spinor* const p, spin return; } -#elif (!defined _NO_COMM && !defined _USE_HALFSPINOR) +#elif (!defined _NO_COMM && !defined TM_USE_HALFSPINOR) void tm_sub_Hopping_Matrix(const int ieo, spinor* const l, spinor* p, spinor* const k, complex double const cfactor) { -#ifdef _GAUGE_COPY +#ifdef TM_GAUGE_COPY if (g_update_gauge_copy) { update_backward_gauge(g_gauge_field); } diff --git a/src/lib/operator/tm_times_Hopping_Matrix.c b/src/lib/operator/tm_times_Hopping_Matrix.c index 3b336d2a9..6d1abddba 100644 --- a/src/lib/operator/tm_times_Hopping_Matrix.c +++ b/src/lib/operator/tm_times_Hopping_Matrix.c @@ -51,12 +51,12 @@ // where cfactor = a + i b // -#if (defined _USE_HALFSPINOR && !defined _NO_COMM) +#if (defined TM_USE_HALFSPINOR && !defined _NO_COMM) #include "operator/halfspinor_hopping.h" void tm_times_Hopping_Matrix(const int ieo, spinor* const l, spinor* const k, complex double const cfactor) { -#ifdef _GAUGE_COPY +#ifdef TM_GAUGE_COPY if (g_update_gauge_copy) { update_backward_gauge(g_gauge_field); } @@ -78,10 +78,10 @@ void tm_times_Hopping_Matrix(const int ieo, spinor* const l, spinor* const k, return; } -#elif (!defined _NO_COMM && !defined _USE_HALFSPINOR) +#elif (!defined _NO_COMM && !defined TM_USE_HALFSPINOR) void tm_times_Hopping_Matrix(const int ieo, spinor* const l, spinor* const k, double complex const cfactor) { -#ifdef _GAUGE_COPY +#ifdef TM_GAUGE_COPY if (g_update_gauge_copy) { 
update_backward_gauge(g_gauge_field); } @@ -103,4 +103,4 @@ void tm_times_Hopping_Matrix(const int ieo, spinor* const l, spinor* const k, #endif return; } -#endif //_USE_HALFSPINOR && !defined _NO_COMM +#endif //TM_USE_HALFSPINOR && !defined _NO_COMM diff --git a/src/lib/overrelaxation.c b/src/lib/overrelaxation.c index 91d95fa30..2a1329bba 100644 --- a/src/lib/overrelaxation.c +++ b/src/lib/overrelaxation.c @@ -153,7 +153,7 @@ void flip_subgroup(int ix, int mu, su3 vv, int i) { *z = w; } -#if defined PARALLEL1 +#if defined TM_PARALLEL1 void overrel_sweep() { int x0, x1, x2, x3; int mu, ix; diff --git a/src/lib/parallel_io.h b/src/lib/parallel_io.h index 50e03fd59..98df3fb8c 100644 --- a/src/lib/parallel_io.h +++ b/src/lib/parallel_io.h @@ -17,8 +17,8 @@ * along with tmLQCD. If not, see . ***********************************************************************/ -#ifndef _PARALLEL_IO_H -#define _PARALLEL_IO_H +#ifndef _TM_PARALLEL_IO_H +#define _TM_PARALLEL_IO_H #include #include "dml.h" diff --git a/src/lib/read_input.l b/src/lib/read_input.l index 6af756c7e..59f002748 100644 --- a/src/lib/read_input.l +++ b/src/lib/read_input.l @@ -849,8 +849,8 @@ static inline double fltlist_next_token(int * const list_end){ } AMG{SPC}* { -#ifdef DDalphaAMG - if(myverbose) printf("Initialising DDalphaAMG line %d\n", line_of_file); +#ifdef TM_USE_DDalphaAMG + if(myverbose) printf("Initialising DDalphaAMG line %d\n", line_of_file); BEGIN(MULTIGRID); #else printf("ERROR line %d: DDalphaAMG library not included\n", line_of_file); @@ -951,7 +951,7 @@ static inline double fltlist_next_token(int * const list_end){ mg_no_shifts=0; if(myverbose) printf(" MG_MMS_Mass set to %.16f line %d operator %d\n", mg_mms_mass, line_of_file, current_operator); } - EndDDalphaAMG{SPC}* { + EndDDalphaAMG{SPC}* { if(myverbose) printf("DDalphaAMG parsed in line %d\n\n", line_of_file); BEGIN(0); } @@ -1385,7 +1385,7 @@ static inline double fltlist_next_token(int * const list_end){ BEGIN(name_caller); } 
DDalphaAMG { -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG optr->solver = MG; if(myverbose) printf(" Solver set to DDalphaAMG line %d operator %d\n", line_of_file, current_operator); BEGIN(name_caller); @@ -1490,7 +1490,7 @@ static inline double fltlist_next_token(int * const list_end){ BEGIN(name_caller); } DDalphaAMG { -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG optr->solver = MG; if(myverbose) printf(" Solver set to DDalphaAMG line %d operator %d\n", line_of_file, current_operator); BEGIN(name_caller); @@ -1543,7 +1543,7 @@ static inline double fltlist_next_token(int * const list_end){ BEGIN(name_caller); } DDalphaAMG { -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG optr->solver = MG; if(myverbose) printf(" Solver set to DDalphaAMG line %d operator %d\n", line_of_file, current_operator); BEGIN(name_caller); @@ -2834,7 +2834,7 @@ static inline double fltlist_next_token(int * const list_end){ BEGIN(name_caller); } DDalphaAMG { -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG if(myverbose) printf(" Solver set to DDalphaAMG line %d operator %d\n", line_of_file, current_operator); mnl->solver = MG; BEGIN(solver_caller); @@ -2877,7 +2877,7 @@ static inline double fltlist_next_token(int * const list_end){ BEGIN(solver_caller); } DDalphaAMG { -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG if(myverbose) printf(" HB Solver set to DDalphaAMG line %d operator %d\n", line_of_file, current_operator); mnl->HB_solver = MG; BEGIN(solver_caller); @@ -2902,7 +2902,7 @@ static inline double fltlist_next_token(int * const list_end){ BEGIN(solver_caller); } DDalphaAMG { -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG if(myverbose) printf(" Solver set to DDalphaAMG line %d operator %d\n", line_of_file, current_operator); mnl->solver = MG; BEGIN(solver_caller); @@ -2935,7 +2935,7 @@ static inline double fltlist_next_token(int * const list_end){ BEGIN(solver_caller); } DDalphaAMG { -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG if(myverbose) printf(" Solver set to DDalphaAMG line %d operator %d\n", 
line_of_file, current_operator); mnl->solver = MG; BEGIN(solver_caller); @@ -3229,49 +3229,49 @@ static inline double fltlist_next_token(int * const list_end){ } {DIGIT}+ { -#ifndef FIXEDVOLUME +#ifndef TM_FIXEDVOLUME T_global = atoi(yytext); if(myverbose!=0) printf("T =%s\n", yytext); #endif } {DIGIT}+ { -#ifndef FIXEDVOLUME +#ifndef TM_FIXEDVOLUME L = atoi(yytext); if(myverbose!=0) printf("L =%s\n", yytext); #endif } {DIGIT}+ { -#ifndef FIXEDVOLUME +#ifndef TM_FIXEDVOLUME LX = atoi(yytext); if(myverbose!=0) printf("LX =%s\n", yytext); #endif } {DIGIT}+ { -#ifndef FIXEDVOLUME +#ifndef TM_FIXEDVOLUME LY = atoi(yytext); if(myverbose!=0) printf("LY =%s\n", yytext); #endif } {DIGIT}+ { -#ifndef FIXEDVOLUME +#ifndef TM_FIXEDVOLUME LZ = atoi(yytext); if(myverbose!=0) printf("LZ =%s\n", yytext); #endif } {DIGIT}+ { -#ifndef FIXEDVOLUME +#ifndef TM_FIXEDVOLUME N_PROC_X = atoi(yytext); if(myverbose!=0) printf("Nr of processors in x direction = %s\n", yytext); #endif } {DIGIT}+ { -#ifndef FIXEDVOLUME +#ifndef TM_FIXEDVOLUME N_PROC_Y = atoi(yytext); if(myverbose!=0) printf("Nr of processors in y direction = %s\n", yytext); #endif } {DIGIT}+ { -#ifndef FIXEDVOLUME +#ifndef TM_FIXEDVOLUME N_PROC_Z = atoi(yytext); if(myverbose!=0) printf("Nr of processors in z direction = %s\n", yytext); #endif @@ -3776,7 +3776,7 @@ int read_input(const char * conf_file){ * Setting default values! 
********************************************/ reread = 0; -#ifndef FIXEDVOLUME +#ifndef TM_FIXEDVOLUME T_global = _default_T_global; L = _default_L; LX = _default_LX; @@ -3994,7 +3994,7 @@ int read_input(const char * conf_file){ yyout = fopen("/dev/null", "w"); parse_config(); -#ifndef FIXEDVOLUME +#ifndef TM_FIXEDVOLUME if(LX == 0) { LX = L; } @@ -4029,7 +4029,7 @@ int read_input(const char * conf_file){ */ int reread_input(const char * conf_file){ -#ifndef FIXEDVOLUME +#ifndef TM_FIXEDVOLUME int tt=T, ll=L, lx = LX, ly = LY, lz = LZ, np=N_PROC_X, npy = N_PROC_Y; #endif @@ -4054,7 +4054,7 @@ int reread_input(const char * conf_file){ parse_config(); -#ifndef FIXEDVOLUME +#ifndef TM_FIXEDVOLUME T = tt; L = ll; LX = lx; diff --git a/src/lib/solver/cg_her.c b/src/lib/solver/cg_her.c index bf6981c4b..b556acb25 100644 --- a/src/lib/solver/cg_her.c +++ b/src/lib/solver/cg_her.c @@ -102,7 +102,7 @@ int cg_her(spinor* const P, spinor* const Q, const int max_iter, double eps_sq, if (((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq * squarenorm) && (rel_prec == 1))) { break; } -#ifdef _USE_HALFSPINOR +#ifdef TM_USE_HALFSPINOR if (((err * err <= eps_sq) && (rel_prec == 0)) || ((err * err <= eps_sq * squarenorm) && (rel_prec == 1))) { g_sloppy_precision = 1; diff --git a/src/lib/solver/cg_her_nd.c b/src/lib/solver/cg_her_nd.c index 03a85a713..746c21718 100644 --- a/src/lib/solver/cg_her_nd.c +++ b/src/lib/solver/cg_her_nd.c @@ -133,7 +133,7 @@ int cg_her_nd(spinor* const P_up, spinor* P_dn, spinor* const Q_up, spinor* cons if (((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq * squarenorm) && (rel_prec == 1))) { break; } -#ifdef _USE_HALFSPINOR +#ifdef TM_USE_HALFSPINOR if (((err * err <= eps_sq) && (rel_prec == 0)) || ((err * err <= eps_sq * squarenorm) && (rel_prec == 1))) { g_sloppy_precision = 1; diff --git a/src/lib/solver/cr.c b/src/lib/solver/cr.c index 58022ac28..f6a1bd348 100644 --- a/src/lib/solver/cr.c +++ b/src/lib/solver/cr.c @@ -106,7 +106,7 @@ int 
cr(spinor* const P, spinor* const Q, const int m, const int max_restarts, co break; } -#ifdef _USE_HALFSPINOR +#ifdef TM_USE_HALFSPINOR if (((err * err <= eps_sq) && (rel_prec == 0)) || ((err * err <= eps_sq * norm_sq) && (rel_prec == 1))) { if (g_sloppy_precision_flag == 1) { diff --git a/src/lib/solver/diagonalise_general_matrix.c b/src/lib/solver/diagonalise_general_matrix.c index 0667da9aa..9fb989da6 100644 --- a/src/lib/solver/diagonalise_general_matrix.c +++ b/src/lib/solver/diagonalise_general_matrix.c @@ -70,7 +70,7 @@ void diagonalise_general_matrix(int n, _Complex double *A, int lda, _Complex dou /* Query call to get the optimal lwork */ lwork = -1; -#ifdef HAVE_LAPACK +#ifdef TM_LAPACK _FT(zgeevx)("N", "N", "V", "N", &n, A, &lda, evalues, vl, &n, vr, &n, &ilo, &ihi, scale, &abnrm, rcone, rconv, &dummy, &lwork, rwork, &info, 1, 1, 1, 1); lwork = (int)(creal(dummy)); diff --git a/src/lib/solver/dirac_operator_eigenvectors.c b/src/lib/solver/dirac_operator_eigenvectors.c index 42e85d198..845d5aedc 100644 --- a/src/lib/solver/dirac_operator_eigenvectors.c +++ b/src/lib/solver/dirac_operator_eigenvectors.c @@ -28,7 +28,7 @@ #ifdef FFTW #include #endif -#ifdef _USE_SHMEM +#ifdef TM_USE_SHMEM #include #endif #include @@ -330,7 +330,7 @@ _Complex double calcDDaggerDovEvalue(const int *praw, double kappa, double rho, } void spinor_fft(spinor *spinor_in, spinor *spinor_out, int tt, int ll, unsigned int forward) { -#ifdef HAVE_FFTW +#ifdef TM_USE_FFTW fftw_plan plan = spinor_fftw_plan(spinor_in, spinor_out, tt, ll, forward, FFTW_WISDOM_ONLY); fftw_execute(plan); #else @@ -555,7 +555,7 @@ void spinorPrecWS_Free(spinorPrecWS *ws) { */ void eigenvector_Dtm(spinor *spin, double mu, int epsilon, int k, int color, int rawp[4]) { -#ifdef HAVE_FFTW +#ifdef TM_USE_FFTW fftw_plan p1bw; #endif int i = 0; @@ -630,7 +630,7 @@ void eigenvector_Dtm(spinor *spin, double mu, int epsilon, int k, int color, int _spinor_muleq_real(*phi, 1.0 / sqrt((double)(VOLUME))); -#ifdef HAVE_FFTW 
+#ifdef TM_USE_FFTW p1bw = spinor_fftw_plan(spin, spin, T, L, 0, FFTW_WISDOM_ONLY); fftw_execute(p1bw); #endif @@ -638,7 +638,7 @@ void eigenvector_Dtm(spinor *spin, double mu, int epsilon, int k, int color, int /* spinor mulp half phase */ } -#ifdef HAVE_FFTW +#ifdef TM_USE_FFTW fftw_plan spinor_fftw_plan(const spinor *spinor_in, spinor *spinor_out, int T, int ll, unsigned int forward, int fftw_flags) { /* int index_s = gsi(get_index(it, ix, iy, iz, tt, ll)); */ @@ -760,13 +760,13 @@ void spinorPrecondition(spinor *spinor_out, const spinor *spinor_in, spinorPrecW spinor phi_plus; double OOVOL = 1. / (double)(VOLUME); -#ifdef HAVE_FFTW +#ifdef TM_USE_FFTW fftw_plan plan_fw; fftw_plan plan_bw; #endif if (autofft == 1) { -#ifdef HAVE_FFTW +#ifdef TM_USE_FFTW /* spinor_mulp_half_phase(spinor_out,spinor_in,ws->c_table, ws->s_table,1,1.); */ plan_fw = spinor_fftw_plan(spinor_in, spinor_out, tt, ll, 1 /* = true */, FFTW_WISDOM_ONLY); fftw_execute(plan_fw); @@ -889,7 +889,7 @@ void spinorPrecondition(spinor *spinor_out, const spinor *spinor_in, spinorPrecW } if (autofft == 1) { -#ifdef HAVE_FFTW +#ifdef TM_USE_FFTW plan_bw = spinor_fftw_plan(spinor_out, spinor_out, tt, LX, 0, FFTW_WISDOM_ONLY); fftw_execute(plan_bw); #endif @@ -1292,7 +1292,7 @@ void spinor_mulp_half_phase(spinor *spinor_out, const spinor *spinor_in, double * loading and storing of fftw wisdoms */ -#ifdef HAVE_FFTW +#ifdef TM_USE_FFTW void loadFFTWWisdom(spinor *spinor_in, spinor *spinor_out, int tt, int ll) { /* ostringstream filename_fftw_wisdom; */ /* filename_fftw_wisdom << "fftw_wisdom_" << setw(2) << setfill('0') << T << "x"<< setw(2) << @@ -2050,7 +2050,7 @@ void calculateDiagFalloffElements(const int op_id) { if (g_precWS == NULL) { /* we are going to need fft*/ -#ifdef HAVE_FFTW +#ifdef TM_USE_FFTW loadFFTWWisdom(g_spinor_field[0], g_spinor_field[1], T, LX); #endif } diff --git a/src/lib/solver/dirac_operator_eigenvectors.h b/src/lib/solver/dirac_operator_eigenvectors.h index 1ebe2ce71..b10a86312 
100644 --- a/src/lib/solver/dirac_operator_eigenvectors.h +++ b/src/lib/solver/dirac_operator_eigenvectors.h @@ -24,7 +24,7 @@ #ifdef HAVE_CONFIG_H #include "tmlqcd_config.h" #endif -#ifdef HAVE_FFTW +#ifdef TM_USE_FFTW #include #endif @@ -68,7 +68,7 @@ extern tm_operator PRECWSOPERATORSELECT[14]; /* */ extern double g_prec_sequence_d_dagger_d[3]; -#ifdef HAVE_FFTW +#ifdef TM_USE_FFTW fftw_plan spinor_fftw_plan(const spinor *spinor_in, spinor *spinor_out, int tt, int ll, unsigned int forward, int fftw_flags); #endif @@ -170,7 +170,7 @@ void spinor_mulp_half_phase(spinor *spinor_out, const spinor *spinor_in, double * read and write fftw wisdoms * this is supposed to speed up things */ -#ifdef HAVE_FFTW +#ifdef TM_USE_FFTW void writeFFTWWisdom(int tt, int ll); void loadFFTWWisdom(spinor *spinor_in, spinor *spinor_out, int tt, int ll); #endif diff --git a/src/lib/solver/eigenvalues.c b/src/lib/solver/eigenvalues.c index 1725387d0..4d8d08887 100644 --- a/src/lib/solver/eigenvalues.c +++ b/src/lib/solver/eigenvalues.c @@ -68,7 +68,7 @@ double eigenvalues(int *nr_of_eigenvalues, const int max_iterations, const doubl const int even_odd_flag) { double returnvalue; _Complex double norm2; -#ifdef HAVE_LAPACK +#ifdef TM_LAPACK static int allocated = 0; char filename[200]; FILE *ofs; diff --git a/src/lib/solver/fgmres.c b/src/lib/solver/fgmres.c index 60d10fa72..154428124 100644 --- a/src/lib/solver/fgmres.c +++ b/src/lib/solver/fgmres.c @@ -85,7 +85,7 @@ int fgmres(spinor *const P, spinor *const Q, const int m, const int max_restarts atime = gettime(); cumiter_lgcr = 0; if (N == VOLUME) { - init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf); /* #ifdef HAVE_LAPACK */ + init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf); /* #ifdef TM_LAPACK */ } else { init_solver_field(&solver_field, VOLUMEPLUSRAND / 2, nr_sf); } diff --git a/src/lib/solver/fgmres4complex_body.c b/src/lib/solver/fgmres4complex_body.c index b11528c58..1f6fa9c89 100644 --- 
a/src/lib/solver/fgmres4complex_body.c +++ b/src/lib/solver/fgmres4complex_body.c @@ -57,7 +57,7 @@ int _PSWITCH(fgmres4complex)(_Complex _F_TYPE *const P, _Complex _F_TYPE *const int fltcntr = 0; double alphasave = 0; - _PSWITCH(init_lsolver_field)(&solver_field, /*why not N?*/ lda, nr_sf); /* #ifdef HAVE_LAPACK */ + _PSWITCH(init_lsolver_field)(&solver_field, /*why not N?*/ lda, nr_sf); /* #ifdef TM_LAPACK */ eps = sqrt(eps_sq); _PSWITCH(init_lgmres)(m, lda); diff --git a/src/lib/solver/gmres_dr.c b/src/lib/solver/gmres_dr.c index 4b9f429e0..781b32d86 100644 --- a/src/lib/solver/gmres_dr.c +++ b/src/lib/solver/gmres_dr.c @@ -54,7 +54,7 @@ #include "solver/solver_field.h" #include "su3.h" -#ifndef HAVE_LAPACK +#ifndef TM_LAPACK /* In case there is no lapack use normal gmres */ int gmres_dr(spinor* const P, spinor* const Q, const int m, const int nr_ev, const int max_restarts, const double eps_sq, const int rel_prec, const int N, matrix_mult f) { diff --git a/src/lib/solver/gram-schmidt.c b/src/lib/solver/gram-schmidt.c index 1e8da1d24..ffd5d6b29 100644 --- a/src/lib/solver/gram-schmidt.c +++ b/src/lib/solver/gram-schmidt.c @@ -26,7 +26,7 @@ #include "linalg/blas.h" #include "linalg_eo.h" #include "su3spinor.h" -#ifdef CRAY +#ifdef TM_CRAY #include #endif #include "gram-schmidt.h" @@ -62,7 +62,7 @@ void IteratedClassicalGS(_Complex double v[], double *vnrm, int n, int m, _Compl work1[j] = scalar_prod((spinor *)(A + j * lda), (spinor *)v, n * sizeof(_Complex double) / sizeof(spinor), 1); } -#ifdef HAVE_LAPACK +#ifdef TM_LAPACK _FT(zgemv)(fupl_n, &n, &m, &CMONE, A, &lda, work1, &ONE, &CONE, v, &ONE, 1); #endif (*vnrm) = sqrt(square_norm((spinor *)v, n * sizeof(_Complex double) / sizeof(spinor), 1)); @@ -90,7 +90,7 @@ void ModifiedGS(_Complex double v[], int n, int m, _Complex double A[], int lda) s = scalar_prod((spinor *)(A + i * lda), (spinor *)v, n * sizeof(_Complex double) / sizeof(spinor), 1); s = -s; -#ifdef HAVE_LAPACK +#ifdef TM_LAPACK _FT(zaxpy)(&n, &s, A + 
i * lda, &ONE, v, &ONE); #endif } diff --git a/src/lib/solver/mcr.c b/src/lib/solver/mcr.c index 707181cc2..184fa567f 100644 --- a/src/lib/solver/mcr.c +++ b/src/lib/solver/mcr.c @@ -127,7 +127,7 @@ int mcr(spinor* const P, spinor* const Q, const int m, const int max_restarts, c break; } -#ifdef _USE_HALFSPINOR +#ifdef TM_USE_HALFSPINOR if (((err * err <= eps_sq) && (rel_prec == 0)) || ((err * err <= eps_sq * norm_sq) && (rel_prec == 1))) { if (g_sloppy_precision_flag == 1) { diff --git a/src/lib/solver/monomial_solve.c b/src/lib/solver/monomial_solve.c index 94873079f..0e73e9b0d 100644 --- a/src/lib/solver/monomial_solve.c +++ b/src/lib/solver/monomial_solve.c @@ -77,7 +77,7 @@ #include "solver/solver_params.h" #include "solver/solver_types.h" -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG #include "DDalphaAMG_interface.h" #endif #ifdef TM_USE_QPHIX @@ -184,7 +184,7 @@ int solve_degenerate(spinor* const P, spinor* const Q, solver_params_t solver_pa } else if (solver_type == BICGSTAB) { iteration_count = bicgstab_complex(P, Q, max_iter, eps_sq, rel_prec, N, f); } -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG else if (solver_type == MG) iteration_count = MG_solver(P, Q, eps_sq, max_iter, rel_prec, N, g_gauge_field, f); #endif @@ -283,7 +283,7 @@ int solve_mms_tm(spinor** const P, spinor* const Q, solver_params_t* solver_para if (solver_params->type == CGMMS) { iteration_count = cg_mms_tm(P, Q, solver_params); } -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG else if (solver_params->type == MG) { // if the mg_mms_mass is larger than the smallest shift we use MG if (mg_no_shifts > 0 || mg_mms_mass >= solver_params->shifts[0]) { @@ -507,7 +507,7 @@ int solve_mms_nd(spinor** const Pup, spinor** const Pdn, spinor* const Qup, spin } else if (solver_params->type == CGMMSND) { iteration_count = cg_mms_tm_nd(Pup, Pdn, Qup, Qdn, solver_params); } -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG else if (solver_params->type == MG) { // if the mg_mms_mass is larger than the smallest 
shift we use MG if (mg_no_shifts > 0 || mg_mms_mass >= solver_params->shifts[0]) { @@ -691,7 +691,7 @@ int solve_mms_nd_plus(spinor** const Pup, spinor** const Pdn, spinor* const Qup, int iteration_count = 0; -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG // With MG we can solve directly the unsquared operator if (solver_params->type == MG) { matrix_mult_nd f = Qtm_tau1_ndpsi_add_Ishift; diff --git a/src/lib/solver/solver_field.c b/src/lib/solver/solver_field.c index 1cfd06515..5644a4cae 100644 --- a/src/lib/solver/solver_field.c +++ b/src/lib/solver/solver_field.c @@ -37,7 +37,7 @@ int init_solver_field(spinor*** const solver_field, const int V, const int nr) { } /* allocate the full chunk of memory to solver_field[nr] */ -#if (defined _USE_SHMEM && !(defined _USE_HALFSPINOR)) +#if (defined TM_USE_SHMEM && !(defined TM_USE_HALFSPINOR)) if ((void*)((*solver_field)[nr] = (spinor*)shmalloc((nr * V + 1) * sizeof(spinor))) == NULL) { fprintf(stderr, "malloc errno in init_solver_field: %d\n", errno); errno = 0; @@ -74,7 +74,7 @@ int init_solver_field_32(spinor32*** const solver_field, const int V, const int } /* allocate the full chunk of memory to solver_field[nr] */ -#if (defined _USE_SHMEM && !(defined _USE_HALFSPINOR)) +#if (defined TM_USE_SHMEM && !(defined TM_USE_HALFSPINOR)) if ((void*)((*solver_field)[nr] = (spinor32*)shmalloc((nr * V + 1) * sizeof(spinor32))) == NULL) { fprintf(stderr, "malloc errno in init_solver_field: %d\n", errno); errno = 0; @@ -143,7 +143,7 @@ int init_lsolver_field(_Complex double*** const solver_field, const int V, const } /* allocate the full chunk of memory to solver_field[nr] */ -#if (defined _USE_SHMEM && !(defined _USE_HALFSPINOR)) +#if (defined TM_USE_SHMEM && !(defined TM_USE_HALFSPINOR)) if ((void*)((*solver_field)[nr] = (_Complex double*)shmalloc((nr * V + 1) * sizeof(_Complex double))) == NULL) { fprintf(stderr, "malloc errno in init_solver_field: %d\n", errno); @@ -184,7 +184,7 @@ int init_lsolver_field_32(_Complex float*** 
const solver_field, const int V, con } /* allocate the full chunk of memory to solver_field[nr] */ -#if (defined _USE_SHMEM && !(defined _USE_HALFSPINOR)) +#if (defined TM_USE_SHMEM && !(defined TM_USE_HALFSPINOR)) if ((void*)((*solver_field)[nr] = (_Complex float*)shmalloc((nr * V + 1) * sizeof(_Complex float))) == NULL) { fprintf(stderr, "malloc errno in init_solver_field: %d\n", errno); diff --git a/src/lib/spinor_fft.c b/src/lib/spinor_fft.c index fb101d269..54ece4bda 100644 --- a/src/lib/spinor_fft.c +++ b/src/lib/spinor_fft.c @@ -22,7 +22,7 @@ #include "mpi_init.h" #include "spinor_fft.h" -#ifdef HAVE_FFTW +#ifdef TM_USE_FFTW #include #endif @@ -35,7 +35,7 @@ void check_mpi_comm_membership(MPI_Comm commself, MPI_Comm commcheck, const char const char *name_b, FILE *logFile); #endif -#ifdef HAVE_FFTW +#ifdef TM_USE_FFTW fftw_plan spinor_fftw_plan2d(spinor *spinor_in, spinor *spinor_out, int dim0, int dim1, int howmany, unsigned int forward, int fftw_flags); #endif @@ -50,7 +50,7 @@ void spinor_fft_transpose_xp_t(spinor *fieldout, spinor *fieldin, int dim0, int void spinor_fft_reduce_2d(spinor *localSpinorField, int *collectionRank, spinor ***field_collection, spinor **membuff) { /* this implementation is intended for four dimensional parallelisation */ -#if (defined PARALLELXYZT && defined TM_USE_MPI && defined HAVE_FFTW) +#if (defined TM_PARALLELXYZT && defined TM_USE_MPI && defined TM_USE_FFTW) int sendRecvCoord[4]; int i; @@ -195,7 +195,7 @@ void spinor_fft_reduce_2d(spinor *localSpinorField, int *collectionRank, spinor void spinor_fft_redist_2d(spinor *localSpinorField, int collectionRank, spinor **field_collection, spinor *membuff) { /* this implementation is intended for four dimensional parallelisation */ -#if (defined PARALLELXYZT && defined TM_USE_MPI && defined HAVE_FFTW) +#if (defined TM_PARALLELXYZT && defined TM_USE_MPI && defined TM_USE_FFTW) int sendRecvCoord[4]; int dims[] = {g_nproc_t, g_nproc_x, g_nproc_y, g_nproc_z}; @@ -326,7 +326,7 @@ void 
spinor_fft_redist_2d(spinor *localSpinorField, int collectionRank, spinor * #endif } -#ifdef HAVE_FFTW +#ifdef TM_USE_FFTW fftw_plan spinor_fftw_plan2d(spinor *spinor_in, spinor *spinor_out, int dim0, int dim1, int howmany_wospin, unsigned int forward, int fftw_flags) { /* int index_s = gsi(get_index(it, ix, iy, iz, T, L)); */ diff --git a/src/lib/test/Makefile b/src/lib/test/Makefile deleted file mode 100644 index 8efc8b569..000000000 --- a/src/lib/test/Makefile +++ /dev/null @@ -1,88 +0,0 @@ -TARGETS = scalar_prod_r_test - -USESF = yes - -OS = -os3 - -# gcc shouldn't see this options, that's why we don't use CGLAGS here -NLCCFLAGS = -D_STD_C99_COMPLEX_CHECKED -D_STD_C99_COMPLEX -Dapenext -INCLUDES = -I../ -# workaround to let nlcc not see the non-standard complex.h -NLCCINCLUDES = -I${NROOT}/include/nlibc/ ${INCLUDES} - -NLCCOPTS = -gp ${NLCCFLAGS} ${NLCCINCLUDES} -ifdef USESF - MPPOPTS = -sf -v - SHAKEROPTS = -n -z -else - MPPOPTS = -v - SHAKEROPTS = +a -z -endif -SOFANOPTS = --rr - -# needed due to a bug in nlcc -NLCCOS = -OS3 - -NLCC = nlcc-0.5.2 -MPP = mpp -SOFAN = sofan -SHAKER = shaker -M4 = m4 -CCDEP = gcc -DEPFLAGS = -MM -MQ $*.sasm ${CFLAGS} ${INCLUDES} - -DEPFILES = $(addsuffix .d, ${TARGETS}) -MEMFILES = $(addsuffix .mem, ${TARGETS}) $(addsuffix -sofan.mem, ${TARGETS}) \ - $(addsuffix .no, ${TARGETS}) $(addsuffix -sofan.no, ${TARGETS}) -ASMFILES = $(addsuffix .sasm, ${TARGETS}) $(addsuffix .masm, ${TARGETS}) $(addsuffix -sofan.masm, ${TARGETS}) -NCDFILES = $(addsuffix .ncd, ${TARGETS}) $(addsuffix -sofan.ncd, ${TARGETS}) -SFOUTFILES = $(addsuffix .svn-out, ${TARGETS}) $(addsuffix .svn-out%, ${TARGETS}) \ - $(addsuffix .sf_log, ${TARGETS}) $(addsuffix .sf_log%, ${TARGETS}) \ - $(addsuffix .sf_log0, ${TARGETS}) $(addsuffix .sf_log0%, ${TARGETS}) \ - $(addsuffix .err-sf, ${TARGETS}) $(addsuffix .svn-out, ${TARGETS}) \ - $(addsuffix .dmo, ${TARGETS}) \ - $(addsuffix -sofan.svn-out, ${TARGETS}) $(addsuffix -sofan.svn-out%, ${TARGETS}) \ - $(addsuffix 
-sofan.sf_log, ${TARGETS}) $(addsuffix -sofan.sf_log%, ${TARGETS}) \ - $(addsuffix -sofan.sf_log0, ${TARGETS}) $(addsuffix -sofan.sf_log0%, ${TARGETS}) \ - $(addsuffix -sofan.err-sf, ${TARGETS}) $(addsuffix -sofan.svn-out, ${TARGETS}) \ - $(addsuffix -sofan.dmo, ${TARGETS}) -GCCBINARIES = $(addsuffix .gccbin, ${TARGETS}) - -all: $(addsuffix -sofan.mem, ${TARGETS}) -allgcc: $(addsuffix .gccbin, ${TARGETS}) - --include $(DEPFILES) - -%.mem: %.masm - ${SHAKER} ${SHAKEROPTS} $< - -%.masm: %.sasm - ${MPP} ${OS} ${MPPOPTS} $< - -%-sofan.masm: %.masm - ${SOFAN} ${SOFANOPTS} $< $@ - -%.sasm: %.c Makefile - ${NLCC} ${NLCCOPTS} ${NLCCOS} -S $< - -%.ncd: %.mem - dispminit $< > $@ - -%-sofan.perf: %-sofan.ncd - nperf -asm=$*.sasm -c -l -a $< > $@ || (rm -f $@; exit 1) - -# beware, this is not very general -%.gccbin: %.c - gcc -I../ $< -o $@ - -$(DEPFILES): %.d: %.c Makefile - $(CCDEP) ${DEPFLAGS} ${INCLUDES} $< > $@ - -clean: - rm -f ${ASMFILES} ${MEMFILES} ${NCDFILES} ${GCCBINARIES} - -distclean: clean - rm -f ${DEPFILES} ${SFOUTFILES} - -.SECONDARY: -.DELETE_ON_ERROR: diff --git a/src/lib/test/check_geometry.c b/src/lib/test/check_geometry.c index 74589a739..b9f14eb4d 100644 --- a/src/lib/test/check_geometry.c +++ b/src/lib/test/check_geometry.c @@ -90,7 +90,7 @@ int check_geometry() { ix = g_ipt[x0][x1][x2][x3]; iy0 = g_iup[ix][0]; -#if (defined PARALLELT || defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELT || defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) if (x0 != T - 1) { iz0 = g_ipt[(x0 + 1) % T][x1][x2][x3]; } else { @@ -107,7 +107,7 @@ int check_geometry() { #endif iy1 = g_iup[ix][1]; -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) if (x1 != LX - 1) { iz1 = g_ipt[x0][(x1 + 1) % LX][x2][x3]; } else { @@ -125,7 +125,7 @@ int check_geometry() { #endif iy2 = g_iup[ix][2]; -#if (defined 
PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) if (x2 != LY - 1) { iz2 = g_ipt[x0][x1][(x2 + 1) % LY][x3]; } else { @@ -145,7 +145,7 @@ int check_geometry() { #endif iy3 = g_iup[ix][3]; -#if defined PARALLELXYZT +#if defined TM_PARALLELXYZT if (x3 != LZ - 1) { iz3 = g_ipt[x0][x1][x2][(x3 + 1) % LZ]; } else { @@ -176,7 +176,7 @@ int check_geometry() { } iy0 = g_idn[ix][0]; -#if (defined PARALLELT || defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELT || defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) if (x0 != 0) { iz0 = g_ipt[(x0 + T - 1) % T][x1][x2][x3]; } else { @@ -194,7 +194,7 @@ int check_geometry() { #endif iy1 = g_idn[ix][1]; -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) if (x1 != 0) { iz1 = g_ipt[x0][(x1 + LX - 1) % LX][x2][x3]; } else { @@ -212,7 +212,7 @@ int check_geometry() { iz1 = g_ipt[x0][(x1 + LX - 1) % LX][x2][x3]; #endif iy2 = g_idn[ix][2]; -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) if (x2 != 0) { iz2 = g_ipt[x0][x1][(x2 + LY - 1) % LY][x3]; } else { @@ -231,7 +231,7 @@ int check_geometry() { #endif iy3 = g_idn[ix][3]; -#if defined PARALLELXYZT +#if defined TM_PARALLELXYZT if (x3 != 0) { iz3 = g_ipt[x0][x1][x2][(x3 + LZ - 1) % LZ]; } else { @@ -262,8 +262,8 @@ int check_geometry() { } /* The edges */ - /* In case of PARALLELT there is actually no edge to take care of */ -#if ((defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT)) + /* In case of TM_PARALLELT there is actually no edge to take care of */ +#if ((defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT)) if (x0 == 0) { iy0 = g_idn[g_idn[ix][1]][0]; if (x1 != 0) { @@ -318,7 +318,7 @@ int check_geometry() { #endif -#if (defined PARALLELXYT || defined 
PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) if (x0 == 0) { iy0 = g_idn[g_idn[ix][2]][0]; if (x2 != 0) { @@ -421,7 +421,7 @@ int check_geometry() { } } #endif -#if defined PARALLELXYZT +#if defined TM_PARALLELXYZT if (x0 == 0) { iy0 = g_idn[g_idn[ix][3]][0]; if (x3 != 0) { @@ -700,7 +700,7 @@ int check_geometry() { } } -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) for (x0 = 0; x0 < T + 2; x0++) { for (x2 = 0; x2 < LY; x2++) { for (x3 = 0; x3 < LZ; x3++) { @@ -827,7 +827,7 @@ int check_geometry() { } #endif -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) for (x0 = 0; x0 < T + 2; x0++) { for (x1 = 0; x1 < LX + 2; x1++) { @@ -1027,7 +1027,7 @@ int check_geometry() { } } #endif -#ifdef PARALLELXYZT +#ifdef TM_PARALLELXYZT for (x0 = 0; x0 < T + 2; x0++) { for (x1 = 0; x1 < LX + 2; x1++) { for (x2 = 0; x2 < LY + 2; x2++) { diff --git a/src/lib/test/check_overlap.c b/src/lib/test/check_overlap.c index 43742a21b..56763cff4 100644 --- a/src/lib/test/check_overlap.c +++ b/src/lib/test/check_overlap.c @@ -105,12 +105,12 @@ int main(int argc, char *argv[]) { char *gaugecksum = NULL; double plaquette_energy; -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst init #pragma pomp inst begin(main) #endif -#ifdef HAVE_LIBLEMON +#ifdef TM_USE_LEMON MPI_File fh; LemonWriter *lemonWriter; paramsXlfInfo *xlfInfo; @@ -188,7 +188,7 @@ int main(int argc, char *argv[]) { g_dbw2rand = 0; #endif -#ifdef _GAUGE_COPY +#ifdef TM_GAUGE_COPY j = init_gauge_field(VOLUMEPLUSRAND, 1); #else j = init_gauge_field(VOLUMEPLUSRAND, 0); @@ -273,7 +273,7 @@ int main(int argc, char *argv[]) { phmc_invmaxev = 1.; -#ifdef _USE_HALFSPINOR +#ifdef TM_USE_HALFSPINOR j = init_dirac_halfspinor(); if (j != 0) { fprintf(stderr, "Not enough memory for halffield! 
Aborting...\n"); @@ -286,7 +286,7 @@ int main(int argc, char *argv[]) { exit(-1); } } -#if (defined _PERSISTENT) +#if (defined TM_PERSISTENT) if (even_odd_flag) { init_xchange_halffield(); } @@ -299,9 +299,9 @@ int main(int argc, char *argv[]) { printf("Reading Gauge field from file %s\n", conf_filename); fflush(stdout); } -#ifdef HAVE_LIBLEMON +#ifdef TM_USE_LEMON read_lemon_gauge_field_parallel(conf_filename, &gaugecksum, &xlfmessage, &gaugelfn); -#else /* HAVE_LIBLEMON */ +#else /* TM_USE_LEMON */ if (xlfmessage != (char *)NULL) free(xlfmessage); if (gaugelfn != (char *)NULL) free(gaugelfn); if (gaugecksum != (char *)NULL) free(gaugecksum); @@ -310,7 +310,7 @@ int main(int argc, char *argv[]) { gaugelfn = read_message(conf_filename, "ildg-data-lfn"); gaugecksum = read_message(conf_filename, "scidac-checksum"); printf("%s \n", gaugecksum); -#endif /* HAVE_LIBLEMON */ +#endif /* TM_USE_LEMON */ if (g_proc_id == 0) { printf("done!\n"); fflush(stdout); @@ -389,7 +389,7 @@ int main(int argc, char *argv[]) { free_chi_dn_spinor_field(); } return (0); -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst end(main) #endif } diff --git a/src/lib/test/check_xchange.c b/src/lib/test/check_xchange.c index db5d97cb3..a20f86df4 100644 --- a/src/lib/test/check_xchange.c +++ b/src/lib/test/check_xchange.c @@ -63,7 +63,7 @@ int check_xchange() { } } -#if ((defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT)) +#if ((defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT)) for (x0 = 0; x0 < T; x0++) { for (x2 = 0; x2 < LY; x2++) { for (x3 = 0; x3 < LZ; x3++) { @@ -74,7 +74,7 @@ int check_xchange() { } #endif -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) for (x0 = 0; x0 < T; x0++) { for (x1 = 0; x1 < LX; x1++) { for (x3 = 0; x3 < LZ; x3++) { @@ -113,7 +113,7 @@ int check_xchange() { } } -#if ((defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT)) +#if 
((defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT)) x = (double*)&g_spinor_field[0][(VOLUME + 2 * LX * LY * LZ) / 2]; for (i = 0; i < T * LY * LZ / 2 * 24; i++, x++) { if ((int)(*x) != g_nb_x_up) { @@ -139,7 +139,7 @@ int check_xchange() { } #endif -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) x = (double*)&g_spinor_field[0][(VOLUME + 2 * LX * LY * LZ) / 2 + 2 * T * LY * LZ / 2]; for (i = 0; i < T * LX * LZ / 2 * 24; i++, x++) { if ((int)(*x) != g_nb_y_up) { @@ -166,7 +166,7 @@ int check_xchange() { } #endif -#if (defined PARALLELXYZT) +#if (defined TM_PARALLELXYZT) set_spinor_field(0, -1.); for (x0 = 0; x0 < T; x0++) { @@ -270,7 +270,7 @@ int check_xchange() { } } -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* Set the x boundary */ for (x0 = 0; x0 < T; x0++) { for (x2 = 0; x2 < LY; x2++) { @@ -284,7 +284,7 @@ int check_xchange() { } #endif -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* Set the y boundary */ for (x0 = 0; x0 < T; x0++) { for (x1 = 0; x1 < LX; x1++) { @@ -298,7 +298,7 @@ int check_xchange() { } #endif -#if (defined PARALLELXYZT) +#if (defined TM_PARALLELXYZT) /* Set the z boundary */ for (x0 = 0; x0 < T; x0++) { for (x1 = 0; x1 < LX; x1++) { @@ -340,7 +340,7 @@ int check_xchange() { } } -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) x = (double*)&g_gauge_field[(T + 2) * LX * LY * LZ][0]; for (i = 0; i < T * LY * LZ * 72; i++, x++) { if ((int)(*x) != g_nb_x_up) { @@ -368,7 +368,7 @@ int check_xchange() { } #endif -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) x = (double*)&g_gauge_field[(T + 2) * LX * LY * LZ + 2 * T * LZ 
* LY][0]; for (i = 0; i < T * LX * LZ * 72; i++, x++) { if ((int)(*x) != g_nb_y_up) { @@ -396,7 +396,7 @@ int check_xchange() { } #endif -#if (defined PARALLELXYZT) +#if (defined TM_PARALLELXYZT) x = (double*)g_gauge_field[VOLUME + 2 * LX * LY * LZ + 2 * T * LZ * LY + 2 * T * LX * LZ]; for (i = 0; i < T * LX * LY * 72; i++, x++) { if ((int)(*x) != g_nb_z_up) { @@ -504,7 +504,7 @@ int check_xchange() { MPI_Barrier(MPI_COMM_WORLD); /* The edges */ -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) fprintf(stdout, "# Rank: %d, (c0, c1, c2, c3) = (%d, %d, %d, %d)\n", g_proc_id, g_proc_coords[0], g_proc_coords[1], g_proc_coords[2], g_proc_coords[3]); fflush(stdout); @@ -577,7 +577,7 @@ int check_xchange() { } #endif -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) di[1] = (g_proc_coords[1] - 1) % g_nproc_x; di[2] = (g_proc_coords[2] - 1) % g_nproc_y; di[0] = g_proc_coords[0]; @@ -712,7 +712,7 @@ int check_xchange() { } } #endif -#ifdef PARALLELXYZT +#ifdef TM_PARALLELXYZT di[1] = (g_proc_coords[1] - 1) % g_nproc_x; di[3] = (g_proc_coords[3] - 1) % g_nproc_z; di[0] = g_proc_coords[0]; @@ -1001,7 +1001,7 @@ int check_xchange() { } } -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) x = (double*)&g_gauge_field[VOLUMEPLUSRAND + 2 * LX * LY * LZ][0]; for (i = 0; i < T * LY * LZ * 72; i++, x++) { if ((int)(*x) != g_nb_x_up) { @@ -1029,7 +1029,7 @@ int check_xchange() { } #endif -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) x = (double*)&g_gauge_field[VOLUMEPLUSRAND + 2 * LX * LY * LZ + 2 * T * LZ * LY][0]; for (i = 0; i < T * LX * LZ * 72; i++, x++) { if ((int)(*x) != g_nb_y_up) { @@ -1058,7 +1058,7 @@ int check_xchange() { } #endif -#if (defined 
PARALLELXYZT) +#if (defined TM_PARALLELXYZT) x = (double*)&g_gauge_field[VOLUMEPLUSRAND + 2 * LX * LY * LZ + 2 * T * LZ * LY + 2 * T * LX * LZ][0]; for (i = 0; i < T * LX * LY * 72; i++, x++) { @@ -1088,7 +1088,7 @@ int check_xchange() { } #endif -#if defined PARALLELXYZT +#if defined TM_PARALLELXYZT set_gauge_field(-1.); @@ -1279,7 +1279,7 @@ int check_xchange() { } } } -#if defined PARALLELXYZT +#if defined TM_PARALLELXYZT /* Set the tz boundary */ for (x1 = 0; x1 < LX; x1++) { for (x2 = 0; x2 < LY; x2++) { @@ -1332,7 +1332,7 @@ int check_xchange() { xchange_gauge(g_gauge_field); MPI_Barrier(MPI_COMM_WORLD); -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) di[0] = (g_proc_coords[0] - 1) % g_nproc_t; di[1] = (g_proc_coords[1] - 1) % g_nproc_x; di[2] = g_proc_coords[2]; @@ -1453,7 +1453,7 @@ int check_xchange() { } #endif -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) di[1] = (g_proc_coords[1] - 1) % g_nproc_x; di[2] = (g_proc_coords[2] - 1) % g_nproc_y; @@ -1693,7 +1693,7 @@ int check_xchange() { } } #endif -#if defined PARALLELXYZT +#if defined TM_PARALLELXYZT di[0] = (g_proc_coords[0] - 1) % g_nproc_t; di[3] = (g_proc_coords[3] - 1) % g_nproc_z; @@ -2123,7 +2123,7 @@ int check_xchange() { } } } -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) for (x0 = 0; x0 < T; x0++) { for (x2 = 0; x2 < LY; x2++) { for (x3 = 0; x3 < LZ; x3++) { @@ -2145,7 +2145,7 @@ int check_xchange() { } } #endif -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) for (x0 = 0; x0 < T; x0++) { for (x1 = 0; x1 < LX; x1++) { for (x3 = 0; x3 < LZ; x3++) { @@ -2167,7 +2167,7 @@ int check_xchange() { } } #endif -#if defined PARALLELXYZT +#if defined TM_PARALLELXYZT for (x0 = 
0; x0 < T; x0++) { for (x1 = 0; x1 < LX; x1++) { for (x2 = 0; x2 < LY; x2++) { @@ -2194,7 +2194,7 @@ int check_xchange() { xchange_deri(df0); MPI_Barrier(MPI_COMM_WORLD); -#if defined PARALLELT +#if defined TM_PARALLELT for (x1 = 0; x1 < LX; x1++) { for (x2 = 0; x2 < LY; x2++) { for (x3 = 0; x3 < LZ; x3++) { @@ -2228,7 +2228,7 @@ int check_xchange() { } } #endif -#if defined PARALLELXT +#if defined TM_PARALLELXT for (x1 = 1; x1 < LX - 1; x1++) { for (x2 = 0; x2 < LY; x2++) { for (x3 = 0; x3 < LZ; x3++) { @@ -2351,7 +2351,7 @@ int check_xchange() { } } #endif -#if defined PARALLELXYT +#if defined TM_PARALLELXYT for (x1 = 1; x1 < LX - 1; x1++) { for (x2 = 1; x2 < LY - 1; x2++) { for (x3 = 0; x3 < LZ; x3++) { @@ -2748,7 +2748,7 @@ int check_xchange() { #endif -#if defined PARALLELXYZT +#if defined TM_PARALLELXYZT for (x1 = 1; x1 < LX - 1; x1++) { for (x2 = 1; x2 < LY - 1; x2++) { for (x3 = 1; x3 < LZ - 1; x3++) { @@ -3026,7 +3026,7 @@ int check_xchange() { } } -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) // xt edge for (x2 = 0; x2 < LY; x2++) { @@ -3063,7 +3063,7 @@ int check_xchange() { } #endif -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) // ty edge for (x1 = 0; x1 < LX; x1++) { @@ -3139,7 +3139,7 @@ int check_xchange() { xchange_deri(df0); MPI_Barrier(MPI_COMM_WORLD); -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) di[0] = (g_proc_coords[0] - 1) % g_nproc_t; di[1] = (g_proc_coords[1] - 1) % g_nproc_x; @@ -3156,7 +3156,7 @@ int check_xchange() { di[1] = (g_proc_coords[1] + 1) % g_nproc_x; MPI_Cart_rank(g_cart_grid, di, &pp); -#ifdef PARALLELXT +#ifdef TM_PARALLELXT for (x2 = 0; x2 < LY; x2++) { for (x3 = 0; x3 < LZ; x3++) { #else @@ -3224,7 +3224,7 @@ int check_xchange() { #endif -#if 
(defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) // xy-edge di[1] = (g_proc_coords[1] - 1) % g_nproc_x; diff --git a/src/lib/test/measure_rectangles.debug.c b/src/lib/test/measure_rectangles.debug.c index 75a71d2b2..422f681b2 100644 --- a/src/lib/test/measure_rectangles.debug.c +++ b/src/lib/test/measure_rectangles.debug.c @@ -61,10 +61,10 @@ double measure_rectangles() { char filename[100]; sprintf(filename, "debug_mr.s"); -#ifdef PARALLELT +#ifdef TM_PARALLELT sprintf(filename, "debug_mr.pt.%d", g_proc_id); #endif -#ifdef PARALLELXT +#ifdef TM_PARALLELXT sprintf(filename, "debug_mr.pxt.%d", g_proc_id); #endif debugfile = fopen(filename, "w"); diff --git a/src/lib/update_backward_gauge.c b/src/lib/update_backward_gauge.c index a041e577c..b28ab6acf 100644 --- a/src/lib/update_backward_gauge.c +++ b/src/lib/update_backward_gauge.c @@ -25,7 +25,7 @@ #include "su3.h" #include "update_backward_gauge.h" -#if defined _USE_HALFSPINOR +#if defined TM_USE_HALFSPINOR void update_backward_gauge(su3** const gf) { #ifdef TM_USE_OMP #pragma omp parallel diff --git a/src/lib/update_gauge.c b/src/lib/update_gauge.c index dde4cbf31..af4730e01 100644 --- a/src/lib/update_gauge.c +++ b/src/lib/update_gauge.c @@ -39,7 +39,7 @@ #include "su3spinor.h" #include "update_gauge.h" #include "xchange/xchange.h" -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG #include "DDalphaAMG_interface.h" #endif /******************************************************* @@ -51,7 +51,7 @@ void update_gauge(const double step, hamiltonian_field_t *const hf) { tm_stopwatch_push(&g_timers, __func__, ""); update_tm_gauge_id(&g_gauge_state, step); -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG MG_update_gauge(step); #endif @@ -65,7 +65,7 @@ void update_gauge(const double step, hamiltonian_field_t *const hf) { su3 *z; static su3adj deriv; su3adj *xm; -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst begin(updategauge) #endif @@ -115,7 +115,7 @@ void 
update_gauge(const double step, hamiltonian_field_t *const hf) { tm_stopwatch_pop(&g_timers, 0, 1, ""); return; -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst end(updategauge) #endif } diff --git a/src/lib/update_momenta_fg.c b/src/lib/update_momenta_fg.c index 0aab582cd..cf1e9e4fb 100644 --- a/src/lib/update_momenta_fg.c +++ b/src/lib/update_momenta_fg.c @@ -44,7 +44,7 @@ #include "su3adj.h" #include "su3spinor.h" #include "xchange/xchange.h" -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG #include "DDalphaAMG_interface.h" #endif @@ -123,7 +123,7 @@ void fg_update_momenta_reset_gaugefield(const double step, hamiltonian_field_t * *******************************************************/ void update_momenta_fg(int *mnllist, double step, const int no, hamiltonian_field_t *const hf, double step0) { -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG MG_update_gauge(0.0); #endif if (g_exposu3_no_c == 0) init_exposu3(); @@ -156,7 +156,7 @@ void update_momenta_fg(int *mnllist, double step, const int no, hamiltonian_fiel /* for parallelization */ xchange_gauge(hf->gaugefield); #endif -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG MG_update_gauge(0.0); #endif @@ -201,7 +201,7 @@ void update_momenta_fg(int *mnllist, double step, const int no, hamiltonian_fiel /* for parallelization */ xchange_gauge(hf->gaugefield); #endif -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG MG_update_gauge(0.0); #endif diff --git a/src/lib/update_tm.c b/src/lib/update_tm.c index 72a6194e7..3f1cdc5d5 100644 --- a/src/lib/update_tm.c +++ b/src/lib/update_tm.c @@ -64,7 +64,7 @@ #include "su3.h" #include "update_tm.h" #include "xchange/xchange.h" -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG #include "DDalphaAMG_interface.h" #endif @@ -120,7 +120,7 @@ int update_tm(double *plaquette_energy, double *rectangle_energy, char *filename } } -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG MG_reset(); #endif @@ -211,7 +211,7 @@ int update_tm(double *plaquette_energy, double *rectangle_energy, char *filename 
free(xlfInfo); } -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG MG_reset(); #endif @@ -354,7 +354,7 @@ int update_tm(double *plaquette_energy, double *rectangle_energy, char *filename // will result in the updated gauge field to be propagated update_tm_gauge_id(&g_gauge_state, TM_GAUGE_PROPAGATE_THRESHOLD); update_tm_gauge_id(&g_gauge_state_32, TM_GAUGE_PROPAGATE_THRESHOLD); -#ifdef DDalphaAMG +#ifdef TM_USE_DDalphaAMG MG_reset(); #endif } diff --git a/src/lib/util/io.c b/src/lib/util/io.c index 6df42d288..4f6267c78 100644 --- a/src/lib/util/io.c +++ b/src/lib/util/io.c @@ -36,7 +36,7 @@ * */ -#define _FILE_OFFSET_BITS 64 +#define _FILE_OFFSET_BITS 64 #include "io.h" #include diff --git a/src/lib/util/laguer/Makefile b/src/lib/util/laguer/Makefile deleted file mode 100644 index f9bce70e3..000000000 --- a/src/lib/util/laguer/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -CXX=g++ -CXXFLAGS=-g -O2 -CLNDIR=${HOME}/daten/workdir/cln/ - -chebyRoot: chebyRoot.C Makefile chebyRoot.H - ${CXX} $< -g -o $@ -I${CLNDIR}/include/ -L${CLNDIR}/lib -lcln -lm - -clean: - rm -f *.o chebyRoot *.dat *.log *~ diff --git a/src/lib/util/oox/Makefile b/src/lib/util/oox/Makefile deleted file mode 100644 index 88de5bdd5..000000000 --- a/src/lib/util/oox/Makefile +++ /dev/null @@ -1,46 +0,0 @@ -CC=gcc -CXX=g++ -CFLAGS=-O2 -fexpensive-optimizations -fomit-frame-pointer # -mfpmath=sse -msse2 -LIBS=-lm -OBJECTS_OOX=oox.o -INCLUDE=-I./ - - -# variables for oox_ga executable -# if you want to compile with ga lib support -# please adjust the GALIBPATH variable -# to the toplevel dir of galib -# it is assumed that you compiled the library -# such that a libga.a file is present in the -# ./ga subdir of galib -GALIBPATH=/usr1/scratch/annube/galib247 -LIBS_GA=${LIBS} -L${GALIBPATH}/ga -lga -CFLAGS_GA=${CFLAGS} -DWITHGALIB -INCLUDE_GA=${INCLUDE} -I${GALIBPATH} -OBJECTS_OOX_GA=oox_ga.o oox_gawrapper.o - - -all: oox oox_ga - -oox: ${OBJECTS_OOX} Makefile - ${CXX} ${OBJECTS_OOX} -o $@ ${CFLAGS} ${LIBS} - -oox_ga:
${OBJECTS_OOX_GA} Makefile - ${CXX} ${OBJECTS_OOX_GA} -o $@ ${CFLAGS_GA} ${LIBS_GA} - -oox_gawrapper.o: oox_gawrapper.cxx - ${CXX} ${CFLAGS_GA} -o $@ -c $< ${INCLUDE_GA} - -oox_ga.o: oox.c - ${CC} ${CFLAGS_GA} -o $@ -c $< ${INCLUDE_GA} - -clean: - rm oox oox_ga *.o - -.SUFFIXES: - -%.o: %.c - ${CC} ${CFLAGS} -o $@ -c $< ${INCLUDE} - -%.o: %.cxx - ${CXX} ${CFLAGS} -o $@ -c $< ${INCLUDE} diff --git a/src/lib/wrapper/lib_wrapper.c b/src/lib/wrapper/lib_wrapper.c index 6c95a27d5..9f083adc5 100644 --- a/src/lib/wrapper/lib_wrapper.c +++ b/src/lib/wrapper/lib_wrapper.c @@ -121,7 +121,7 @@ int tmLQCD_invert_init(int argc, char* argv[], const int _verbose, const int ext for (int j = 0; j < no_operators; j++) if (!operator_list[j].even_odd_flag) even_odd_flag = 0; -#ifdef _GAUGE_COPY +#ifdef TM_GAUGE_COPY int j = init_gauge_field(VOLUMEPLUSRAND, 1); j += init_gauge_field_32(VOLUMEPLUSRAND, 1); #else @@ -161,7 +161,7 @@ int tmLQCD_invert_init(int argc, char* argv[], const int _verbose, const int ext // initialise the operators init_operators(); -#ifdef _USE_HALFSPINOR +#ifdef TM_USE_HALFSPINOR j = init_dirac_halfspinor(); if (j != 0) { fprintf(stderr, "tmLQCD_init_invert: Not enough memory for halffield! Aborting...\n"); @@ -172,7 +172,7 @@ int tmLQCD_invert_init(int argc, char* argv[], const int _verbose, const int ext fprintf(stderr, "tmLQCD_init_invert: Not enough memory for 32-bit halffield! 
Aborting...\n"); return (-1); } -#if (defined _PERSISTENT) +#if (defined TM_PERSISTENT) if (even_odd_flag) init_xchange_halffield(); #endif #endif diff --git a/src/lib/xchange/xchange_2fields.c b/src/lib/xchange/xchange_2fields.c index c5dfa86a8..c311bf908 100644 --- a/src/lib/xchange/xchange_2fields.c +++ b/src/lib/xchange/xchange_2fields.c @@ -41,18 +41,18 @@ #include "su3.h" #include "xchange_2fields.h" -#if (defined _NON_BLOCKING) +#if (defined TM_NON_BLOCKING) /* this version uses non-blocking MPI calls */ void xchange_2fields(spinor* const l, spinor* const k, const int ieo) { MPI_Request requests[32]; MPI_Status status[32]; int reqcount = 0; -#if defined PARALLELXYZT +#if defined TM_PARALLELXYZT int ix = 0; #endif -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst begin(xchange2fields) #endif @@ -88,7 +88,7 @@ void xchange_2fields(spinor* const l, spinor* const k, const int ieo) { g_cart_grid, &requests[reqcount + 1]); reqcount = reqcount + 2; -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in x direction */ /* recieve the data from the neighbour on the right in x direction */ MPI_Isend((void*)l, 1, field_x_slice_gath, g_nb_x_dn, 91, g_cart_grid, &requests[reqcount]); @@ -120,7 +120,7 @@ void xchange_2fields(spinor* const l, spinor* const k, const int ieo) { reqcount = reqcount + 2; #endif -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in y direction */ /* recieve the data from the neighbour on the right in y direction */ MPI_Isend((void*)l, 1, field_y_slice_gath, g_nb_y_dn, 101, g_cart_grid, &requests[reqcount]); @@ -153,7 +153,7 @@ void xchange_2fields(spinor* const l, spinor* const k, const int ieo) { #endif -#if (defined PARALLELXYZT) +#if (defined TM_PARALLELXYZT) /* fill buffer ! 
*/ /* This is now depending on whether the field is */ /* even or odd */ @@ -237,8 +237,8 @@ void xchange_2fields(spinor* const l, spinor* const k, const int ieo) { MPI_Waitall(reqcount, requests, status); #endif return; -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst end(xchange2fields) #endif } -#endif /* _NON_BLOCKING */ +#endif /* TM_NON_BLOCKING */ diff --git a/src/lib/xchange/xchange_2fields.h b/src/lib/xchange/xchange_2fields.h index 35dc7f6c5..6a83085f0 100644 --- a/src/lib/xchange/xchange_2fields.h +++ b/src/lib/xchange/xchange_2fields.h @@ -31,7 +31,7 @@ #define EVEN 1 #define ODD 0 -#ifdef _NON_BLOCKING +#ifdef TM_NON_BLOCKING void xchange_2fields(spinor* const k, spinor* const l, const int ieo); #else #define xchange_2fields(k, l, ieo) \ diff --git a/src/lib/xchange/xchange_deri.c b/src/lib/xchange/xchange_deri.c index a260ed8b6..7defa1e7c 100644 --- a/src/lib/xchange/xchange_deri.c +++ b/src/lib/xchange/xchange_deri.c @@ -55,7 +55,7 @@ void xchange_deri(su3adj** const df) { #ifdef TM_USE_MPI int ix, iy, t, y, z, x; MPI_Status status; -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* The edges need to come first */ /* send the data to the neighbour on the left in t direction */ @@ -96,9 +96,9 @@ void xchange_deri(su3adj** const df) { } } -#endif /* (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) */ +#endif /* (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) */ -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* edges */ /* send the data to the neighbour on the left in x direction */ @@ -178,9 +178,9 @@ void xchange_deri(su3adj** const df) { } } -#endif /* (defined PARALLELXYT || defined PARALLELXYZT) */ +#endif /* (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) */ -#ifdef PARALLELXYZT +#ifdef TM_PARALLELXYZT /* send the data 
to the neighbour on the left in x direction */ /* recieve the data from the neighbour on the right in x direction */ @@ -305,7 +305,7 @@ void xchange_deri(su3adj** const df) { } } -#endif /* PARALLELXYZT */ +#endif /* TM_PARALLELXYZT */ // now the normal boundaries @@ -341,7 +341,7 @@ void xchange_deri(su3adj** const df) { } } -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in x direction */ /* recieve the data from the neighbour on the right in x direction */ MPI_Sendrecv((void*)df[(T + 2) * LX * LY * LZ + T * LY * LZ], 1, deri_x_slice_cont, g_nb_x_dn, 42, @@ -372,9 +372,9 @@ void xchange_deri(su3adj** const df) { } } -#endif /* (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) */ +#endif /* (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) */ -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in y direction */ /* recieve the data from the neighbour on the right in y direction */ @@ -406,9 +406,9 @@ void xchange_deri(su3adj** const df) { } } -#endif /* (defined PARALLELXYT || defined PARALLELXYZT) */ +#endif /* (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) */ -#ifdef PARALLELXYZT +#ifdef TM_PARALLELXYZT /* send the data to the neighbour on the left in y direction */ /* recieve the data from the neighbour on the right in y direction */ MPI_Sendrecv( @@ -441,7 +441,7 @@ void xchange_deri(su3adj** const df) { } } -#endif /* PARALLELXYZT */ +#endif /* TM_PARALLELXYZT */ #endif /* MPI */ return; } diff --git a/src/lib/xchange/xchange_field.c b/src/lib/xchange/xchange_field.c index 576574789..417aa8981 100644 --- a/src/lib/xchange/xchange_field.c +++ b/src/lib/xchange/xchange_field.c @@ -35,7 +35,7 @@ #ifdef TM_USE_MPI #include #endif -#ifdef _USE_SHMEM +#ifdef 
TM_USE_SHMEM #include #endif @@ -44,30 +44,30 @@ #include "su3.h" #include "xchange_field.h" -#if (defined PARALLELXYZT) +#if (defined TM_PARALLELXYZT) #pragma disjoint(*field_buffer_z2, *field_buffer_z) #endif /* this version uses non-blocking MPI calls */ -#if (defined _NON_BLOCKING) +#if (defined TM_NON_BLOCKING) void xchange_field(spinor* const l, const int ieo) { #ifdef TM_USE_MPI MPI_Request requests[16]; MPI_Status status[16]; #endif -#ifdef PARALLELT +#ifdef TM_PARALLELT int reqcount = 4; -#elif defined PARALLELXT +#elif defined TM_PARALLELXT int reqcount = 8; -#elif defined PARALLELXYT +#elif defined TM_PARALLELXYT int reqcount = 12; -#elif defined PARALLELXYZT +#elif defined TM_PARALLELXYZT int ix = 0; int reqcount = 16; #endif -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst begin(xchangefield) #endif @@ -84,7 +84,7 @@ void xchange_field(spinor* const l, const int ieo) { MPI_Isend((void*)l, 1, field_time_slice_cont, g_nb_t_dn, 81, g_cart_grid, &requests[0]); MPI_Irecv((void*)(l + T * LX * LY * LZ / 2), 1, field_time_slice_cont, g_nb_t_up, 81, g_cart_grid, &requests[1]); -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in x direction */ /* recieve the data from the neighbour on the right in x direction */ MPI_Isend((void*)l, 1, field_x_slice_gath, g_nb_x_dn, 91, g_cart_grid, &requests[4]); @@ -92,7 +92,7 @@ void xchange_field(spinor* const l, const int ieo) { g_cart_grid, &requests[5]); #endif -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in y direction */ /* recieve the data from the neighbour on the right in y direction */ MPI_Isend((void*)l, 1, field_y_slice_gath, g_nb_y_dn, 101, g_cart_grid, &requests[8]); @@ -100,7 +100,7 @@ void xchange_field(spinor* const l, const int ieo) { 
g_nb_y_up, 101, g_cart_grid, &requests[9]); #endif -#if (defined PARALLELXYZT) +#if (defined TM_PARALLELXYZT) /* fill buffer ! */ /* This is now depending on whether the field is */ /* even or odd */ @@ -129,7 +129,7 @@ void xchange_field(spinor* const l, const int ieo) { MPI_Irecv((void*)(l + (T + 1) * LX * LY * LZ / 2), 1, field_time_slice_cont, g_nb_t_dn, 82, g_cart_grid, &requests[3]); -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the right in x direction */ /* recieve the data from the neighbour on the left in x direction */ MPI_Isend((void*)(l + (LX - 1) * LY * LZ / 2), 1, field_x_slice_gath, g_nb_x_up, 92, @@ -138,7 +138,7 @@ void xchange_field(spinor* const l, const int ieo) { g_nb_x_dn, 92, g_cart_grid, &requests[7]); #endif -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the right in y direction */ /* recieve the data from the neighbour on the left in y direction */ MPI_Isend((void*)(l + (LY - 1) * LZ / 2), 1, field_y_slice_gath, g_nb_y_up, 102, g_cart_grid, @@ -147,7 +147,7 @@ void xchange_field(spinor* const l, const int ieo) { field_y_slice_cont, g_nb_y_dn, 102, g_cart_grid, &requests[11]); #endif -#if defined PARALLELXYZT +#if defined TM_PARALLELXYZT if (ieo == 1) { for (ix = T * LX * LY / 2; ix < T * LX * LY; ix++) { field_buffer_z2[ix - T * LX * LY / 2] = l[g_field_z_ipt_even[ix]]; @@ -174,7 +174,7 @@ void xchange_field(spinor* const l, const int ieo) { g_cart_grid, &requests[0]); MPI_Irecv((void*)(l + (T + 1) * LX * LY * LZ / 2), 1, field_time_slice_cont, g_nb_t_dn, 82, g_cart_grid, &requests[1]); -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the right in x direction 
*/ /* recieve the data from the neighbour on the left in x direction */ MPI_Isend((void*)(l + (LX - 1) * LY * LZ / 2), 1, field_x_slice_gath, g_nb_x_up, 92, @@ -183,7 +183,7 @@ void xchange_field(spinor* const l, const int ieo) { g_nb_x_dn, 92, g_cart_grid, &requests[5]); #endif -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the right in y direction */ /* recieve the data from the neighbour on the left in y direction */ MPI_Isend((void*)(l + (LY - 1) * LZ / 2), 1, field_y_slice_gath, g_nb_y_up, 102, g_cart_grid, @@ -192,7 +192,7 @@ void xchange_field(spinor* const l, const int ieo) { field_y_slice_cont, g_nb_y_dn, 102, g_cart_grid, &requests[9]); #endif -#if (defined PARALLELXYZT) +#if (defined TM_PARALLELXYZT) /* fill buffer ! */ /* This is now depending on whether the field is */ /* even or odd */ @@ -218,7 +218,7 @@ void xchange_field(spinor* const l, const int ieo) { MPI_Isend((void*)l, 1, field_time_slice_cont, g_nb_t_dn, 81, g_cart_grid, &requests[2]); MPI_Irecv((void*)(l + T * LX * LY * LZ / 2), 1, field_time_slice_cont, g_nb_t_up, 81, g_cart_grid, &requests[3]); -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in x direction */ /* recieve the data from the neighbour on the right in x direction */ MPI_Isend((void*)l, 1, field_x_slice_gath, g_nb_x_dn, 91, g_cart_grid, &requests[6]); @@ -226,7 +226,7 @@ void xchange_field(spinor* const l, const int ieo) { g_cart_grid, &requests[7]); #endif -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in y direction */ /* recieve the data from the neighbour on the right in y direction */ MPI_Isend((void*)l, 1, field_y_slice_gath, g_nb_y_dn, 101, g_cart_grid, &requests[10]); 
@@ -234,7 +234,7 @@ void xchange_field(spinor* const l, const int ieo) { g_nb_y_up, 101, g_cart_grid, &requests[11]); #endif -#if defined PARALLELXYZT +#if defined TM_PARALLELXYZT if (ieo == 1) { for (ix = T * LX * LY / 2; ix < T * LX * LY; ix++) { field_buffer_z2[ix - T * LX * LY / 2] = l[g_field_z_ipt_even[ix]]; @@ -259,12 +259,12 @@ void xchange_field(spinor* const l, const int ieo) { #endif return; -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst end(xchangefield) #endif } -#elif (defined _USE_SHMEM) /* _NON_BLOCKING */ +#elif (defined TM_USE_SHMEM) /* TM_NON_BLOCKING */ /* Here comes the version with shared memory */ /* exchanges the field l */ @@ -273,7 +273,7 @@ void xchange_field(spinor* const l, const int ieo) { #ifdef TM_USE_MPI int i, ix, mu, x0, x1, x2, x3, k; -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst begin(xchangefield) #endif @@ -283,7 +283,7 @@ void xchange_field(spinor* const l, const int ieo) { shmem_double_put((double*)(l + (T + 1) * LX * LY * LZ / 2), (double*)(l + (T - 1) * LX * LY * LZ / 2), (LX * LY * LZ * 12), g_nb_t_up); -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) k = (T + 2) * LX * LY * LZ / 2; for (x0 = 0; x0 < T; x0++) { shmem_double_put((double*)(l + k), (double*)(l + g_lexic2eo[g_ipt[x0][0][0][0]]), 12 * LZ * LY, @@ -298,7 +298,7 @@ void xchange_field(spinor* const l, const int ieo) { } #endif -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) k = ((T + 2) * LX * LY * LZ + 2 * T * LY * LZ) / 2; for (x0 = 0; x0 < T; x0++) { for (x1 = 0; x1 < LX; x1++) { @@ -317,7 +317,7 @@ void xchange_field(spinor* const l, const int ieo) { } #endif -#if (defined PARALLELXYZT) +#if (defined TM_PARALLELXYZT) x0 = (VOLUME / 2 + LX * LY * LZ + T * LY * LZ + T * LX * LZ); if (ieo == 1) { for (k = 0; k < T * LX * LY / 2; k++) { @@ -347,21 +347,21 @@ void 
xchange_field(spinor* const l, const int ieo) { shmem_barrier_all(); #endif // MPI return; -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst end(xchangefield) #endif } /* Here comes the naive version */ /* Using MPI_Sendrecv */ -#else /* _NON_BLOCKING _USE_SHMEM */ +#else /* TM_NON_BLOCKING TM_USE_SHMEM */ /* exchanges the field l */ void xchange_field(spinor* const l, const int ieo) { -#ifdef PARALLELXYZT +#ifdef TM_PARALLELXYZT int x0 = 0, x1 = 0, x2 = 0, ix = 0; #endif -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst begin(xchangefield) #endif @@ -379,7 +379,7 @@ void xchange_field(spinor* const l, const int ieo) { (void*)(l + (T + 1) * LX * LY * LZ / 2), 1, field_time_slice_cont, g_nb_t_dn, 82, g_cart_grid, &status); -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in x direction */ /* recieve the data from the neighbour on the right in x direction */ MPI_Sendrecv((void*)l, 1, field_x_slice_gath, g_nb_x_dn, 91, @@ -394,7 +394,7 @@ void xchange_field(spinor* const l, const int ieo) { #endif -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in y direction */ /* recieve the data from the neighbour on the right in y direction */ MPI_Sendrecv((void*)l, 1, field_y_slice_gath, g_nb_y_dn, 101, @@ -409,7 +409,7 @@ void xchange_field(spinor* const l, const int ieo) { #endif -#if (defined PARALLELXYZT) +#if (defined TM_PARALLELXYZT) /* fill buffer ! 
*/ /* This is now depending on whether the field is */ /* even or odd */ @@ -448,9 +448,9 @@ void xchange_field(spinor* const l, const int ieo) { #endif #endif // MPI return; -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst end(xchangefield) #endif } -#endif /* _NON_BLOCKING */ +#endif /* TM_NON_BLOCKING */ diff --git a/src/lib/xchange/xchange_gauge.c b/src/lib/xchange/xchange_gauge.c index 3465d970f..6177a3dbb 100644 --- a/src/lib/xchange/xchange_gauge.c +++ b/src/lib/xchange/xchange_gauge.c @@ -38,7 +38,7 @@ #include "su3adj.h" #include "xchange_gauge.h" -#if defined _NON_BLOCKING +#if defined TM_NON_BLOCKING void xchange_gauge(su3** const gf) { int cntr = 0; #ifdef TM_USE_MPI @@ -80,7 +80,7 @@ void xchange_gauge(su3** const gf) { cntr = cntr + 2; } -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in x direction */ /* recieve the data from the neighbour on the right in x direction */ MPI_Isend(gf[0], 1, gauge_x_slice_gath, g_nb_x_dn, 87, g_cart_grid, &request[cntr]); @@ -117,7 +117,7 @@ void xchange_gauge(su3** const gf) { #endif MPI_Waitall(cntr, request, status); cntr = 0; -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* The edges */ /* send the data to the neighbour on the left in t direction */ @@ -175,10 +175,10 @@ void xchange_gauge(su3** const gf) { g_cart_grid, &request[cntr + 1]); cntr = cntr + 2; } - /* end of if defined PARALLELXT || PARALLELXYT || PARALLELXYZT*/ + /* end of if defined TM_PARALLELXT || TM_PARALLELXYT || TM_PARALLELXYZT*/ #endif -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in y direction */ /* recieve the data from the neighbour on the right in y direction */ 
MPI_Isend(gf[0], 1, gauge_y_slice_gath, g_nb_y_dn, 106, g_cart_grid, &request[cntr]); @@ -212,7 +212,7 @@ void xchange_gauge(su3** const gf) { #endif MPI_Waitall(cntr, request, status); cntr = 0; -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* jetzt wirds richtig eklig ... */ @@ -326,9 +326,9 @@ void xchange_gauge(su3** const gf) { cntr = cntr + 2; } - /* end of if defined PARALLELXYT || PARALLELXYZT */ + /* end of if defined TM_PARALLELXYT || TM_PARALLELXYZT */ #endif -#if defined PARALLELXYZT +#if defined TM_PARALLELXYZT /* z-Rand */ /* send the data to the neighbour on the left in z direction */ /* recieve the data from the neighbour on the right in z direction */ @@ -361,7 +361,7 @@ void xchange_gauge(su3** const gf) { } #endif MPI_Waitall(cntr, request, status); -#if defined PARALLELXYZT +#if defined TM_PARALLELXYZT cntr = 0; /* edges */ @@ -538,13 +538,13 @@ void xchange_gauge(su3** const gf) { } MPI_Waitall(cntr, request, status); - /* end of if defined PARALLELXYZT */ + /* end of if defined TM_PARALLELXYZT */ #endif #endif return; } -#else /* _NON_BLOCKING */ +#else /* TM_NON_BLOCKING */ void xchange_gauge(su3** const gf) { #ifdef TM_USE_MPI @@ -576,7 +576,7 @@ void xchange_gauge(su3** const gf) { g_cart_grid, &status); } -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in x direction */ /* recieve the data from the neighbour on the right in x direction */ MPI_Sendrecv(gf[0], 1, gauge_x_slice_gath, g_nb_x_dn, 93, gf[(T + 2) * LX * LY * LZ], 1, @@ -648,10 +648,10 @@ void xchange_gauge(su3** const gf) { g_nb_t_up, 98, gf[VOLUMEPLUSRAND + RAND + 6 * LY * LZ], 1, gauge_xt_edge_cont, g_nb_t_dn, 98, g_cart_grid, &status); } - /* end of if defined PARALLELXT || PARALLELXYT || PARALLELXYZT*/ + /* end of if defined TM_PARALLELXT || TM_PARALLELXYT || 
TM_PARALLELXYZT*/ #endif -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in y direction */ /* recieve the data from the neighbour on the right in y direction */ MPI_Sendrecv(gf[0], 1, gauge_y_slice_gath, g_nb_y_dn, 103, @@ -770,9 +770,9 @@ void xchange_gauge(su3** const gf) { gauge_ty_edge_cont, g_nb_y_dn, 298, g_cart_grid, &status); } - /* end of if defined PARALLELXYT || PARALLELXYZT */ + /* end of if defined TM_PARALLELXYT || TM_PARALLELXYZT */ #endif -#if defined PARALLELXYZT +#if defined TM_PARALLELXYZT /* z-Rand */ /* send the data to the neighbour on the left in z direction */ /* recieve the data from the neighbour on the right in z direction */ @@ -954,11 +954,11 @@ void xchange_gauge(su3** const gf) { 1, gauge_zy_edge_cont, g_nb_y_dn, 510, g_cart_grid, &status); } - /* end of if defined PARALLELXYZT */ + /* end of if defined TM_PARALLELXYZT */ #endif #endif return; } -#endif /* _NON_BLOCKING */ +#endif /* TM_NON_BLOCKING */ diff --git a/src/lib/xchange/xchange_halffield.c b/src/lib/xchange/xchange_halffield.c index d1eae8a04..3948aa1ca 100644 --- a/src/lib/xchange/xchange_halffield.c +++ b/src/lib/xchange/xchange_halffield.c @@ -41,9 +41,9 @@ #include "su3.h" #include "xchange_halffield.h" -#if (defined _USE_HALFSPINOR) +#if (defined TM_USE_HALFSPINOR) -#if (defined _PERSISTENT) +#if (defined TM_PERSISTENT) MPI_Request prequests[16]; @@ -51,13 +51,13 @@ MPI_Request prequests[16]; void init_xchange_halffield() { #ifdef TM_USE_MPI -#ifdef PARALLELT +#ifdef TM_PARALLELT int reqcount = 4; -#elif defined PARALLELXT +#elif defined TM_PARALLELXT int reqcount = 8; -#elif defined PARALLELXYT +#elif defined TM_PARALLELXYT int reqcount = 12; -#elif defined PARALLELXYZT +#elif defined TM_PARALLELXYZT int x0 = 0, x1 = 0, x2 = 0, ix = 0; int reqcount = 16; #endif @@ -78,7 +78,7 @@ void init_xchange_halffield() { MPI_Recv_init((void*)(recvBuffer), LX * LY * LZ * 12 / 
2, MPI_DOUBLE, g_nb_t_up, 82, g_cart_grid, &prequests[3]); -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the right in x direction */ /* recieve the data from the neighbour on the left in x direction */ @@ -97,7 +97,7 @@ void init_xchange_halffield() { g_cart_grid, &prequests[7]); #endif -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the right in y direction */ /* recieve the data from the neighbour on the left in y direction */ MPI_Send_init((void*)(sendBuffer + LX * LY * LZ + T * LY * LZ), T * LX * LZ * 12 / 2, MPI_DOUBLE, @@ -115,7 +115,7 @@ void init_xchange_halffield() { g_nb_y_up, 102, g_cart_grid, &prequests[11]); #endif -#if (defined PARALLELXYZT) +#if (defined TM_PARALLELXYZT) /* send the data to the neighbour on the right in z direction */ /* recieve the data from the neighbour on the left in z direction */ MPI_Send_init((void*)(sendBuffer + LX * LY * LZ + T * LY * LZ + T * LX * LZ), @@ -141,13 +141,13 @@ void xchange_halffield() { #ifdef TM_USE_MPI MPI_Status status[16]; -#ifdef PARALLELT +#ifdef TM_PARALLELT int reqcount = 4; -#elif defined PARALLELXT +#elif defined TM_PARALLELXT int reqcount = 8; -#elif defined PARALLELXYT +#elif defined TM_PARALLELXYT int reqcount = 12; -#elif defined PARALLELXYZT +#elif defined TM_PARALLELXYZT int x0 = 0, x1 = 0, x2 = 0, ix = 0; int reqcount = 16; #endif @@ -158,7 +158,7 @@ void xchange_halffield() { return; } -#else /* def (_USE_SHMEM || _PERSISTENT) */ +#else /* def (TM_USE_SHMEM || TM_PERSISTENT) */ /* 4. 
*/ void xchange_halffield() { @@ -166,17 +166,17 @@ void xchange_halffield() { MPI_Request requests[16]; MPI_Status status[16]; -#ifdef PARALLELT +#ifdef TM_PARALLELT int reqcount = 4; -#elif defined PARALLELXT +#elif defined TM_PARALLELXT int reqcount = 8; -#elif defined PARALLELXYT +#elif defined TM_PARALLELXYT int reqcount = 12; -#elif defined PARALLELXYZT +#elif defined TM_PARALLELXYZT int reqcount = 16; #endif -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst begin(xchangehalf) #endif /* send the data to the neighbour on the right in t direction */ @@ -193,7 +193,7 @@ void xchange_halffield() { MPI_Irecv((void*)(recvBuffer), LX * LY * LZ * 12 / 2, MPI_DOUBLE, g_nb_t_up, 82, g_cart_grid, &requests[3]); -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the right in x direction */ /* recieve the data from the neighbour on the left in x direction */ @@ -210,7 +210,7 @@ void xchange_halffield() { g_cart_grid, &requests[7]); #endif -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the right in y direction */ /* recieve the data from the neighbour on the left in y direction */ MPI_Isend((void*)(sendBuffer + LX * LY * LZ + T * LY * LZ), T * LX * LZ * 12 / 2, MPI_DOUBLE, @@ -226,7 +226,7 @@ void xchange_halffield() { g_nb_y_up, 102, g_cart_grid, &requests[11]); #endif -#if (defined PARALLELXYZT) +#if (defined TM_PARALLELXYZT) /* send the data to the neighbour on the right in z direction */ /* recieve the data from the neighbour on the left in z direction */ MPI_Isend((void*)(sendBuffer + LX * LY * LZ + T * LY * LZ + T * LX * LZ), T * LX * LY * 12 / 2, @@ -246,27 +246,27 @@ void xchange_halffield() { #endif /* MPI */ return; -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst end(xchangehalf) #endif } -#endif /* def 
(_USE_SHMEM || _PERSISTENT) */ +#endif /* def (TM_USE_SHMEM || TM_PERSISTENT) */ void xchange_halffield32() { #ifdef TM_USE_MPI MPI_Request requests[16]; MPI_Status status[16]; -#ifdef PARALLELT +#ifdef TM_PARALLELT int reqcount = 4; -#elif defined PARALLELXT +#elif defined TM_PARALLELXT int reqcount = 8; -#elif defined PARALLELXYT +#elif defined TM_PARALLELXYT int reqcount = 12; -#elif defined PARALLELXYZT +#elif defined TM_PARALLELXYZT int reqcount = 16; #endif -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst begin(xchangehalf32) #endif @@ -284,7 +284,7 @@ void xchange_halffield32() { MPI_Irecv((void*)(recvBuffer32), LX * LY * LZ * 12 / 2, MPI_FLOAT, g_nb_t_up, 82, g_cart_grid, &requests[3]); -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the right in x direction */ /* recieve the data from the neighbour on the left in x direction */ @@ -301,7 +301,7 @@ void xchange_halffield32() { g_cart_grid, &requests[7]); #endif -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the right in y direction */ /* recieve the data from the neighbour on the left in y direction */ MPI_Isend((void*)(sendBuffer32 + LX * LY * LZ + T * LY * LZ), T * LX * LZ * 12 / 2, MPI_FLOAT, @@ -317,7 +317,7 @@ void xchange_halffield32() { g_nb_y_up, 102, g_cart_grid, &requests[11]); #endif -#if (defined PARALLELXYZT) +#if (defined TM_PARALLELXYZT) /* send the data to the neighbour on the right in z direction */ /* recieve the data from the neighbour on the left in z direction */ MPI_Isend((void*)(sendBuffer32 + LX * LY * LZ + T * LY * LZ + T * LX * LZ), T * LX * LY * 12 / 2, @@ -336,8 +336,8 @@ void xchange_halffield32() { MPI_Waitall(reqcount, requests, status); #endif /* MPI */ return; -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst 
end(xchangehalf32) #endif } -#endif /* defined _USE_HALFSPINOR */ +#endif /* defined TM_USE_HALFSPINOR */ diff --git a/src/lib/xchange/xchange_lexicfield.c b/src/lib/xchange/xchange_lexicfield.c index 9def17fc6..56cc4315c 100644 --- a/src/lib/xchange/xchange_lexicfield.c +++ b/src/lib/xchange/xchange_lexicfield.c @@ -43,7 +43,7 @@ #include "xchange_lexicfield.h" /* this version uses non-blocking MPI calls */ -#if (defined _NON_BLOCKING) +#if (defined TM_NON_BLOCKING) /* this is the version independent of the content of the function Index (only available with * non-blocking)) */ @@ -51,16 +51,16 @@ void xchange_lexicfield(spinor* const l) { MPI_Request requests[16]; MPI_Status status[16]; -#ifdef PARALLELT +#ifdef TM_PARALLELT int reqcount = 4; -#elif defined PARALLELXT +#elif defined TM_PARALLELXT int reqcount = 8; -#elif defined PARALLELXYT +#elif defined TM_PARALLELXYT int reqcount = 12; -#elif defined PARALLELXYZT +#elif defined TM_PARALLELXYZT int reqcount = 16; #endif -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst begin(xchange_lexicfield) #endif @@ -71,7 +71,7 @@ void xchange_lexicfield(spinor* const l) { MPI_Isend((void*)l, 1, lfield_time_slice_cont, g_nb_t_dn, 5081, g_cart_grid, &requests[0]); MPI_Irecv((void*)(l + VOLUME), 1, lfield_time_slice_cont, g_nb_t_up, 5081, g_cart_grid, &requests[1]); -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in x direction */ /* recieve the data from the neighbour on the right in x direction */ MPI_Isend((void*)l, 1, lfield_x_slice_gath, g_nb_x_dn, 5091, g_cart_grid, &requests[4]); @@ -80,7 +80,7 @@ void xchange_lexicfield(spinor* const l) { #endif -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in y direction */ /* recieve the data from the neighbour on 
the right in y direction */ MPI_Isend((void*)l, 1, lfield_y_slice_gath, g_nb_y_dn, 5101, g_cart_grid, &requests[8]); @@ -88,7 +88,7 @@ void xchange_lexicfield(spinor* const l) { 5101, g_cart_grid, &requests[9]); #endif -#if (defined PARALLELXYZT) +#if (defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in z direction */ /* recieve the data from the neighbour on the right in z direction */ @@ -103,7 +103,7 @@ void xchange_lexicfield(spinor* const l) { MPI_Irecv((void*)(l + (T + 1) * LX * LY * LZ), 1, lfield_time_slice_cont, g_nb_t_dn, 5082, g_cart_grid, &requests[3]); -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the right in x direction */ /* recieve the data from the neighbour on the left in x direction */ MPI_Isend((void*)(l + (LX - 1) * LY * LZ), 1, lfield_x_slice_gath, g_nb_x_up, 5092, g_cart_grid, @@ -112,7 +112,7 @@ void xchange_lexicfield(spinor* const l) { 5092, g_cart_grid, &requests[7]); #endif -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the right in y direction */ /* recieve the data from the neighbour on the left in y direction */ MPI_Isend((void*)(l + (LY - 1) * LZ), 1, lfield_y_slice_gath, g_nb_y_up, 5102, g_cart_grid, @@ -121,7 +121,7 @@ void xchange_lexicfield(spinor* const l) { g_nb_y_dn, 5102, g_cart_grid, &requests[11]); #endif -#if defined PARALLELXYZT +#if defined TM_PARALLELXYZT /* send the data to the neighbour on the right in y direction */ /* recieve the data from the neighbour on the left in y direction */ @@ -135,21 +135,21 @@ void xchange_lexicfield(spinor* const l) { #endif return; -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst end(xchange_lexicfield) #endif } /* Here comes the naive version */ /* Using MPI_Sendrecv */ -#else /* _NON_BLOCKING */ +#else /* 
TM_NON_BLOCKING */ /* exchanges the field l */ void xchange_lexicfield(spinor* const l) { -#ifdef PARALLELXYZT +#ifdef TM_PARALLELXYZT int x0 = 0, x1 = 0, x2 = 0, ix = 0; #endif -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst begin(xchange_lexicfield) #endif @@ -167,7 +167,7 @@ void xchange_lexicfield(spinor* const l) { (void*)(l + (T + 1) * LX * LY * LZ), 1, lfield_time_slice_cont, g_nb_t_dn, 5082, g_cart_grid, &status); -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in x direction */ /* recieve the data from the neighbour on the right in x direction */ MPI_Sendrecv((void*)l, 1, lfield_x_slice_gath, g_nb_x_dn, 5091, @@ -182,7 +182,7 @@ void xchange_lexicfield(spinor* const l) { #endif -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in y direction */ /* recieve the data from the neighbour on the right in y direction */ MPI_Sendrecv((void*)l, 1, lfield_y_slice_gath, g_nb_y_dn, 5101, @@ -197,7 +197,7 @@ void xchange_lexicfield(spinor* const l) { #endif -#if (defined PARALLELXYZT) +#if (defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in z direction */ /* recieve the data from the neighbour on the right in z direction */ MPI_Sendrecv((void*)l, 1, lfield_z_slice_gath, g_nb_z_dn, 5503, @@ -214,7 +214,7 @@ void xchange_lexicfield(spinor* const l) { #endif #endif return; -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst end(xchange_lexicfield) #endif } @@ -226,20 +226,20 @@ void xchange_lexicfield(spinor* const l) { ***********************************************************************/ /* this version uses non-blocking MPI calls */ -#if (defined _NON_BLOCKING) +#if (defined TM_NON_BLOCKING) void xchange_lexicfield32(spinor32* const l) { MPI_Request requests[16]; 
MPI_Status status[16]; -#ifdef PARALLELT +#ifdef TM_PARALLELT int reqcount = 4; -#elif defined PARALLELXT +#elif defined TM_PARALLELXT int reqcount = 8; -#elif defined PARALLELXYT +#elif defined TM_PARALLELXYT int reqcount = 12; -#elif defined PARALLELXYZT +#elif defined TM_PARALLELXYZT int reqcount = 16; #endif -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst begin(xchange_lexicfield32) #endif @@ -250,7 +250,7 @@ void xchange_lexicfield32(spinor32* const l) { MPI_Isend((void*)l, 1, lfield_time_slice_cont32, g_nb_t_dn, 5081, g_cart_grid, &requests[0]); MPI_Irecv((void*)(l + VOLUME), 1, lfield_time_slice_cont32, g_nb_t_up, 5081, g_cart_grid, &requests[1]); -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in x direction */ /* recieve the data from the neighbour on the right in x direction */ MPI_Isend((void*)l, 1, lfield_x_slice_gath32, g_nb_x_dn, 5091, g_cart_grid, &requests[4]); @@ -259,7 +259,7 @@ void xchange_lexicfield32(spinor32* const l) { #endif -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in y direction */ /* recieve the data from the neighbour on the right in y direction */ MPI_Isend((void*)l, 1, lfield_y_slice_gath32, g_nb_y_dn, 5101, g_cart_grid, &requests[8]); @@ -267,7 +267,7 @@ void xchange_lexicfield32(spinor32* const l) { 5101, g_cart_grid, &requests[9]); #endif -#if (defined PARALLELXYZT) +#if (defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in z direction */ /* recieve the data from the neighbour on the right in z direction */ @@ -282,7 +282,7 @@ void xchange_lexicfield32(spinor32* const l) { MPI_Irecv((void*)(l + (T + 1) * LX * LY * LZ), 1, lfield_time_slice_cont32, g_nb_t_dn, 5082, g_cart_grid, &requests[3]); -#if (defined PARALLELXT || defined PARALLELXYT 
|| defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the right in x direction */ /* recieve the data from the neighbour on the left in x direction */ MPI_Isend((void*)(l + (LX - 1) * LY * LZ), 1, lfield_x_slice_gath32, g_nb_x_up, 5092, g_cart_grid, @@ -291,7 +291,7 @@ void xchange_lexicfield32(spinor32* const l) { g_nb_x_dn, 5092, g_cart_grid, &requests[7]); #endif -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the right in y direction */ /* recieve the data from the neighbour on the left in y direction */ MPI_Isend((void*)(l + (LY - 1) * LZ), 1, lfield_y_slice_gath32, g_nb_y_up, 5102, g_cart_grid, @@ -300,7 +300,7 @@ void xchange_lexicfield32(spinor32* const l) { lfield_y_slice_cont32, g_nb_y_dn, 5102, g_cart_grid, &requests[11]); #endif -#if defined PARALLELXYZT +#if defined TM_PARALLELXYZT /* send the data to the neighbour on the right in y direction */ /* recieve the data from the neighbour on the left in y direction */ @@ -314,21 +314,21 @@ void xchange_lexicfield32(spinor32* const l) { #endif return; -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst end(xchange_lexicfield32) #endif } /* Here comes the naive version */ /* Using MPI_Sendrecv */ -#else /* _NON_BLOCKING */ +#else /* TM_NON_BLOCKING */ /* exchanges the field l */ void xchange_lexicfield32(spinor32* const l) { -#ifdef PARALLELXYZT +#ifdef TM_PARALLELXYZT int x0 = 0, x1 = 0, x2 = 0, ix = 0; #endif -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst begin(xchange_lexicfield32) #endif @@ -347,7 +347,7 @@ void xchange_lexicfield32(spinor32* const l) { (void*)(l + (T + 1) * LX * LY * LZ), 1, lfield_time_slice_cont32, g_nb_t_dn, 5082, g_cart_grid, &status); -#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined 
TM_PARALLELXYZT) /* send the data to the neighbour on the left in x direction */ /* recieve the data from the neighbour on the right in x direction */ MPI_Sendrecv((void*)l, 1, lfield_x_slice_gath32, g_nb_x_dn, 5091, @@ -362,7 +362,7 @@ void xchange_lexicfield32(spinor32* const l) { #endif -#if (defined PARALLELXYT || defined PARALLELXYZT) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in y direction */ /* recieve the data from the neighbour on the right in y direction */ MPI_Sendrecv((void*)l, 1, lfield_y_slice_gath32, g_nb_y_dn, 5101, @@ -377,7 +377,7 @@ void xchange_lexicfield32(spinor32* const l) { #endif -#if (defined PARALLELXYZT) +#if (defined TM_PARALLELXYZT) /* send the data to the neighbour on the left in z direction */ /* recieve the data from the neighbour on the right in z direction */ MPI_Sendrecv((void*)l, 1, lfield_z_slice_gath32, g_nb_z_dn, 5503, @@ -394,7 +394,7 @@ void xchange_lexicfield32(spinor32* const l) { #endif #endif return; -#ifdef _KOJAK_INST +#ifdef TM_KOJAK_INST #pragma pomp inst end(xchange_lexicfield32) #endif } From 8f39dd2600baf62461c5657120f956ab3e21eb99 Mon Sep 17 00:00:00 2001 From: Mathieu Taillefumier Date: Tue, 10 Feb 2026 17:12:26 +0100 Subject: [PATCH 03/19] [cmake] More work - Add fftw - Add option to compile the tests (OFF by default) --- CMakeLists.txt | 3 ++- cmake/tmlqcd_config_internal.h.in | 3 --- src/bin/CMakeLists.txt | 25 +++++++++++++++++++++++++ src/lib/CMakeLists.txt | 7 +++---- 4 files changed, 30 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 39adba1c5..2cacfcc39 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.24) +cmake_minimum_required(VERSION 3.30) project( tmlqcd @@ -103,6 +103,7 @@ option(TM_USE_SHMEM "Use shmem API" OFF) option(TM_USE_QUDA "Enable QUDA support" OFF) option(TM_USE_GPROF "Enable gprof profiler" OFF) option(TM_ENABLE_WARNINGS "Enable all warnings" ON) 
+option(TM_ENABLE_TESTS "Enable tests" OFF) # MPI dependent options cmake_dependent_option( diff --git a/cmake/tmlqcd_config_internal.h.in b/cmake/tmlqcd_config_internal.h.in index 2765a2b7c..89bc753df 100644 --- a/cmake/tmlqcd_config_internal.h.in +++ b/cmake/tmlqcd_config_internal.h.in @@ -93,9 +93,6 @@ /* Define to 1 to make fseeko visible on some hosts (e.g. glibc 2.2). */ #cmakedefine TM_LARGEFILE_SOURCE -/* Define for large files, on AIX-style hosts. */ -#cmakedefine TM_LARGE_FILES - /* Use even/odd geometry in the gauge fields */ #cmakedefine TM_NEW_GEOMETRY diff --git a/src/bin/CMakeLists.txt b/src/bin/CMakeLists.txt index 29c9c1d8a..2f135ddae 100644 --- a/src/bin/CMakeLists.txt +++ b/src/bin/CMakeLists.txt @@ -17,3 +17,28 @@ foreach(_prog ${tmlqcd_prog}) POSITION_INDEPENDENT_CODE ON LINKER_LANGUAGE "CXX") endforeach() + +if(TM_ENABLE_TESTS) + list( + APPEND + tmlqcd_test_prog + "check_locallity.c;hopping_test.cscalar_prod_r_test.c;test_eigenvalues.c;test_lemon.c" + ) + if(TM_USE_LEMON) + list(APPEND tmlqcd_test_prog test_lemon.c) + endif() + if(TN_USE_QPHIX) + list(APPEND tmlqcd_test_prog qphix_test_Dslash.c) + endif() + + foreach(_prog ${tmlqcd_test_prog}) + add_executable(${_prog} "${CMAKE_SOURCE_DIR}/src/bin/tests/${_prog}.c") + + target_link_libraries(${_prog} PUBLIC hmc) + set_target_properties( + ${_prog} + PROPERTIES IMPORTED_LINK_INTERFACE_LANGUAGES "CXX" + POSITION_INDEPENDENT_CODE ON + LINKER_LANGUAGE "CXX") + endforeach() +endif() diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index 746b40c0d..ea2f7e41d 100644 --- a/src/lib/CMakeLists.txt +++ b/src/lib/CMakeLists.txt @@ -332,7 +332,7 @@ list( get_rectangle_staples.c rnd_gauge_trafo.c measure_rectangles.c - #invert.c + # invert.c deriv_Sb_D_psi.c mpi_init.c update_momenta_fg.c @@ -414,7 +414,7 @@ endif() # create a target library with namespacing because cmake does not know name # space at all -if (BUILD_SHARED_LIBS) +if(BUILD_SHARED_LIBS) add_library(hmc SHARED 
"${ALL_SRC};${FLEX_tmlqcd_input_read_OUTPUTS}") else() add_library(hmc STATIC "${ALL_SRC};${FLEX_tmlqcd_input_read_OUTPUTS}") @@ -449,8 +449,7 @@ target_link_libraries( m) target_compile_definitions( - hmc PUBLIC HAVE_CONFIG_H - $<$:${TM_GPU_PLATFORM_DFLAGS}>) + hmc PUBLIC HAVE_CONFIG_H $<$:${TM_GPU_PLATFORM_DFLAGS}>) target_include_directories( hmc From 7315eaeb41769177604c44cc9bb8e892d1b7d344 Mon Sep 17 00:00:00 2001 From: Mathieu Taillefumier Date: Thu, 12 Feb 2026 11:48:58 +0100 Subject: [PATCH 04/19] Removed FindLemon.cmake --- cmake/FindLemon.cmake | 25 ------------------------- 1 file changed, 25 deletions(-) delete mode 100644 cmake/FindLemon.cmake diff --git a/cmake/FindLemon.cmake b/cmake/FindLemon.cmake deleted file mode 100644 index cdeca5e42..000000000 --- a/cmake/FindLemon.cmake +++ /dev/null @@ -1,25 +0,0 @@ -include(FindPackageHandleStandardArgs) - -find_library( - TMLQCD_LEMON_LIBRARIES - NAMES lemon - PATH_SUFFIXES "lib" "lib64") - -find_path( - TMLQCD_LEMON_INCLUDE_DIRS - NAMES lemon.h - PATH_SUFFIXES "include" "include/${_pacakge_name}" "${_package_name}") - -find_package_handle_standard_args(Lemon DEFAULT_MSG TMLQCD_LEMON_LIBRARIES - TMLQCD_LEMON_INCLUDE_DIRS) - -if(NOT TARGET tmlqcd::lemon) - add_library(tmlqcd::lemon INTERFACE IMPORTED) - set_target_properties(tmlqcd::lemon PROPERTIES INTERFACE_LINK_LIBRARIES - "${TMLQCD_LEMON_LIBRARIES}") - set_target_properties(tmlqcd::lemon PROPERTIES INTERFACE_INCLUDE_DIRECTORIES - "${TMLQCD_LEMON_INCLUDE_DIRS}") -endif() - -set(TMLQCD_LEMON_FOUND ON) -mark_as_advanced(TMLQCD_LEMON_LIBRARIES TMLQCD_LEMON_INCLUDE_DIRS) From 252d9684ffcacce57d1b5c4bbff73c11ff41b4c6 Mon Sep 17 00:00:00 2001 From: Mathieu Taillefumier Date: Mon, 16 Feb 2026 10:05:47 +0100 Subject: [PATCH 05/19] [cmake] More work --- .../repo/packages/lemonio/package.py | 22 ++- .github/workflows/basic-build.yaml | 38 ++-- .github/workflows/ddalphaamg-build.yaml | 47 ++--- .github/workflows/qphix-build.yaml | 54 ++---- CMakeLists.txt | 81 
++++---- cmake/FindDDAlphaAMG.cmake | 29 --- cmake/FindDDalphaAMG.cmake | 28 +++ cmake/tmlqcd_config_internal.h.in | 12 +- profiling/hmc_mk2/logs/example_log.out | 2 +- src/bin/LapH_ev.c | 180 ------------------ src/bin/benchmark.c | 6 +- src/bin/deriv_mg_tune.c | 2 +- src/bin/hmc_tm.c | 2 +- src/bin/invert.c | 2 +- src/bin/offline_measurement.c | 2 +- src/bin/tests/check_locallity.c | 6 +- src/bin/tests/hopping_test.c | 6 +- src/bin/tests/qphix_test_Dslash.c | 4 +- src/bin/tests/test_eigenvalues.c | 8 +- src/bin/tests/test_lemon.c | 2 +- src/lib/CMakeLists.txt | 15 +- src/lib/DDalphaAMG_interface.c | 32 ++-- src/lib/buffers/utils_generic_exchange.c | 2 +- src/lib/deriv_Sb.c | 18 +- src/lib/geometry_eo.c | 27 +-- src/lib/global.h | 1 - src/lib/init/init.h | 2 +- src/lib/init/init_dirac_halfspinor.c | 18 +- src/lib/init/init_geometry_indices.c | 1 - src/lib/io/utils_write_first_message.c | 6 +- src/lib/linalg/assign.c | 1 - src/lib/linalg/assign_add_mul_r_32.c | 2 +- src/lib/linalg/scalar_prod_r.c | 1 - src/lib/matrix_utils.c | 5 +- src/lib/measure_gauge_action.c | 2 +- src/lib/misc_types.h | 2 +- src/lib/mpi_init.c | 11 +- src/lib/mpi_init.h | 5 +- src/lib/operator/D_psi_body.c | 2 +- src/lib/operator/Hopping_Matrix.c | 4 +- src/lib/operator/Hopping_Matrix_32.c | 4 +- src/lib/operator/halfspinor_body.c | 4 +- src/lib/operator/hopping_bg_dbl.c | 20 +- src/lib/operator/hopping_body_dbl.c | 20 +- src/lib/operator/hopping_sgl.c | 18 +- src/lib/operator/tm_sub_Hopping_Matrix.c | 4 +- src/lib/operator/tm_times_Hopping_Matrix.c | 6 +- src/lib/read_input.l | 2 +- src/lib/smearing/utils_reunitarize_MILC.c | 4 +- src/lib/solver/gram-schmidt.c | 1 - src/lib/test/check_geometry.c | 7 +- src/lib/test/check_overlap.c | 2 +- src/lib/wrapper/lib_wrapper.c | 6 +- src/lib/xchange/xchange_gauge.c | 1 - 54 files changed, 281 insertions(+), 508 deletions(-) delete mode 100644 cmake/FindDDAlphaAMG.cmake create mode 100644 cmake/FindDDalphaAMG.cmake delete mode 100644 src/bin/LapH_ev.c 
diff --git a/.ci/uenv-recipes/tmlqcd/daint-gh200/repo/packages/lemonio/package.py b/.ci/uenv-recipes/tmlqcd/daint-gh200/repo/packages/lemonio/package.py index d70cac492..7508b4b79 100755 --- a/.ci/uenv-recipes/tmlqcd/daint-gh200/repo/packages/lemonio/package.py +++ b/.ci/uenv-recipes/tmlqcd/daint-gh200/repo/packages/lemonio/package.py @@ -2,12 +2,13 @@ # # SPDX-License-Identifier: (Apache-2.0 OR MIT) -from spack_repo.builtin.build_systems.autotools import AutotoolsPackage +from spack_repo.builtin.build_systems import cmake +from spack_repo.builtin.build_systems.cmake import CMakePackage, generator from spack.package import * -class Lemonio(AutotoolsPackage): +class Lemonio(AutotoolsPackage, CMakePackage): """LEMON: Lightweight Parallel I/O library for Lattice QCD.""" homepage = "https://github.com/etmc/lemon" @@ -16,13 +17,18 @@ class Lemonio(AutotoolsPackage): version('master', branch='master') - depends_on("autoconf", type="build", when="@master build_system=autotools") - depends_on("automake", type="build", when="@master build_system=autotools") - depends_on("libtool", type="build", when="@master build_system=autotools") + depends_on("libtool", type="build", when="@master build_system=cmake") + depends_on("cmake", type="build", when="master build_system=cmake") depends_on('mpi') - def configure_args(self): - args = [] - args.append('CC={0}'.format(self.spec['mpi'].mpicc)) + generator("ninja") + +class CMakeBuilder(cmake.CMakeBuilder): + def cmake_args(self): + spec = self.spec + args = [ + self.define_from_variant("DBUILD_SHARED_LIBS" "shared"), + ] return args + diff --git a/.github/workflows/basic-build.yaml b/.github/workflows/basic-build.yaml index afe18e145..d46b67830 100644 --- a/.github/workflows/basic-build.yaml +++ b/.github/workflows/basic-build.yaml @@ -35,16 +35,16 @@ jobs: repository: usqcd-software/c-lime path: lime - - name: autogen_lime + - name: create_builddir_lime working-directory: ${{github.workspace}}/lime - run: ./autogen.sh && mkdir build 
+ run: mkdir build - name: build_lime working-directory: ${{github.workspace}}/lime/build run: | CC=gcc \ CFLAGS="-march=haswell -mtune=haswell -O2" \ - ../configure --prefix=$(pwd)/install_dir + cmake -DCMAKE_INSTALL_PREFIX=$(pwd)/install_dir .. >> config.log make -j make install @@ -61,10 +61,9 @@ jobs: repository: etmc/lemon path: lemon - - name: autogen_lemon + - name: create_builddir_lemon working-directory: ${{github.workspace}}/lemon run: | - autoreconf -i -f mkdir build - name: build_lemon @@ -72,9 +71,9 @@ jobs: run: | CC=mpicc \ CFLAGS="-march=haswell -mtune=haswell -O2" \ - ../configure --prefix=$(pwd)/install_dir + cmake -DCMAKE_INSTALL_PREFIX=$(pwd)/install_dir .. make -j - make install + make install > config.log - name: Archive lemon config.log if: ${{ always() }} @@ -92,28 +91,19 @@ jobs: shell: bash run: mkdir ${{github.workspace}}/main/build - - name: autogen_tmlqcd - working-directory: ${{github.workspace}}/main - run: autoconf - - name: configure_and_build shell: bash working-directory: ${{github.workspace}}/main/build run: | - CC=mpicc CXX=mpicxx \ - LDFLAGS="-fopenmp" \ CFLAGS="-O2 -mtune=haswell -march=haswell -mavx2 -mfma -DOMPI_SKIP_MPICXX -fopenmp" \ CXXFLAGS="-O2 -mtune=haswell -march=haswell -mavx2 -mfma -DOMPI_SKIP_MPICXX -fopenmp" \ - ../configure \ - --enable-mpi \ - --with-mpidimension=4 \ - --enable-omp \ - --disable-sse2 \ - --disable-sse3 \ - --with-limedir=${{github.workspace}}/lime/build/install_dir \ - --with-lemondir=${{github.workspace}}/lemon/build/install_dir \ - --with-lapack="-lblas -llapack" || cat config.log - make -j + cmake -DCMAKE_PREFIX_PATH="${{github.workspace}}/lime/build/install_dir;${{github.workspace}}/lemon/build/install_dir" \ + -DTM_USE_MPI=ON \ + -DTM_USE_OMP=ON \ + -DTM_USE_LEMON=ON \ + .. 
> config.log + cat config.log + make -j - name: Archive tmLQCD config.log if: ${{ always() }} @@ -125,7 +115,7 @@ jobs: - name: nf2_rgmixedcg_hmc_tmcloverdetratio working-directory: ${{github.workspace}}/main/build run: | - mpirun -np 2 ./hmc_tm \ + mpirun -np 2 src/bin/hmc_tm \ -f ../doc/sample-input/sample-hmc-rgmixedcg-tmcloverdetratio.input - name: Archive nf2_rgmixedcg_hmc_tmcloverdetratio output diff --git a/.github/workflows/ddalphaamg-build.yaml b/.github/workflows/ddalphaamg-build.yaml index f50ffcae9..509fb28b6 100644 --- a/.github/workflows/ddalphaamg-build.yaml +++ b/.github/workflows/ddalphaamg-build.yaml @@ -40,19 +40,16 @@ jobs: repository: usqcd-software/c-lime path: lime - - name: autogen_lime + - name: create_builddir_lime working-directory: ${{github.workspace}}/lime - run: ./autogen.sh - - - name: create_lime_builddir - run: mkdir ${{github.workspace}}/lime/build + run: mkdir build - name: build_lime working-directory: ${{github.workspace}}/lime/build run: | CC=gcc \ CFLAGS="-march=haswell -mtune=haswell -O2" \ - ../configure --prefix=$(pwd)/install_dir + cmake -DCMAKE_INSTALL_PREFIX=$(pwd)/install_dir .. >> config.log make -j make install @@ -69,23 +66,20 @@ jobs: repository: etmc/lemon path: lemon - - name: create_lemon_builddir - run: mkdir ${{github.workspace}}/lemon/build - - - name: autogen_lemon + - name: create_builddir_lemon working-directory: ${{github.workspace}}/lemon - run: autoreconf -i -f + run: | + mkdir build - name: build_lemon working-directory: ${{github.workspace}}/lemon/build run: | CC=mpicc \ CFLAGS="-march=haswell -mtune=haswell -O2" \ - ../configure \ - --prefix=$(pwd)/install_dir + cmake -DCMAKE_INSTALL_PREFIX=$(pwd)/install_dir .. 
make -j - make install - + make install > config.log + - name: Archive lemon config.log if: ${{ always() }} uses: actions/upload-artifact@v4 @@ -111,10 +105,6 @@ jobs: shell: bash run: mkdir ${{github.workspace}}/main/build - - name: autogen_tmlqcd - working-directory: ${{github.workspace}}/main - run: autoconf - - name: configure_and_build shell: bash working-directory: ${{github.workspace}}/main/build @@ -123,22 +113,19 @@ jobs: LDFLAGS="-fopenmp" \ CFLAGS="-O2 -mtune=haswell -march=haswell -mavx2 -mfma -DOMPI_SKIP_MPICXX -fopenmp" \ CXXFLAGS="-O2 -mtune=haswell -march=haswell -mavx2 -mfma -DOMPI_SKIP_MPICXX -fopenmp" \ - ../configure \ - --enable-mpi \ - --with-mpidimension=4 \ - --enable-omp \ - --disable-sse2 \ - --disable-sse3 \ - --with-limedir=${{github.workspace}}/lime/build/install_dir \ - --with-lemondir=${{github.workspace}}/lemon/build/install_dir \ - --with-DDalphaAMG=${{github.workspace}}/ddalphaamg \ - --with-lapack="-lblas -llapack" || cat config.log + cmake -DCMAKE_PREFIX_PATH="${{github.workspace}}/lime/build/install_dir;${{github.workspace}}/lemon/build/install_dir;${{github.workspace}}/ddalphaamg" \ + -DTM_USE_MPI=ON \ + -DTM_USE_OMP=ON \ + -DTM_USE_LEMON=ON \ + -DTM_USE_DDalphaAMG=ON \ + .. 
> config.log + cat config.log make -j - name: nf2_ddalphaamg_hmc_tmcloverdetratio working-directory: ${{github.workspace}}/main/build run: | - mpirun -np 2 ./hmc_tm \ + mpirun -np 2 src/bin/hmc_tm \ -f ../doc/sample-input/sample-hmc-ddalphaamg-tmcloverdetratio.input - name: Archive nf2_ddalphaamg_hmc_tmcloverdetratio output diff --git a/.github/workflows/qphix-build.yaml b/.github/workflows/qphix-build.yaml index 1b39cdf34..eef1b5055 100644 --- a/.github/workflows/qphix-build.yaml +++ b/.github/workflows/qphix-build.yaml @@ -35,16 +35,16 @@ jobs: repository: usqcd-software/c-lime path: lime - - name: autogen_lime + - name: create_builddir_lime working-directory: ${{github.workspace}}/lime - run: ./autogen.sh && mkdir build + run: mkdir build - name: build_lime working-directory: ${{github.workspace}}/lime/build run: | CC=gcc \ CFLAGS="-march=haswell -mtune=haswell -O2" \ - ../configure --prefix=$(pwd)/install_dir + cmake -DCMAKE_INSTALL_PREFIX=$(pwd)/install_dir .. >> config.log make -j make install @@ -61,10 +61,9 @@ jobs: repository: etmc/lemon path: lemon - - name: autogen_lemon + - name: create_builddir_lemon working-directory: ${{github.workspace}}/lemon run: | - autoreconf -i -f mkdir build - name: build_lemon @@ -72,11 +71,10 @@ jobs: run: | CC=mpicc \ CFLAGS="-march=haswell -mtune=haswell -O2" \ - ../configure \ - --prefix=$(pwd)/install_dir + cmake -DCMAKE_INSTALL_PREFIX=$(pwd)/install_dir .. make -j - make install - + make install > config.log + - name: Archive lemon config.log if: ${{ always() }} uses: actions/upload-artifact@v4 @@ -84,7 +82,6 @@ jobs: name: lemon_config_output path: ${{github.workspace}}/lemon/build/config.log - - name: get_qmp uses: actions/checkout@v4 with: @@ -151,9 +148,10 @@ jobs: -DCMAKE_C_COMPILER=mpicc \ -DCMAKE_C_FLAGS="-std=c99 -O2 -mavx2 -mfma -mtune=haswell -march=haswell -fopenmp" \ -DCMAKE_INSTALL_PREFIX=$(pwd)/install_dir \ - .. + .. 
>> config.log VERBOSE=1 make -j $(( ${nb_cores} + 3 )) - make install + make install > config.log + cat config.log - name: get_tmlqcd uses: actions/checkout@v4 @@ -164,31 +162,21 @@ jobs: shell: bash run: mkdir ${{github.workspace}}/main/build - - name: autogen_tmlqcd - working-directory: ${{github.workspace}}/main - run: autoconf - - name: configure_and_build shell: bash working-directory: ${{github.workspace}}/main/build run: | CC=mpicc CXX=mpicxx \ - LDFLAGS="-fopenmp" \ - CFLAGS="-O2 -mtune=haswell -march=haswell -mavx2 -mfma -DOMPI_SKIP_MPICXX -fopenmp" \ - CXXFLAGS="-O2 -mtune=haswell -march=haswell -mavx2 -mfma -DOMPI_SKIP_MPICXX -fopenmp" \ - ../configure \ - --enable-mpi \ - --with-mpidimension=4 \ - --enable-omp \ - --disable-sse2 \ - --disable-sse3 \ - --with-limedir=${{github.workspace}}/lime/build/install_dir \ - --with-lemondir=${{github.workspace}}/lemon/build/install_dir \ - --with-lapack="-lblas -llapack" \ - --with-qmpdir=${{github.workspace}}/qmp/build/install_dir \ - --with-qphixdir=${{github.workspace}}/qphix/build/install_dir \ - --enable-qphix-soalen=4 || cat config.log - make -j + cmake -DCMAKE_PREFIX_PATH="${{github.workspace}}/lime/build/install_dir;${{github.workspace}}/lemon/build/install_dir;${{github.workspace}}/qmp/build/install_dir;${{github.workspace}}/qphix/build/install_dir" \ + -DTM_USE_MPI=ON \ + -DTM_USE_OMP=ON \ + -DTM_USE_LEMON=ON \ + -DTM_USE_QPHIX=ON \ + -DCMAKE_CXXFLAGS="-O2 -mtune=haswell -march=haswell -mavx2 -mfma -DOMPI_SKIP_MPICXX -fopenmp" \ + -DCMAKE_CFLAGS="-O2 -mtune=haswell -march=haswell -mavx2 -mfma -DOMPI_SKIP_MPICXX -fopenmp" \ + -DQPHIX_DIR="${{github.workspace}}/qphix/build/install_dir" \ + .. 
+ make -j > config.log - name: Archive tmLQCD config.log if: ${{ always() }} @@ -200,7 +188,7 @@ jobs: - name: nf2_qphix_hmc_tmcloverdetratio working-directory: ${{github.workspace}}/main/build run: | - mpirun -np 2 ./hmc_tm \ + mpirun -np 2 src/bin/hmc_tm \ -f ../doc/sample-input/sample-hmc-qphix-tmcloverdetratio.input - name: Archive nf2_qphix_hmc_tmcloverdetratio output diff --git a/CMakeLists.txt b/CMakeLists.txt index 2cacfcc39..a375ad14b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,9 +23,6 @@ endif() # ================================================================================================= # PROJECT AND VERSION include(CMakeDependentOption) -include(CheckSymbolExists) -include(CheckLibraryExists) -include(CheckFunctionExists) include(GNUInstallDirs) cmake_policy(SET CMP0048 NEW) @@ -56,16 +53,18 @@ endif() find_package(PkgConfig) -# ############################################################################## -# Define the paths for static libraries and executables -# ############################################################################## -set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY - ${cp2k_BINARY_DIR}/lib - CACHE PATH "Single output directory for building all libraries.") - # Search for rocm in common locations -foreach(__var ROCM_ROOT CRAY_ROCM_ROOT ORNL_ROCM_ROOT CRAY_ROCM_PREFIX - ROCM_PREFIX CRAY_ROCM_DIR) +foreach( + __var + ROCM_ROOT + HIP_ROOT + HIP_PATH + CRAY_ROCM_ROOT + ORNL_ROCM_ROOT + CRAY_ROCM_PREFIX + ROCM_PREFIX + CRAY_ROCM_DIR + ROCM_PATH) if($ENV{${__var}}) list(APPEND CMAKE_PREFIX_PATH $ENV{__var}) set(ROCM_PATH @@ -75,7 +74,7 @@ foreach(__var ROCM_ROOT CRAY_ROCM_ROOT ORNL_ROCM_ROOT CRAY_ROCM_PREFIX endforeach() option(CMAKE_POSITION_INDEPENDENT_CODE "Enable position independent code" ON) -option(BUILD_SHARED_LIBS "Enable shared library" ON) +option(BUILD_SHARED_LIBS "Enable shared library" OFF) option(TM_USE_FFTW "Enable fftw support" OFF) option(TM_USE_MPI "Enable MPI support" OFF) option(TM_USE_CUDA "Enable QUDA 
support" OFF) @@ -93,15 +92,12 @@ set(TM_ENABLE_ALIGNMENT set_property(CACHE TM_ENABLE_ALIGNMENT PROPERTY STRINGS "auto" "none" "16" "32" "64") -option(TM_BGL_DRAM "use BGL dram window (BGL only!)" ON) option(TM_USE_OPTIMIZATION "enable optimisation" ON) -option(TM_USE_GAUGECOPY "Enable use of a copy of the gauge field" ON) +option(TM_USE_GAUGE_COPY "Enable use of a copy of the gauge field" ON) option(TM_USE_HALFSPINOR "Use a Dirac Op. with halfspinor exchange" ON) -option(TM_USE_TSPLITPAR "Enable timeslice-splitted communications" ON) option(TM_USE_QPHIX "enable QPhiX" OFF) option(TM_USE_SHMEM "Use shmem API" OFF) option(TM_USE_QUDA "Enable QUDA support" OFF) -option(TM_USE_GPROF "Enable gprof profiler" OFF) option(TM_ENABLE_WARNINGS "Enable all warnings" ON) option(TM_ENABLE_TESTS "Enable tests" OFF) @@ -203,7 +199,7 @@ if(TM_USE_HDF5) endif() if(TM_USE_LEMON) - find_package(Clemon REQUIRED) + find_package(lemon REQUIRED) endif() find_package(CLime REQUIRED) @@ -231,6 +227,8 @@ endif() if(TM_USE_CUDA OR QUDA_TARGET_CUDA) enable_language(CUDA) + + # placeholder for nvhpc for future use if(TM_USE_NVHPC) find_package(NVHPC REQUIRED COMPONENTS CUDA MATH HOSTUTILS NCCL) else() @@ -238,11 +236,11 @@ if(TM_USE_CUDA OR QUDA_TARGET_CUDA) endif() endif() +# We may want to use hip-cuda for development or debugging purposes especially +# if AMD GPU access is not possible. So allow it + if(TM_USE_HIP OR QUDA_TARGET_HIP) enable_language(hip) - - # we may want to use hip-cuda for development or debugging purposes especially - # if AMD GPU access is not possible. 
So allow it if(TM_USE_CUDA_HIP) find_package(CUDA) endif() @@ -254,14 +252,15 @@ if(TM_USE_HIP OR QUDA_TARGET_HIP) endif() endif() -if(TM_USE_QPIHX) - find_package(QPhiX REQUIRED) +if(TM_USE_QPHIX) + find_package(QPhiX REQUIRED CONFIG) + message("${QPhiX_LIBRARIES}") if(NOT TARGET tmlqcd::qphix) add_library(tmlqcd::qphix INTERFACE IMPORTED) set_target_properties(tmlqcd::qphix PROPERTIES INTERFACE_LINK_LIBRARIES - "${QPHIX_LIBRARIES}") + "${QPhiX_LIBRARIES}") set_target_properties(tmlqcd::qphix PROPERTIES INTERFACE_INCLUDE_DIRECTORIES - "${QPHIX_INCLUDE_DIRS}") + "${QPhiX_INCLUDE_DIRS}") endif() endif() @@ -274,17 +273,7 @@ if(TM_USE_FFTW) endif() if(TM_USE_DDalphaAMG) - find_package(DDAlphaAMG REQUIRED) -endif() - -# gprofiler - -if(TM_USE_GPROF) - set(PROFILE_FLAGS "-pg;-g") - if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "powerpc|powerpc64") - list(APPEND PROFILE_FLAGS "-qfullpath") - endif() - add_compile_options($:$PROFILE_FLAGS>) + find_package(DDalphaAMG REQUIRED) endif() if(TM_ENABLE_WARNINGS) @@ -292,31 +281,26 @@ if(TM_ENABLE_WARNINGS) $<$:-Wall>) endif() -# check for the presence of clock_gettime in libc or librt -check_symbol_exists(clock_gettime "time.h" TM_CLOCK_GETTIME) -check_library_exists(rt clock_gettime "" TM_CLOCK_GETTIME_IN_RT) -check_function_exists(fseeko TM_FSEEKO) - # set the parallelization if(TM_USE_MPI) - if(TM_MPI_DIMENSION EQUAL "1") + if(TM_MPI_DIMENSION STREQUAL "1") # T parallelisation set(TM_PARALLELT ON) - elseif(TM_MPI_DIMENSION EQUAL "2") + elseif(TM_MPI_DIMENSION STREQUAL "2") # XT parallelisation set(TM_PARALLELXT ON) - elseif(TM_MPI_DIMENSION EQUAL "3") + elseif(TM_MPI_DIMENSION STREQUAL "3") set(TM_PARALLELXYT ON) # XYZ parallelisation - elseif(TM_MPI_DIMENSION EQUAL "4") + elseif(TM_MPI_DIMENSION STREQUAL "4") # timeslice-splitted communications set(TM_PARALLELXYZT ON) - elseif(TM_MPI_DIMENSION EQUAL "X") + elseif(TM_MPI_DIMENSION STREQUAL "X") set(TM_PARALLELX ON) - elseif(TM_MPI_DIMENSION EQUAL "XY") + elseif(TM_MPI_DIMENSION 
STREQUAL "XY") set(TM_PARALLELXY ON) - elseif(TM_MPI_DIMENSION EQUAL "XYZ") + elseif(TM_MPI_DIMENSION STREQUAL "XYZ") set(TM_PARALLELXYZ ON) else() set(TM_PARALLELXYZT ON) @@ -346,7 +330,6 @@ if(DEFINED GIT_EXE AND EXISTS "${PROJECT_SOURCE_DIR}/.git") ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) message(STATUS "git hash ${TM_SHA}") else() - # set(TM_GIT_BRANCH "release v${SIRIUS_VERSION}") set(TM_SHA "https://github.com/etmc/tmLQCD/releases/tag/rel-${TMLQCD_VERSION_MAJOR}-${TMLQCD_VERSION_MINOR}" ) diff --git a/cmake/FindDDAlphaAMG.cmake b/cmake/FindDDAlphaAMG.cmake deleted file mode 100644 index f42c943cc..000000000 --- a/cmake/FindDDAlphaAMG.cmake +++ /dev/null @@ -1,29 +0,0 @@ -include(FindPackageHandleStandardArgs) - -find_library( - TM_DDALPHAAMG_LIBRARIES - NAMES DDalphaAMG DDalphaAMG_devel - PATH_SUFFIXES "lib" "lib64") - -find_path( - TM_DDALPHAAMG_INCLUDE_DIRS - NAMES DDalphaAMG.h - PATH_SUFFIXES "include" "include/${_pacakge_name}" "${_package_name}") - -find_package_handle_standard_args( - DDAlphaAMG DEFAULT_MSG TMLQCD_DDALPHAAMG_LIBRARIES - TMLQCD_DDALPHAAMG_INCLUDE_DIRS) - -if(NOT TARGET tmlqcd::DDalphaAMG) - add_library(tmlqcd::DDalphaAMG INTERFACE IMPORTED) - set_target_properties( - tmlqcd::DDalphaAMG PROPERTIES INTERFACE_LINK_LIBRARIES - "${TMLQCD_DDALPHAAMG_LIBRARIES}") - set_target_properties( - tmlqcd::DDalphaAMG PROPERTIES INTERFACE_INCLUDE_DIRECTORIES - "${TMLQCD_DDALPHAAMG_INCLUDE_DIRS}") -endif() - -set(TMLQCD_DDALPHAAMG_FOUND ON) -mark_as_advanced(TMLQCD_DDALPHAAMG_FOUND TMLQCD_DDALPHAAMG_LIBRARIES - TMLQCD_DDALPHAAMG_INCLUDE_DIRS) diff --git a/cmake/FindDDalphaAMG.cmake b/cmake/FindDDalphaAMG.cmake new file mode 100644 index 000000000..5f0d2450f --- /dev/null +++ b/cmake/FindDDalphaAMG.cmake @@ -0,0 +1,28 @@ +include(FindPackageHandleStandardArgs) + +find_library( + TM_DDALPHAAMG_LIBRARIES + NAMES DDalphaAMG DDalphaAMG_devel + PATH_SUFFIXES "lib" "lib64") + +find_path( + TM_DDALPHAAMG_INCLUDE_DIRS + NAMES DDalphaAMG.h + PATH_SUFFIXES 
"include") + +find_package_handle_standard_args( + DDalphaAMG DEFAULT_MSG TM_DDALPHAAMG_LIBRARIES TM_DDALPHAAMG_INCLUDE_DIRS) + +if(TM_DDALPHAAMG_LIBRARIES + AND TM_DDALPHAAMG_INCLUDE_DIRS + AND NOT TARGET tmlqcd::DDalphaAMG) + message("INCLUDE: ${TM_DDALPHAAMG_INCLUDE_DIRS}") + add_library(tmlqcd::DDalphaAMG INTERFACE IMPORTED) + set_property(TARGET tmlqcd::DDalphaAMG PROPERTY INTERFACE_LINK_LIBRARIES + "${TM_DDALPHAAMG_LIBRARIES}") + set_property( + TARGET tmlqcd::DDalphaAMG PROPERTY INTERFACE_INCLUDE_DIRECTORIES + "${TM_DDALPHAAMG_INCLUDE_DIRS}") +endif() + +mark_as_advanced(TM_DDALPHAAMG_LIBRARIES TM_DDALPHAAMG_INCLUDE_DIRS) diff --git a/cmake/tmlqcd_config_internal.h.in b/cmake/tmlqcd_config_internal.h.in index 89bc753df..fb8d7d818 100644 --- a/cmake/tmlqcd_config_internal.h.in +++ b/cmake/tmlqcd_config_internal.h.in @@ -12,9 +12,6 @@ /* Define to 1 if you have the `lemon' library (-llemon). */ #cmakedefine TM_USE_LEMON -/* 1 if clock_gettime is available for use in benchmark */ -#cmakedefine TM_CLOCK_GETTIME - /* Compile with MPI support */ #cmakedefine TM_USE_MPI @@ -31,9 +28,9 @@ #define PACKAGE_BUGREPORT "@PACKAGE_BUGREPORT@" /* Define to the full name of this package. */ -#define PACKAGE_NAME "@PROJECT_DESCRIPTION@" +#define PACKAGE_NAME "@tmlqcd_DESCRIPTION@" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "@PROJECT_VERSION@" +#define PACKAGE_STRING "@tmlqcd_VERSION@" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "@PACKAGE_TARNAME@" @@ -65,9 +62,6 @@ /* Fixed volume at compiletime */ #cmakedefine TM_FIXEDVOLUME -/* Define to 1 if fseeko (and presumably ftello) exists and is declared. 
*/ -#cmakedefine TM_FSEEKO - /* Alignment for arrays -- necessary for SSE and automated vectorization */ #define ALIGN_BASE @ALIGN_BASE@ @@ -88,7 +82,7 @@ #cmakedefine TM_FILE_OFFSET_BITS @TMLQCD_FILE_OFFSET_BITS@ /* Construct an extra copy of the gauge fields */ -#cmakedefine TM_USE_GAUGECOPY +#cmakedefine TM_USE_GAUGE_COPY /* Define to 1 to make fseeko visible on some hosts (e.g. glibc 2.2). */ #cmakedefine TM_LARGEFILE_SOURCE diff --git a/profiling/hmc_mk2/logs/example_log.out b/profiling/hmc_mk2/logs/example_log.out index 22ec86ec9..642963b16 100644 --- a/profiling/hmc_mk2/logs/example_log.out +++ b/profiling/hmc_mk2/logs/example_log.out @@ -270,7 +270,7 @@ operator 0 parsed line 229 This is the hmc code for twisted mass Wilson QCD Version 5.2.0, commit 51cf008a89944ecdd9345cdb62aaf0a203a7f306 -# The code is compiled with -DTM_GAUGE_COPY +# The code is compiled with -DTM_USE_GAUGE_COPY # The code is compiled with -DTM_USE_HALFSPINOR # the code is compiled for non-blocking MPI calls (spinor and gauge) # the code is compiled with openMP support diff --git a/src/bin/LapH_ev.c b/src/bin/LapH_ev.c deleted file mode 100644 index 08e810b36..000000000 --- a/src/bin/LapH_ev.c +++ /dev/null @@ -1,180 +0,0 @@ -/*********************************************************************** - * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach - * - * This file is part of tmLQCD. - * - * tmLQCD is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * tmLQCD is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with tmLQCD. 
If not, see . - ***********************************************************************/ -/* - * Program for computing the eigensystem of the Laplacian operator - * Authors Luigi Scorzato, Marco Cristoforetti - * - * - *******************************************************************************/ - -#ifdef HAVE_CONFIG_H -#include "tmlqcd_config.h" -#else -#error "no tmlqcd_config.h" -#endif -#include -#include -#include -#include -#include -#ifdef TM_USE_MPI -#include -#endif -#include -#include -#include "geometry_eo.h" -#include "global.h" -#include "init/init.h" -#include "mpi_init.h" -#include "ranlxd.h" -#include "read_input.h" -#include "solver/eigenvalues_Jacobi.h" -#include "start.h" -#include "su3.h" -#include "xchange/xchange.h" - -int main(int argc, char *argv[]) { - int tslice, j, k; - char conf_filename[50]; - -#ifdef TM_USE_MPI - MPI_Init(&argc, &argv); -#endif - - /* Read the input file */ - read_input("LapH.input"); - - tmlqcd_mpi_init(argc, argv); - - if (g_proc_id == 0) { -#ifdef TM_GAUGE_COPY - printf("# The code was compiled with -DTM_GAUGE_COPY\n"); -#endif -#ifdef TM_USE_HALFSPINOR - printf("# The code was compiled with -DTM_USE_HALFSPINOR\n"); -#endif -#ifdef TM_USE_SHMEM - printf("# the code was compiled with -DTM_USE_SHMEM\n"); -#ifdef TM_PERSISTENT - printf("# the code was compiled for persistent MPI calls (halfspinor only)\n"); -#endif -#endif -#ifdef TM_USE_MPI -#ifdef TM_NON_BLOCKING - printf("# the code was compiled for non-blocking MPI calls (spinor and gauge)\n"); -#endif -#endif - printf("\n"); - fflush(stdout); - } - -#ifndef WITHLAPH - printf(" Error: WITHLAPH not defined"); - exit(0); -#endif -#ifdef TM_USE_MPI -#ifndef _INDEX_INDEP_GEOM - printf(" Error: _INDEX_INDEP_GEOM not defined"); - exit(0); -#endif -#ifndef _USE_TSPLITPAR - printf(" Error: _USE_TSPLITPAR not defined"); - exit(0); -#endif -#endif -#ifdef TM_FIXEDVOLUME - printf(" Error: TM_FIXEDVOLUME not allowed"); - exit(0); -#endif - - init_gauge_field(VOLUMEPLUSRAND 
+ g_dbw2rand, 0); - init_geometry_indices(VOLUMEPLUSRAND + g_dbw2rand); - - if (g_proc_id == 0) { - fprintf(stdout, "The number of processes is %d \n", g_nproc); - printf("# The lattice size is %d x %d x %d x %d\n", (int)(T * g_nproc_t), (int)(LX * g_nproc_x), - (int)(LY * g_nproc_y), (int)(g_nproc_z * LZ)); - printf("# The local lattice size is %d x %d x %d x %d\n", (int)(T), (int)(LX), (int)(LY), - (int)LZ); - printf("# Computing LapH eigensystem \n"); - - fflush(stdout); - } - - /* define the geometry */ - geometry(); - - start_ranlux(1, 123456); - - /* Read Gauge field */ - sprintf(conf_filename, "%s.%.4d", gauge_input_filename, nstore); - if (g_cart_id == 0) { - printf("#\n# Trying to read gauge field from file %s in %s precision.\n", conf_filename, - (gauge_precision_read_flag == 32 ? "single" : "double")); - fflush(stdout); - } - if ((j = read_gauge_field(conf_filename, g_gauge_field)) != 0) { - fprintf(stderr, "Error %d while reading gauge field from %s\n Aborting...\n", j, conf_filename); - exit(-2); - } - - if (g_cart_id == 0) { - printf("# Finished reading gauge field.\n"); - fflush(stdout); - } - -#ifdef TM_USE_MPI - /*For parallelization: exchange the gaugefield */ - xchange_gauge(g_gauge_field); -#endif - - /* Init Jacobi field */ - init_jacobi_field(SPACEVOLUME + SPACERAND, 3); - -#ifdef TM_USE_MPI - { - /* for debugging in parallel set i_gdb = 0 */ - volatile int i_gdb = 8; - char hostname[256]; - gethostname(hostname, sizeof(hostname)); - printf("PID %d on %s ready for attach\n", getpid(), hostname); - fflush(stdout); - if (g_cart_id == 0) { - while (0 == i_gdb) { - sleep(5); - } - } - } - - MPI_Barrier(MPI_COMM_WORLD); -#endif - - for (k = 0; k < 3; k++) random_jacobi_field(g_jacobi_field[k], SPACEVOLUME); - - /* Compute LapH Eigensystem */ - - for (tslice = 0; tslice < T; tslice++) { - eigenvalues_Jacobi(&no_eigenvalues, 5000, eigenvalue_precision, 0, tslice, nstore); - } - -#ifdef TM_USE_MPI - MPI_Finalize(); -#endif - return (0); -} diff --git 
a/src/bin/benchmark.c b/src/bin/benchmark.c index 72d8c8f4d..b2f4ee68c 100644 --- a/src/bin/benchmark.c +++ b/src/bin/benchmark.c @@ -123,8 +123,8 @@ int main(int argc, char *argv[]) { tmlqcd_mpi_init(argc, argv); if (g_proc_id == 0) { -#ifdef TM_GAUGE_COPY - printf("# The code was compiled with -DTM_GAUGE_COPY\n"); +#ifdef TM_USE_GAUGE_COPY + printf("# The code was compiled with -DTM_USE_GAUGE_COPY\n"); #endif #ifdef TM_USE_HALFSPINOR printf("# The code was compiled with -DTM_USE_HALFSPINOR\n"); @@ -144,7 +144,7 @@ int main(int argc, char *argv[]) { fflush(stdout); } -#ifdef TM_GAUGE_COPY +#ifdef TM_USE_GAUGE_COPY init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); #else init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0); diff --git a/src/bin/deriv_mg_tune.c b/src/bin/deriv_mg_tune.c index 75595bc60..f65b22c48 100644 --- a/src/bin/deriv_mg_tune.c +++ b/src/bin/deriv_mg_tune.c @@ -136,7 +136,7 @@ int main(int argc, char *argv[]) { g_mu = g_mu1; -#ifdef TM_GAUGE_COPY +#ifdef TM_USE_GAUGE_COPY status = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); status += init_gauge_field_32(VOLUMEPLUSRAND + g_dbw2rand, 1); #else diff --git a/src/bin/hmc_tm.c b/src/bin/hmc_tm.c index 0d95a3b3c..399362d0b 100644 --- a/src/bin/hmc_tm.c +++ b/src/bin/hmc_tm.c @@ -168,7 +168,7 @@ int main(int argc, char *argv[]) { g_mu = g_mu1; -#ifdef TM_GAUGE_COPY +#ifdef TM_USE_GAUGE_COPY status = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); status += init_gauge_field_32(VOLUMEPLUSRAND + g_dbw2rand, 1); #else diff --git a/src/bin/invert.c b/src/bin/invert.c index c3111decb..bb6f15c10 100644 --- a/src/bin/invert.c +++ b/src/bin/invert.c @@ -165,7 +165,7 @@ int main(int argc, char *argv[]) { g_dbw2rand = 0; #endif -#ifdef TM_GAUGE_COPY +#ifdef TM_USE_GAUGE_COPY j = init_gauge_field(VOLUMEPLUSRAND, 1); j += init_gauge_field_32(VOLUMEPLUSRAND, 1); #else diff --git a/src/bin/offline_measurement.c b/src/bin/offline_measurement.c index 72a828fb7..c1422858f 100644 --- a/src/bin/offline_measurement.c 
+++ b/src/bin/offline_measurement.c @@ -127,7 +127,7 @@ int main(int argc, char *argv[]) { g_dbw2rand = 0; #endif -#ifdef TM_GAUGE_COPY +#ifdef TM_USE_GAUGE_COPY j = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); #else j = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0); diff --git a/src/bin/tests/check_locallity.c b/src/bin/tests/check_locallity.c index f03806f21..01d12826b 100644 --- a/src/bin/tests/check_locallity.c +++ b/src/bin/tests/check_locallity.c @@ -18,13 +18,13 @@ ***********************************************************************/ #include -#include #include #include #include #include #include #include +#include #ifdef TM_USE_MPI #include #endif @@ -77,7 +77,7 @@ int main(int argc, char *argv[]) { double *norm; struct stout_parameters params_smear; -#ifdef TM_GAUGE_COPY +#ifdef TM_USE_GAUGE_COPY int kb = 0; #endif #ifdef TM_USE_MPI @@ -144,7 +144,7 @@ int main(int argc, char *argv[]) { g_dbw2rand = 0; #endif -#ifdef TM_GAUGE_COPY +#ifdef TM_USE_GAUGE_COPY j = init_gauge_field(VOLUMEPLUSRAND, 1); #else j = init_gauge_field(VOLUMEPLUSRAND, 0); diff --git a/src/bin/tests/hopping_test.c b/src/bin/tests/hopping_test.c index da60c83ba..0e5ff03e7 100644 --- a/src/bin/tests/hopping_test.c +++ b/src/bin/tests/hopping_test.c @@ -102,8 +102,8 @@ int main(int argc, char *argv[]) { tmlqcd_mpi_init(argc, argv); if (g_proc_id == 0) { -#ifdef TM_GAUGE_COPY - printf("# The code was compiled with -DTM_GAUGE_COPY\n"); +#ifdef TM_USE_GAUGE_COPY + printf("# The code was compiled with -DTM_USE_GAUGE_COPY\n"); #endif #ifdef TM_USE_HALFSPINOR printf("# The code was compiled with -DTM_USE_HALFSPINOR\n"); @@ -123,7 +123,7 @@ int main(int argc, char *argv[]) { fflush(stdout); } -#ifdef TM_GAUGE_COPY +#ifdef TM_USE_GAUGE_COPY init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); #else init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0); diff --git a/src/bin/tests/qphix_test_Dslash.c b/src/bin/tests/qphix_test_Dslash.c index b4218d3e6..41e2602a4 100644 --- 
a/src/bin/tests/qphix_test_Dslash.c +++ b/src/bin/tests/qphix_test_Dslash.c @@ -105,7 +105,7 @@ int main(int argc, char* argv[]) { tmlqcd_mpi_init(argc, argv); g_dbw2rand = 0; -#ifdef TM_GAUGE_COPY +#ifdef TM_USE_GAUGE_COPY init_gauge_field(VOLUMEPLUSRAND, 1); #else init_gauge_field(VOLUMEPLUSRAND, 0); @@ -180,7 +180,7 @@ int main(int argc, char* argv[]) { #endif g_update_gauge_copy = 1; -#ifdef TM_GAUGE_COPY +#ifdef TM_USE_GAUGE_COPY update_backward_gauge(g_gauge_field); #endif diff --git a/src/bin/tests/test_eigenvalues.c b/src/bin/tests/test_eigenvalues.c index 759d8dd2f..c52d29cf8 100644 --- a/src/bin/tests/test_eigenvalues.c +++ b/src/bin/tests/test_eigenvalues.c @@ -227,7 +227,7 @@ int main(int argc, char *argv[]) { g_eps_sq_acc = g_eps_sq_acc1; g_eps_sq_force = g_eps_sq_force1; -#ifdef TM_GAUGE_COPY +#ifdef TM_USE_GAUGE_COPY j = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); #else j = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0); @@ -277,8 +277,8 @@ int main(int argc, char *argv[]) { #ifdef TM_NEW_GEOMETRY printf("# The code was compiled with -DTM_NEW_GEOMETRY\n"); #endif -#ifdef TM_GAUGE_COPY - printf("# The code was compiled with -DTM_GAUGE_COPY\n"); +#ifdef TM_USE_GAUGE_COPY + printf("# The code was compiled with -DTM_USE_GAUGE_COPY\n"); #endif printf("# The lattice size is %d x %d x %d x %d\n", (int)(T * g_nproc_t), (int)(LX * g_nproc_x), (int)(LY), (int)(LZ)); @@ -430,7 +430,7 @@ int main(int argc, char *argv[]) { #ifdef TM_USE_MPI xchange_gauge(g_gauge_field); #endif -#ifdef TM_GAUGE_COPY +#ifdef TM_USE_GAUGE_COPY update_backward_gauge(); #endif diff --git a/src/bin/tests/test_lemon.c b/src/bin/tests/test_lemon.c index 3cef7689c..9ef46be7b 100644 --- a/src/bin/tests/test_lemon.c +++ b/src/bin/tests/test_lemon.c @@ -66,7 +66,7 @@ int main(int argc, char *argv[]) { tmlqcd_mpi_init(argc, argv); -#ifdef TM_GAUGE_COPY +#ifdef TM_USE_GAUGE_COPY init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); #else init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0); 
diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index ea2f7e41d..ebed35308 100644 --- a/src/lib/CMakeLists.txt +++ b/src/lib/CMakeLists.txt @@ -370,7 +370,7 @@ list( list(APPEND TEST_SRC_C test/check_xchange.c test/check_geometry.c test/overlaptests.c) if(TM_USE_QPHIX) - list(APPEND MAIN_SRC_C QphiX/qphix_interface.cpp) + list(APPEND MAIN_SRC_C qphix/qphix_interface.cpp) endif() if(TM_USE_QUDA) @@ -404,11 +404,11 @@ include_directories( # cmake 4.0 uses a different syntax for the option if(CMAKE_MAJOR_VERSION LESS 4) - flex_target(tmlqcd_input_read read_input.l read_input.c - COMPILE_FLAGS "-Ca -Ptmlqcd") + flex_target(tmlqcd_input_read read_input.l ${CMAKE_BINARY_DIR}/read_input.c + COMPILE_FLAGS "-Ca -Ptmlqcd -i") else() - flex_target(tmlqcd_input_read read_input.l read_input.c OPTIONS - "-Ca -Ptmlqcd") + flex_target(tmlqcd_input_read read_input.l ${CMAKE_BINARY_DIR}/read_input.c OPTIONS + "-Ca -Ptmlqcd -i") endif() # create a target library with namespacing because cmake does not know name @@ -425,8 +425,7 @@ set_target_properties(hmc PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION 1) # define a library and add the dependencies target_link_libraries( hmc - PUBLIC $<$:rt> - $<$:tmlqcd::DDalphaAMG> + PUBLIC $<$:tmlqcd::DDalphaAMG> $<$:tmlqcd::qphix> $<$:tmlqcd::fftw3> $<$:QUDA::quda> @@ -439,7 +438,7 @@ target_link_libraries( roc::hipblas hip::host> tmlqcd::clime - $<$:clemon::lemon> + $<$:lemon::lemon> ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES} $<$:MPI::MPI_C diff --git a/src/lib/DDalphaAMG_interface.c b/src/lib/DDalphaAMG_interface.c index 80bff4fcc..bf2da4bef 100644 --- a/src/lib/DDalphaAMG_interface.c +++ b/src/lib/DDalphaAMG_interface.c @@ -207,7 +207,8 @@ static inline int MG_check(spinor *const phi_new, spinor *const phi_old, const i "ERROR: something bad happened... MG converged giving the wrong solution!! Trying to " "restart... 
\n"); printf( - "ERROR contd: || s - f_{tmLQC} * f_{TM_USE_DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e > %e " + "ERROR contd: || s - f_{tmLQC} * f_{TM_USE_DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = " + "%e > %e " "\n", differ[0], differ[1], differ[0] / differ[1], precision); } @@ -215,8 +216,9 @@ static inline int MG_check(spinor *const phi_new, spinor *const phi_old, const i } if (g_debug_level > 0 && g_proc_id == 0) - printf("MGTEST: || s - f_{tmLQC} * f_{TM_USE_DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e \n", - differ[0], differ[1], differ[0] / differ[1]); + printf( + "MGTEST: || s - f_{tmLQC} * f_{TM_USE_DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e \n", + differ[0], differ[1], differ[0] / differ[1]); return 1; } @@ -257,7 +259,8 @@ static inline int MG_check_nd(spinor *const up_new, spinor *const dn_new, spinor "ERROR: something bad happened... MG converged giving the wrong solution!! Trying to " "restart... \n"); printf( - "ERROR contd: || s - f_{tmLQC} * f_{TM_USE_DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e > %e " + "ERROR contd: || s - f_{tmLQC} * f_{TM_USE_DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = " + "%e > %e " "\n", differ[0], differ[1], differ[0] / differ[1], precision); } @@ -265,8 +268,9 @@ static inline int MG_check_nd(spinor *const up_new, spinor *const dn_new, spinor } if (g_debug_level > 0 && g_proc_id == 0) - printf("MGTEST: || s - f_{tmLQC} * f_{TM_USE_DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e \n", - differ[0], differ[1], differ[0] / differ[1]); + printf( + "MGTEST: || s - f_{tmLQC} * f_{TM_USE_DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e \n", + differ[0], differ[1], differ[0] / differ[1]); return 1; } @@ -304,7 +308,8 @@ static inline int MG_mms_check_nd(spinor **const up_new, spinor **const dn_new, "ERROR: something bad happened... MG converged giving the wrong solution!! Trying to " "restart... 
\n"); printf( - "ERROR contd: || s - f_{tmLQC} * f_{TM_USE_DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e > " + "ERROR contd: || s - f_{tmLQC} * f_{TM_USE_DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = " + "%e > " "%e \n", differ[0], differ[1], differ[0] / differ[1], precision[i]); } @@ -313,8 +318,9 @@ static inline int MG_mms_check_nd(spinor **const up_new, spinor **const dn_new, } if (g_debug_level > 0 && g_proc_id == 0) - printf("MGTEST: || s - f_{tmLQC} * f_{TM_USE_DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e \n", - differ[0], differ[1], differ[0] / differ[1]); + printf( + "MGTEST: || s - f_{tmLQC} * f_{TM_USE_DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e \n", + differ[0], differ[1], differ[0] / differ[1]); } finalize_solver(check_vect, 2); @@ -367,8 +373,8 @@ static int MG_pre_solve(su3 **gf) { mg_do_setup = 0; mg_tau = gauge_tau; if (mg_status.success && g_proc_id == 0) - printf("TM_USE_DDalphaAMG setup ran, time %.2f sec (%.2f %% on coarse grid)\n", mg_status.time, - 100. * (mg_status.coarse_time / mg_status.time)); + printf("TM_USE_DDalphaAMG setup ran, time %.2f sec (%.2f %% on coarse grid)\n", + mg_status.time, 100. * (mg_status.coarse_time / mg_status.time)); else if (g_proc_id == 0) printf("ERROR: setup procedure did not run correctly"); } @@ -384,8 +390,8 @@ static int MG_pre_solve(su3 **gf) { mg_update_setup = 0; mg_tau = gauge_tau; if (mg_status.success && g_proc_id == 0) - printf("TM_USE_DDalphaAMG setup ran, time %.2f sec (%.2f %% on coarse grid)\n", mg_status.time, - 100. * (mg_status.coarse_time / mg_status.time)); + printf("TM_USE_DDalphaAMG setup ran, time %.2f sec (%.2f %% on coarse grid)\n", + mg_status.time, 100. 
* (mg_status.coarse_time / mg_status.time)); else if (g_proc_id == 0) printf("ERROR: setup updating did not run correctly"); } diff --git a/src/lib/buffers/utils_generic_exchange.c b/src/lib/buffers/utils_generic_exchange.c index 474c738ad..d1a68a351 100644 --- a/src/lib/buffers/utils_generic_exchange.c +++ b/src/lib/buffers/utils_generic_exchange.c @@ -127,7 +127,7 @@ void generic_exchange(void *field_in, int bytes_per_site) { /* Following are implementations using different compile time flags */ #if defined TM_NON_BLOCKING #include "utils_generic_exchange.nonblocking.inc" -#else /* TM_NON_BLOCKING */ +#else /* TM_NON_BLOCKING */ #include "utils_generic_exchange.blocking.inc" #endif /* TM_NON_BLOCKING */ } diff --git a/src/lib/deriv_Sb.c b/src/lib/deriv_Sb.c index 7b55eb170..1427c4af0 100644 --- a/src/lib/deriv_Sb.c +++ b/src/lib/deriv_Sb.c @@ -56,7 +56,7 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field_t* const hf, const double factor) { tm_stopwatch_push(&g_timers, __func__, ""); -#ifdef TM_GAUGE_COPY +#ifdef TM_USE_GAUGE_COPY if (g_update_gauge_copy) { update_backward_gauge(hf->gaugefield); } @@ -114,7 +114,7 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field icy = g_lexic2eosub[iy]; sp = k + icy; -#if (defined TM_GAUGE_COPY && !defined TM_USE_HALFSPINOR) +#if (defined TM_USE_GAUGE_COPY && !defined TM_USE_HALFSPINOR) up = &g_gauge_field_copy[icx][0]; #else up = &hf->gaugefield[ix][0]; @@ -136,7 +136,7 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field icy = g_lexic2eosub[iy]; sm = k + icy; -#if (defined TM_GAUGE_COPY && !defined TM_USE_HALFSPINOR) +#if (defined TM_USE_GAUGE_COPY && !defined TM_USE_HALFSPINOR) um = up + 1; #else um = &hf->gaugefield[iy][0]; @@ -159,7 +159,7 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field icy = g_lexic2eosub[iy]; sp = k + icy; -#if (defined TM_GAUGE_COPY && !defined TM_USE_HALFSPINOR) +#if 
(defined TM_USE_GAUGE_COPY && !defined TM_USE_HALFSPINOR) up = um + 1; #else up = &hf->gaugefield[ix][1]; @@ -181,7 +181,7 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field icy = g_lexic2eosub[iy]; sm = k + icy; -#if (defined TM_GAUGE_COPY && !defined TM_USE_HALFSPINOR) +#if (defined TM_USE_GAUGE_COPY && !defined TM_USE_HALFSPINOR) um = up + 1; #else um = &hf->gaugefield[iy][1]; @@ -203,7 +203,7 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field icy = g_lexic2eosub[iy]; sp = k + icy; -#if (defined TM_GAUGE_COPY && !defined TM_USE_HALFSPINOR) +#if (defined TM_USE_GAUGE_COPY && !defined TM_USE_HALFSPINOR) up = um + 1; #else up = &hf->gaugefield[ix][2]; @@ -225,7 +225,7 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field icy = g_lexic2eosub[iy]; sm = k + icy; -#if (defined TM_GAUGE_COPY && !defined TM_USE_HALFSPINOR) +#if (defined TM_USE_GAUGE_COPY && !defined TM_USE_HALFSPINOR) um = up + 1; #else um = &hf->gaugefield[iy][2]; @@ -247,7 +247,7 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field icy = g_lexic2eosub[iy]; sp = k + icy; -#if (defined TM_GAUGE_COPY && !defined TM_USE_HALFSPINOR) +#if (defined TM_USE_GAUGE_COPY && !defined TM_USE_HALFSPINOR) up = um + 1; #else up = &hf->gaugefield[ix][3]; @@ -269,7 +269,7 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field icy = g_lexic2eosub[iy]; sm = k + icy; -#if (defined TM_GAUGE_COPY && !defined TM_USE_HALFSPINOR) +#if (defined TM_USE_GAUGE_COPY && !defined TM_USE_HALFSPINOR) um = up + 1; #else um = &hf->gaugefield[iy][3]; diff --git a/src/lib/geometry_eo.c b/src/lib/geometry_eo.c index ceb348e1a..f89189357 100644 --- a/src/lib/geometry_eo.c +++ b/src/lib/geometry_eo.c @@ -274,7 +274,8 @@ int Index(const int x0, const int x1, const int x2, const int x3) { y3 = (x3 + LZ) % LZ; ix = ((y0 * LX + y1) * LY + y2) * LZ + y3; -#if ((defined TM_PARALLELT) || (defined 
TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT)) +#if ((defined TM_PARALLELT) || (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || \ + (defined TM_PARALLELXYZT)) if (x0 == T) { ix = VOLUME + y3 + LZ * y2 + LZ * LY * y1; } @@ -433,7 +434,8 @@ int Index(const int x0, const int x1, const int x2, const int x3) { /* The DBW2 stuff --> second boundary slice */ /* This we put a the very end. */ -#if ((defined TM_PARALLELT) || (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT)) +#if ((defined TM_PARALLELT) || (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || \ + (defined TM_PARALLELXYZT)) if (x0 == T + 1) { ix = VOLUMEPLUSRAND + y3 + LZ * y2 + LZ * LY * y1; #if ((defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT)) @@ -685,14 +687,16 @@ void geometry() { xeven = malloc(VOLUMEPLUSRAND * sizeof(int)); -#if (defined TM_PARALLELT || defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) +#if (defined TM_PARALLELT || defined TM_PARALLELXT || defined TM_PARALLELXYT || \ + defined TM_PARALLELXYZT) startvaluet = 1; #endif -#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT || defined TM_PARALLELX || \ - defined TM_PARALLELXY || defined TM_PARALLELXYZ) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT || \ + defined TM_PARALLELX || defined TM_PARALLELXY || defined TM_PARALLELXYZ) startvaluex = 1; #endif -#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT || defined TM_PARALLELXY || defined TM_PARALLELXYZ) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT || defined TM_PARALLELXY || \ + defined TM_PARALLELXYZ) startvaluey = 1; #endif #if (defined TM_PARALLELXYZT || defined TM_PARALLELXYZ) @@ -851,7 +855,6 @@ void geometry() { } } - #endif /* TM_PARALLELXYZ || TM_PARALLELXYZT*/ /* The rectangular gauge action part */ @@ -861,7 +864,8 @@ void geometry() { printf("# Initialising rectangular gauge action 
stuff\n"); fflush(stdout); } -#if (defined TM_PARALLELT || defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) +#if (defined TM_PARALLELT || defined TM_PARALLELXT || defined TM_PARALLELXYT || \ + defined TM_PARALLELXYZT) for (x1 = -startvaluex; x1 < (LX + startvaluex); x1++) { for (x2 = -startvaluey; x2 < (LY + startvaluey); x2++) { for (x3 = -startvaluez; x3 < (LZ + startvaluez); x3++) { @@ -910,8 +914,8 @@ void geometry() { } } #endif -#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT || defined TM_PARALLELX || \ - defined TM_PARALLELXY || defined TM_PARALLELXYZ) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT || \ + defined TM_PARALLELX || defined TM_PARALLELXY || defined TM_PARALLELXYZ) for (x0 = -startvaluet; x0 < (T + startvaluet); x0++) { for (x2 = -startvaluey; x2 < (LY + startvaluey); x2++) { for (x3 = -startvaluez; x3 < (LZ + startvaluez); x3++) { @@ -959,7 +963,8 @@ void geometry() { } } #endif -#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT || defined TM_PARALLELXY || defined TM_PARALLELXYZ) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT || defined TM_PARALLELXY || \ + defined TM_PARALLELXYZ) for (x0 = -startvaluet; x0 < (T + startvaluet); x0++) { for (x1 = -startvaluex; x1 < (LX + startvaluex); x1++) { for (x3 = -startvaluez; x3 < (LZ + startvaluez); x3++) { diff --git a/src/lib/global.h b/src/lib/global.h index b0d3b1ac2..31d6dc0d4 100644 --- a/src/lib/global.h +++ b/src/lib/global.h @@ -121,7 +121,6 @@ EXTERN int *g_field_z_disp_even_up; EXTERN int *g_field_z_disp_odd_dn; EXTERN int *g_field_z_disp_odd_up; - /* IF PHMC */ EXTERN spinor **g_chi_up_spinor_field; EXTERN spinor **g_chi_dn_spinor_field; diff --git a/src/lib/init/init.h b/src/lib/init/init.h index 0fe9ae51b..127622a8b 100644 --- a/src/lib/init/init.h +++ b/src/lib/init/init.h @@ -33,8 +33,8 @@ #include "init/init_gauge_tmp.h" #include "init/init_geometry_indices.h" #include 
"init/init_global_states.h" -#include "init/init_parallel.h" #include "init/init_moment_field.h" +#include "init/init_parallel.h" #include "init/init_spinor_field.h" #include "init/init_stout_smear_vars.h" #ifdef TM_USE_OMP diff --git a/src/lib/init/init_dirac_halfspinor.c b/src/lib/init/init_dirac_halfspinor.c index 891a703e2..6b4fba174 100644 --- a/src/lib/init/init_dirac_halfspinor.c +++ b/src/lib/init/init_dirac_halfspinor.c @@ -69,15 +69,13 @@ int init_dirac_halfspinor() { errno = 0; return (1); } - sendBuffer = - (halfspinor *)(((unsigned long int)(sendBuffer_) + ALIGN_BASE + 1) & ~ALIGN_BASE); + sendBuffer = (halfspinor *)(((unsigned long int)(sendBuffer_) + ALIGN_BASE + 1) & ~ALIGN_BASE); if ((void *)(recvBuffer_ = (halfspinor *)calloc(RAND / 2 + 8, sizeof(halfspinor))) == NULL) { printf("malloc errno : %d\n", errno); errno = 0; return (1); } - recvBuffer = - (halfspinor *)(((unsigned long int)(recvBuffer_) + ALIGN_BASE + 1) & ~ALIGN_BASE); + recvBuffer = (halfspinor *)(((unsigned long int)(recvBuffer_) + ALIGN_BASE + 1) & ~ALIGN_BASE); #endif for (int ieo = 0; ieo < 2; ieo++) { @@ -94,7 +92,8 @@ int init_dirac_halfspinor() { NBPointer[ieo][8 * i + 2 * mu + 1] = &HalfSpinor[8 * g_lexic2eosub[g_iup[j][mu]] + 2 * mu + 1]; } -#if ((defined TM_PARALLELT) || (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT)) +#if ((defined TM_PARALLELT) || (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || \ + (defined TM_PARALLELXYZT)) if (t == 0) { k = (g_lexic2eosub[g_idn[j][0]] - VOLUME / 2); NBPointer[ieo][8 * i] = &sendBuffer[k]; @@ -154,7 +153,8 @@ int init_dirac_halfspinor() { for (int mu = 0; mu < 8; mu++) { NBPointer[ieo][8 * i + mu] = &HalfSpinor[8 * i + mu]; } -#if ((defined TM_PARALLELT) || (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT)) +#if ((defined TM_PARALLELT) || (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || \ + (defined TM_PARALLELXYZT)) if (t == T - 1) { NBPointer[ieo][8 * i] = 
&recvBuffer[(g_lexic2eosub[g_iup[j][0]] - VOLUME / 2)]; } @@ -240,7 +240,8 @@ int init_dirac_halfspinor32() { NBPointer32[ieo][8 * i + 2 * mu + 1] = &HalfSpinor32[8 * g_lexic2eosub[g_iup[j][mu]] + 2 * mu + 1]; } -#if ((defined TM_PARALLELT) || (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT)) +#if ((defined TM_PARALLELT) || (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || \ + (defined TM_PARALLELXYZT)) if (t == 0) { k = (g_lexic2eosub[g_idn[j][0]] - VOLUME / 2); NBPointer32[ieo][8 * i] = &sendBuffer32[k]; @@ -300,7 +301,8 @@ int init_dirac_halfspinor32() { for (mu = 0; mu < 8; mu++) { NBPointer32[ieo][8 * i + mu] = &HalfSpinor32[8 * i + mu]; } -#if ((defined TM_PARALLELT) || (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT)) +#if ((defined TM_PARALLELT) || (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || \ + (defined TM_PARALLELXYZT)) if (t == T - 1) { NBPointer32[ieo][8 * i] = &recvBuffer32[(g_lexic2eosub[g_iup[j][0]] - VOLUME / 2)]; } diff --git a/src/lib/init/init_geometry_indices.c b/src/lib/init/init_geometry_indices.c index 6b75fc83a..edd568d93 100644 --- a/src/lib/init/init_geometry_indices.c +++ b/src/lib/init/init_geometry_indices.c @@ -74,7 +74,6 @@ int init_geometry_indices(const int V) { if ((void *)g_field_z_disp_odd_up == NULL) return (17); #endif - g_coord = (int **)calloc(VOLUME, sizeof(int *)); if ((void *)g_coord == NULL) return (19); for (i = 0; i < VOLUME; i++) { diff --git a/src/lib/io/utils_write_first_message.c b/src/lib/io/utils_write_first_message.c index 287d67c37..4233789cc 100644 --- a/src/lib/io/utils_write_first_message.c +++ b/src/lib/io/utils_write_first_message.c @@ -30,9 +30,9 @@ int write_first_messages(FILE* parameterfile, char const* const executable, TMLQCD_PACKAGE_VERSION, git_hash); printf("%s", message); fprintf(parameterfile, "%s", message); -#ifdef TM_GAUGE_COPY - printf("# The code is compiled with -DTM_GAUGE_COPY\n"); - fprintf(parameterfile, "# 
The code is compiled with -DTM_GAUGE_COPY\n"); +#ifdef TM_USE_GAUGE_COPY + printf("# The code is compiled with -DTM_USE_GAUGE_COPY\n"); + fprintf(parameterfile, "# The code is compiled with -DTM_USE_GAUGE_COPY\n"); #endif #ifdef TM_USE_HALFSPINOR printf("# The code is compiled with -DTM_USE_HALFSPINOR\n"); diff --git a/src/lib/linalg/assign.c b/src/lib/linalg/assign.c index fd04de1e4..19fcda44b 100644 --- a/src/lib/linalg/assign.c +++ b/src/lib/linalg/assign.c @@ -47,4 +47,3 @@ void assign_32(spinor32 *const R, spinor32 *const S, const int N) { memcpy(R, S, N * sizeof(spinor32)); return; } - diff --git a/src/lib/linalg/assign_add_mul_r_32.c b/src/lib/linalg/assign_add_mul_r_32.c index 9f6b1a72f..5ab9366ac 100644 --- a/src/lib/linalg/assign_add_mul_r_32.c +++ b/src/lib/linalg/assign_add_mul_r_32.c @@ -35,7 +35,7 @@ #include "su3.h" void assign_add_mul_r_32_orphaned(spinor32 *const R, spinor32 *const S, const float c, - const int N) { + const int N) { #ifdef TM_USE_OMP #pragma omp parallel for #endif diff --git a/src/lib/linalg/scalar_prod_r.c b/src/lib/linalg/scalar_prod_r.c index f4fd9293b..c5288aa34 100644 --- a/src/lib/linalg/scalar_prod_r.c +++ b/src/lib/linalg/scalar_prod_r.c @@ -97,4 +97,3 @@ double scalar_prod_r(const spinor *const S, const spinor *const R, const int N, #endif return res; } - diff --git a/src/lib/matrix_utils.c b/src/lib/matrix_utils.c index d5c4198ea..63c98657b 100644 --- a/src/lib/matrix_utils.c +++ b/src/lib/matrix_utils.c @@ -30,9 +30,8 @@ #ifndef TM_USE_OMP static #endif - void - exponent_from_coefficients(su3 *out, _Complex double f0, _Complex double f1, _Complex double f2, - su3 const *in) { + void exponent_from_coefficients(su3 *out, _Complex double f0, _Complex double f1, + _Complex double f2, su3 const *in) { su3 ALIGN tmp; _complex_times_su3(tmp, f2, *in); _su3_add_equals_complex_identity(tmp, f1); diff --git a/src/lib/measure_gauge_action.c b/src/lib/measure_gauge_action.c index 1f7cb6ad5..ecbe7a888 100644 --- 
a/src/lib/measure_gauge_action.c +++ b/src/lib/measure_gauge_action.c @@ -26,10 +26,10 @@ * Returns the value of the action ************************************************************************/ -#include #include #include #include +#include #ifdef TM_USE_OMP #include #endif diff --git a/src/lib/misc_types.h b/src/lib/misc_types.h index fee62159f..412719dce 100644 --- a/src/lib/misc_types.h +++ b/src/lib/misc_types.h @@ -101,7 +101,7 @@ typedef enum tm_mpi_thread_level_t { TM_MPI_THREAD_SINGLE = QMP_THREAD_SINGLE, TM_MPI_THREAD_MULTIPLE = QMP_THREAD_MULTIPLE } tm_mpi_thread_level_t; -#elif defined(TM_USE_MPI) +#elif defined(TM_USE_MPI) typedef enum tm_mpi_thread_level_t { TM_MPI_THREAD_SINGLE = MPI_THREAD_SERIALIZED, TM_MPI_THREAD_MULTIPLE = MPI_THREAD_MULTIPLE diff --git a/src/lib/mpi_init.c b/src/lib/mpi_init.c index cc09fd4cd..f245f0556 100644 --- a/src/lib/mpi_init.c +++ b/src/lib/mpi_init.c @@ -347,18 +347,20 @@ void tmlqcd_mpi_init(int argc, char *argv[]) { for (i = 0; i < 8; i++) { g_nb_list[i] = g_cart_id; } -#if (defined TM_PARALLELT || defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) +#if (defined TM_PARALLELT || defined TM_PARALLELXT || defined TM_PARALLELXYT || \ + defined TM_PARALLELXYZT) MPI_Cart_shift(g_cart_grid, 0, 1, &g_nb_t_dn, &g_nb_t_up); g_nb_list[0] = g_nb_t_up; g_nb_list[1] = g_nb_t_dn; #endif -#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT || defined TM_PARALLELX || \ - defined TM_PARALLELXY || defined TM_PARALLELXYZ) +#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT || \ + defined TM_PARALLELX || defined TM_PARALLELXY || defined TM_PARALLELXYZ) MPI_Cart_shift(g_cart_grid, 1, 1, &g_nb_x_dn, &g_nb_x_up); g_nb_list[2] = g_nb_x_up; g_nb_list[3] = g_nb_x_dn; #endif -#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT || defined TM_PARALLELXY || defined TM_PARALLELXYZ) +#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT || defined TM_PARALLELXY 
|| \ + defined TM_PARALLELXYZ) MPI_Cart_shift(g_cart_grid, 2, 1, &g_nb_y_dn, &g_nb_y_up); g_nb_list[4] = g_nb_y_up; g_nb_list[5] = g_nb_y_dn; @@ -552,7 +554,6 @@ void tmlqcd_mpi_init(int argc, char *argv[]) { MPI_Type_commit(&lfield_z_slice_cont32); MPI_Type_commit(&lfield_z_slice_gath32); - /* The internal z_ and zt_ slices are constructed in geometry() with MPI_Type_indexed() */ /* Now the derivative fields */ diff --git a/src/lib/mpi_init.h b/src/lib/mpi_init.h index d9476e662..c6e816946 100644 --- a/src/lib/mpi_init.h +++ b/src/lib/mpi_init.h @@ -107,9 +107,8 @@ extern MPI_Datatype halffield_y_slice_cont; extern MPI_Datatype halffield_y_slice_gath; extern MPI_Datatype halffield_z_slice_cont; - -#if (defined TM_PARALLELT || defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT || \ - defined TM_PARALLELXYZ) +#if (defined TM_PARALLELT || defined TM_PARALLELXT || defined TM_PARALLELXYT || \ + defined TM_PARALLELXYZT || defined TM_PARALLELXYZ) extern MPI_Datatype field_z_slice_even_dn; extern MPI_Datatype field_z_slice_even_up; extern MPI_Datatype field_z_slice_odd_dn; diff --git a/src/lib/operator/D_psi_body.c b/src/lib/operator/D_psi_body.c index b5acd1158..f73822776 100644 --- a/src/lib/operator/D_psi_body.c +++ b/src/lib/operator/D_psi_body.c @@ -283,7 +283,7 @@ void _PSWITCH(D_psi)(_PTSWITCH(spinor) *const P, _PTSWITCH(spinor) *const Q) { _C_TYPE ALIGN32 phase_2l = (_C_TYPE)phase_2; _C_TYPE ALIGN32 phase_3l = (_C_TYPE)phase_3; -#ifdef TM_GAUGE_COPY +#ifdef TM_USE_GAUGE_COPY if (_PSWITCH(g_update_gauge_copy)) { _PSWITCH(update_backward_gauge)(_PSWITCH(g_gauge_field)); } diff --git a/src/lib/operator/Hopping_Matrix.c b/src/lib/operator/Hopping_Matrix.c index 8b106e10a..759809a8e 100644 --- a/src/lib/operator/Hopping_Matrix.c +++ b/src/lib/operator/Hopping_Matrix.c @@ -68,7 +68,7 @@ #include "operator/halfspinor_hopping.h" void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) { -#ifdef TM_GAUGE_COPY +#ifdef TM_USE_GAUGE_COPY if 
(g_update_gauge_copy) { update_backward_gauge(g_gauge_field); } @@ -91,7 +91,7 @@ void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) { #else /* thats TM_USE_HALFSPINOR */ void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) { -#ifdef TM_GAUGE_COPY +#ifdef TM_USE_GAUGE_COPY if (g_update_gauge_copy) { update_backward_gauge(g_gauge_field); } diff --git a/src/lib/operator/Hopping_Matrix_32.c b/src/lib/operator/Hopping_Matrix_32.c index 1198d52bb..0991811b7 100644 --- a/src/lib/operator/Hopping_Matrix_32.c +++ b/src/lib/operator/Hopping_Matrix_32.c @@ -63,8 +63,8 @@ #endif #include "boundary.h" #include "init/init_dirac_halfspinor.h" -#include "update_backward_gauge.h" #include "operator/Hopping_Matrix_32.h" +#include "update_backward_gauge.h" #if defined TM_USE_HALFSPINOR #include "operator/halfspinor_hopping_32.h" @@ -72,7 +72,7 @@ void Hopping_Matrix_32_orphaned(const int ieo, spinor32* const l, spinor32* const k) { #if defined TM_USE_HALFSPINOR -#ifdef TM_GAUGE_COPY +#ifdef TM_USE_GAUGE_COPY if (g_update_gauge_copy_32) { update_backward_gauge_32_orphaned(g_gauge_field_32); } diff --git a/src/lib/operator/halfspinor_body.c b/src/lib/operator/halfspinor_body.c index a2c54c7e4..3be906764 100644 --- a/src/lib/operator/halfspinor_body.c +++ b/src/lib/operator/halfspinor_body.c @@ -103,7 +103,7 @@ if (g_sloppy_precision == 1 && g_sloppy_precision_flag == 1) { #endif #if (defined TM_USE_MPI && !defined _NO_COMM) - xchange_halffield32(); + xchange_halffield32(); #endif #ifdef TM_USE_OMP @@ -237,7 +237,7 @@ if (g_sloppy_precision == 1 && g_sloppy_precision_flag == 1) { #endif #if (defined TM_USE_MPI && !defined _NO_COMM) - xchange_halffield(); + xchange_halffield(); #endif #ifdef TM_USE_OMP diff --git a/src/lib/operator/hopping_bg_dbl.c b/src/lib/operator/hopping_bg_dbl.c index 93af99e24..6f8f3778d 100644 --- a/src/lib/operator/hopping_bg_dbl.c +++ b/src/lib/operator/hopping_bg_dbl.c @@ -41,7 +41,7 @@ void Hopping_Matrix(const int ieo, 
spinor* const l, spinor* const k) { __alignx(16, l); __alignx(16, k); -#ifdef TM_GAUGE_COPY +#ifdef TM_USE_GAUGE_COPY if (g_update_gauge_copy) { update_backward_gauge(g_gauge_field); } @@ -64,7 +64,7 @@ void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) { sp = k + icy; -#if ((defined TM_GAUGE_COPY)) +#if ((defined TM_USE_GAUGE_COPY)) up = &g_gauge_field_copy[ioff][0]; #else up = &g_gauge_field[ix][0]; @@ -76,7 +76,7 @@ void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) { /*********************** direction +0 ************************/ iy = g_idn[ix][0]; icy = g_lexic2eosub[iy]; -#if (!defined TM_GAUGE_COPY) +#if (!defined TM_USE_GAUGE_COPY) um = &g_gauge_field[iy][0]; #else um = up + 1; @@ -90,7 +90,7 @@ void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) { iy = g_iup[ix][1]; icy = g_lexic2eosub[iy]; -#if ((defined TM_GAUGE_COPY)) +#if ((defined TM_USE_GAUGE_COPY)) up = um + 1; #else up += 1; @@ -104,7 +104,7 @@ void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) { iy = g_idn[ix][1]; icy = g_lexic2eosub[iy]; -#ifndef TM_GAUGE_COPY +#ifndef TM_USE_GAUGE_COPY um = &g_gauge_field[iy][1]; #else um = up + 1; @@ -117,7 +117,7 @@ void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) { iy = g_iup[ix][2]; icy = g_lexic2eosub[iy]; -#if ((defined TM_GAUGE_COPY)) +#if ((defined TM_USE_GAUGE_COPY)) up = um + 1; #else up += 1; @@ -131,7 +131,7 @@ void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) { iy = g_idn[ix][2]; icy = g_lexic2eosub[iy]; -#ifndef TM_GAUGE_COPY +#ifndef TM_USE_GAUGE_COPY um = &g_gauge_field[iy][2]; #else um = up + 1; @@ -145,7 +145,7 @@ void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) { iy = g_iup[ix][3]; icy = g_lexic2eosub[iy]; -#if ((defined TM_GAUGE_COPY)) +#if ((defined TM_USE_GAUGE_COPY)) up = um + 1; #else up += 1; @@ -158,7 +158,7 @@ void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) { iy = g_idn[ix][3]; icy = 
g_lexic2eosub[iy]; -#ifndef TM_GAUGE_COPY +#ifndef TM_USE_GAUGE_COPY um = &g_gauge_field[iy][3]; #else um = up + 1; @@ -174,7 +174,7 @@ void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) { iy = g_iup[iz][0]; icy = g_lexic2eosub[iy]; -#if ((defined TM_GAUGE_COPY)) +#if ((defined TM_USE_GAUGE_COPY)) up = um + 1; #else up = &g_gauge_field[iz][0]; diff --git a/src/lib/operator/hopping_body_dbl.c b/src/lib/operator/hopping_body_dbl.c index c3eefb74a..ea3b8cf2c 100644 --- a/src/lib/operator/hopping_body_dbl.c +++ b/src/lib/operator/hopping_body_dbl.c @@ -43,7 +43,7 @@ if (ieo == 0) { #ifndef TM_USE_OMP hi = &g_hi[16 * ioff]; -#if ((defined TM_GAUGE_COPY)) +#if ((defined TM_USE_GAUGE_COPY)) up = &g_gauge_field_copy[ioff][0]; #else up = &g_gauge_field[(*hi)][0]; @@ -60,7 +60,7 @@ hi++; for (int icx = ioff; icx < (VOLUME / 2 + ioff); icx++) { #ifdef TM_USE_OMP hi = &g_hi[16 * icx]; -#if ((defined TM_GAUGE_COPY)) +#if ((defined TM_USE_GAUGE_COPY)) up = &g_gauge_field_copy[icx][0]; #else up = &g_gauge_field[(*hi)][0]; @@ -74,7 +74,7 @@ for (int icx = ioff; icx < (VOLUME / 2 + ioff); icx++) { pn = p + (icx - ioff); #endif /*********************** direction +t ************************/ -#if (!defined TM_GAUGE_COPY) +#if (!defined TM_USE_GAUGE_COPY) um = &g_gauge_field[(*hi)][0]; #else um = up + 1; @@ -86,7 +86,7 @@ for (int icx = ioff; icx < (VOLUME / 2 + ioff); icx++) { _hop_t_p(); /*********************** direction -t ************************/ -#if ((defined TM_GAUGE_COPY)) +#if ((defined TM_USE_GAUGE_COPY)) up = um + 1; #else up += 1; @@ -97,7 +97,7 @@ for (int icx = ioff; icx < (VOLUME / 2 + ioff); icx++) { _hop_t_m(); /*********************** direction +1 ************************/ -#ifndef TM_GAUGE_COPY +#ifndef TM_USE_GAUGE_COPY um = &g_gauge_field[(*hi)][1]; #else um = up + 1; @@ -109,7 +109,7 @@ for (int icx = ioff; icx < (VOLUME / 2 + ioff); icx++) { _hop_x_p(); /*********************** direction -1 ************************/ -#if ((defined 
TM_GAUGE_COPY)) +#if ((defined TM_USE_GAUGE_COPY)) up = um + 1; #else up += 1; @@ -120,7 +120,7 @@ for (int icx = ioff; icx < (VOLUME / 2 + ioff); icx++) { _hop_x_m(); /*********************** direction +2 ************************/ -#ifndef TM_GAUGE_COPY +#ifndef TM_USE_GAUGE_COPY um = &g_gauge_field[(*hi)][2]; #else um = up + 1; @@ -132,7 +132,7 @@ for (int icx = ioff; icx < (VOLUME / 2 + ioff); icx++) { _hop_y_p(); /*********************** direction -2 ************************/ -#if ((defined TM_GAUGE_COPY)) +#if ((defined TM_USE_GAUGE_COPY)) up = um + 1; #else up += 1; @@ -143,7 +143,7 @@ for (int icx = ioff; icx < (VOLUME / 2 + ioff); icx++) { _hop_y_m(); /*********************** direction +3 ************************/ -#ifndef TM_GAUGE_COPY +#ifndef TM_USE_GAUGE_COPY um = &g_gauge_field[(*hi)][3]; #else um = up + 1; @@ -156,7 +156,7 @@ for (int icx = ioff; icx < (VOLUME / 2 + ioff); icx++) { /*********************** direction -3 ************************/ #ifndef TM_USE_OMP -#if ((defined TM_GAUGE_COPY)) +#if ((defined TM_USE_GAUGE_COPY)) up = um + 1; #else up = &g_gauge_field[(*hi)][0]; diff --git a/src/lib/operator/hopping_sgl.c b/src/lib/operator/hopping_sgl.c index 062507158..487bfc47f 100644 --- a/src/lib/operator/hopping_sgl.c +++ b/src/lib/operator/hopping_sgl.c @@ -37,7 +37,7 @@ void Hopping_Matrix(int ieo, spinor32* const l, spinor32* const k) { spinor32* restrict r, * restrict sp, * restrict sm; spinor32 temp; -#ifdef TM_GAUGE_COPY +#ifdef TM_USE_GAUGE_COPY if (g_update_gauge_copy) { update_backward_gauge(); } @@ -72,7 +72,7 @@ void Hopping_Matrix(int ieo, spinor32* const l, spinor32* const k) { icy = g_lexic2eosub[iy]; sp = k + icy; -#if ((defined TM_GAUGE_COPY)) +#if ((defined TM_USE_GAUGE_COPY)) up = &g_gauge_field_copy[icx][0]; #else up = &g_gauge_field[ix][0]; @@ -100,7 +100,7 @@ void Hopping_Matrix(int ieo, spinor32* const l, spinor32* const k) { icy = g_lexic2eosub[iy]; sm = k + icy; -#if ((defined TM_GAUGE_COPY)) +#if ((defined 
TM_USE_GAUGE_COPY)) um = up + 1; #else um = &g_gauge_field[iy][0]; @@ -129,7 +129,7 @@ void Hopping_Matrix(int ieo, spinor32* const l, spinor32* const k) { sp = k + icy; -#if ((defined TM_GAUGE_COPY)) +#if ((defined TM_USE_GAUGE_COPY)) up = um + 1; #else up += 1; @@ -157,7 +157,7 @@ void Hopping_Matrix(int ieo, spinor32* const l, spinor32* const k) { icy = g_lexic2eosub[iy]; sm = k + icy; -#ifndef TM_GAUGE_COPY +#ifndef TM_USE_GAUGE_COPY um = &g_gauge_field[iy][1]; #else um = up + 1; @@ -185,7 +185,7 @@ void Hopping_Matrix(int ieo, spinor32* const l, spinor32* const k) { icy = g_lexic2eosub[iy]; sp = k + icy; -#if ((defined TM_GAUGE_COPY)) +#if ((defined TM_USE_GAUGE_COPY)) up = um + 1; #else up += 1; @@ -212,7 +212,7 @@ void Hopping_Matrix(int ieo, spinor32* const l, spinor32* const k) { icy = g_lexic2eosub[iy]; sm = k + icy; -#ifndef TM_GAUGE_COPY +#ifndef TM_USE_GAUGE_COPY um = &g_gauge_field[iy][2]; #else um = up + 1; @@ -240,7 +240,7 @@ void Hopping_Matrix(int ieo, spinor32* const l, spinor32* const k) { icy = g_lexic2eosub[iy]; sp = k + icy; -#if ((defined TM_GAUGE_COPY)) +#if ((defined TM_USE_GAUGE_COPY)) up = um + 1; #else up += 1; @@ -267,7 +267,7 @@ void Hopping_Matrix(int ieo, spinor32* const l, spinor32* const k) { icy = g_lexic2eosub[iy]; sm = k + icy; -#ifndef TM_GAUGE_COPY +#ifndef TM_USE_GAUGE_COPY um = &g_gauge_field[iy][3]; #else um = up + 1; diff --git a/src/lib/operator/tm_sub_Hopping_Matrix.c b/src/lib/operator/tm_sub_Hopping_Matrix.c index 857404088..7edf2c954 100644 --- a/src/lib/operator/tm_sub_Hopping_Matrix.c +++ b/src/lib/operator/tm_sub_Hopping_Matrix.c @@ -56,7 +56,7 @@ void tm_sub_Hopping_Matrix(const int ieo, spinor* const l, spinor* const p, spinor* const k, complex double const cfactor) { -#ifdef TM_GAUGE_COPY +#ifdef TM_USE_GAUGE_COPY if (g_update_gauge_copy) { update_backward_gauge(g_gauge_field); } @@ -81,7 +81,7 @@ void tm_sub_Hopping_Matrix(const int ieo, spinor* const l, spinor* const p, spin #elif (!defined _NO_COMM && 
!defined TM_USE_HALFSPINOR) void tm_sub_Hopping_Matrix(const int ieo, spinor* const l, spinor* p, spinor* const k, complex double const cfactor) { -#ifdef TM_GAUGE_COPY +#ifdef TM_USE_GAUGE_COPY if (g_update_gauge_copy) { update_backward_gauge(g_gauge_field); } diff --git a/src/lib/operator/tm_times_Hopping_Matrix.c b/src/lib/operator/tm_times_Hopping_Matrix.c index 6d1abddba..9b09c090f 100644 --- a/src/lib/operator/tm_times_Hopping_Matrix.c +++ b/src/lib/operator/tm_times_Hopping_Matrix.c @@ -56,7 +56,7 @@ void tm_times_Hopping_Matrix(const int ieo, spinor* const l, spinor* const k, complex double const cfactor) { -#ifdef TM_GAUGE_COPY +#ifdef TM_USE_GAUGE_COPY if (g_update_gauge_copy) { update_backward_gauge(g_gauge_field); } @@ -81,7 +81,7 @@ void tm_times_Hopping_Matrix(const int ieo, spinor* const l, spinor* const k, #elif (!defined _NO_COMM && !defined TM_USE_HALFSPINOR) void tm_times_Hopping_Matrix(const int ieo, spinor* const l, spinor* const k, double complex const cfactor) { -#ifdef TM_GAUGE_COPY +#ifdef TM_USE_GAUGE_COPY if (g_update_gauge_copy) { update_backward_gauge(g_gauge_field); } @@ -103,4 +103,4 @@ void tm_times_Hopping_Matrix(const int ieo, spinor* const l, spinor* const k, #endif return; } -#endif //TM_USE_HALFSPINOR && !defined _NO_COMM +#endif // TM_USE_HALFSPINOR && !defined _NO_COMM diff --git a/src/lib/read_input.l b/src/lib/read_input.l index 59f002748..5eb542f87 100644 --- a/src/lib/read_input.l +++ b/src/lib/read_input.l @@ -951,7 +951,7 @@ static inline double fltlist_next_token(int * const list_end){ mg_no_shifts=0; if(myverbose) printf(" MG_MMS_Mass set to %.16f line %d operator %d\n", mg_mms_mass, line_of_file, current_operator); } - End_DDalphaAMG{SPC}* { + EndDDalphaAMG{SPC}* { if(myverbose) printf("DDalphaAMG parsed in line %d\n\n", line_of_file); BEGIN(0); } diff --git a/src/lib/smearing/utils_reunitarize_MILC.c b/src/lib/smearing/utils_reunitarize_MILC.c index b5efa2936..fec177a42 100644 --- 
a/src/lib/smearing/utils_reunitarize_MILC.c +++ b/src/lib/smearing/utils_reunitarize_MILC.c @@ -1,5 +1,5 @@ -#include "utils.ih" #include +#include "utils.ih" /* No reunitarization code seems to be available, so I've adapted (stolen) this routine from the * MILC code (who stole it elsewhere, I think ;]) -- AD. */ @@ -36,7 +36,7 @@ void reunitarize(su3 *omega) { bj2 = omega->c02; omega->c20 = bj1 * omega->c12; - omega->c20 -= bj2 *omega->c11; + omega->c20 -= bj2 * omega->c11; omega->c21 = bj2 * omega->c10; omega->c21 -= bj0 * omega->c12; diff --git a/src/lib/solver/gram-schmidt.c b/src/lib/solver/gram-schmidt.c index ffd5d6b29..4c2ee4310 100644 --- a/src/lib/solver/gram-schmidt.c +++ b/src/lib/solver/gram-schmidt.c @@ -75,7 +75,6 @@ void IteratedClassicalGS(_Complex double v[], double *vnrm, int n, int m, _Compl } } - /* * ModifiedGramSchmidt * diff --git a/src/lib/test/check_geometry.c b/src/lib/test/check_geometry.c index b9f14eb4d..20f7acc96 100644 --- a/src/lib/test/check_geometry.c +++ b/src/lib/test/check_geometry.c @@ -90,7 +90,8 @@ int check_geometry() { ix = g_ipt[x0][x1][x2][x3]; iy0 = g_iup[ix][0]; -#if (defined TM_PARALLELT || defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) +#if (defined TM_PARALLELT || defined TM_PARALLELXT || defined TM_PARALLELXYT || \ + defined TM_PARALLELXYZT) if (x0 != T - 1) { iz0 = g_ipt[(x0 + 1) % T][x1][x2][x3]; } else { @@ -176,7 +177,8 @@ int check_geometry() { } iy0 = g_idn[ix][0]; -#if (defined TM_PARALLELT || defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) +#if (defined TM_PARALLELT || defined TM_PARALLELXT || defined TM_PARALLELXYT || \ + defined TM_PARALLELXYZT) if (x0 != 0) { iz0 = g_ipt[(x0 + T - 1) % T][x1][x2][x3]; } else { @@ -1554,4 +1556,3 @@ int check_geometry() { return (0); } - diff --git a/src/lib/test/check_overlap.c b/src/lib/test/check_overlap.c index 56763cff4..b032f8cdd 100644 --- a/src/lib/test/check_overlap.c +++ b/src/lib/test/check_overlap.c @@ 
-188,7 +188,7 @@ int main(int argc, char *argv[]) { g_dbw2rand = 0; #endif -#ifdef TM_GAUGE_COPY +#ifdef TM_USE_GAUGE_COPY j = init_gauge_field(VOLUMEPLUSRAND, 1); #else j = init_gauge_field(VOLUMEPLUSRAND, 0); diff --git a/src/lib/wrapper/lib_wrapper.c b/src/lib/wrapper/lib_wrapper.c index 9f083adc5..19d36ddc6 100644 --- a/src/lib/wrapper/lib_wrapper.c +++ b/src/lib/wrapper/lib_wrapper.c @@ -60,11 +60,11 @@ #include "misc_types.h" #include "mpi_init.h" #include "operator.h" +#include "operator/clover_leaf.h" +#include "qphix_interface.h" #include "read_input.h" #include "sighandler.h" #include "start.h" -#include "operator/clover_leaf.h" -#include "qphix_interface.h" #define CONF_FILENAME_LENGTH 500 @@ -121,7 +121,7 @@ int tmLQCD_invert_init(int argc, char* argv[], const int _verbose, const int ext for (int j = 0; j < no_operators; j++) if (!operator_list[j].even_odd_flag) even_odd_flag = 0; -#ifdef TM_GAUGE_COPY +#ifdef TM_USE_GAUGE_COPY int j = init_gauge_field(VOLUMEPLUSRAND, 1); j += init_gauge_field_32(VOLUMEPLUSRAND, 1); #else diff --git a/src/lib/xchange/xchange_gauge.c b/src/lib/xchange/xchange_gauge.c index 6177a3dbb..254702822 100644 --- a/src/lib/xchange/xchange_gauge.c +++ b/src/lib/xchange/xchange_gauge.c @@ -960,5 +960,4 @@ void xchange_gauge(su3** const gf) { return; } - #endif /* TM_NON_BLOCKING */ From 83f6c401c0da71640a15f93342ad56d5ed70ee59 Mon Sep 17 00:00:00 2001 From: Mathieu Taillefumier Date: Tue, 17 Feb 2026 18:26:48 +0100 Subject: [PATCH 06/19] Improvements - QUDA_FERMIONIC_FORCES and QUDA_EXPERIMENTAL are always on - Removed KOJAK instrumentation - Added a custom Qphix find package file because the original one is broken --- .github/workflows/qphix-build.yaml | 5 ++- CMakeLists.txt | 18 +-------- cmake/FindQphix.cmake | 39 ++++++++++++++++++ cmake/tmlqcd_config_internal.h.in | 17 ++++---- src/bin/deriv_mg_tune.c | 8 ---- src/bin/hmc_tm.c | 8 ---- src/bin/invert.c | 8 ---- src/bin/offline_measurement.c | 9 ----- src/lib/deriv_Sb.c | 7 
---- src/lib/deriv_Sb_D_psi.c | 7 ---- src/lib/get_rectangle_staples.c | 6 --- src/lib/get_staples.c | 21 ---------- src/lib/operator/Hopping_Matrix_nocom.c | 3 -- src/lib/operator/halfspinor_body.c | 7 ---- src/lib/quda_interface.c | 20 ---------- src/lib/test/check_overlap.c | 8 ---- src/lib/update_gauge.c | 53 +++++++------------------ src/lib/xchange/xchange_2fields.c | 7 ---- src/lib/xchange/xchange_halffield.c | 13 ------ src/lib/xchange/xchange_lexicfield.c | 28 ------------- 20 files changed, 66 insertions(+), 226 deletions(-) create mode 100644 cmake/FindQphix.cmake diff --git a/.github/workflows/qphix-build.yaml b/.github/workflows/qphix-build.yaml index eef1b5055..ec4ec5394 100644 --- a/.github/workflows/qphix-build.yaml +++ b/.github/workflows/qphix-build.yaml @@ -172,9 +172,10 @@ jobs: -DTM_USE_OMP=ON \ -DTM_USE_LEMON=ON \ -DTM_USE_QPHIX=ON \ - -DCMAKE_CXXFLAGS="-O2 -mtune=haswell -march=haswell -mavx2 -mfma -DOMPI_SKIP_MPICXX -fopenmp" \ - -DCMAKE_CFLAGS="-O2 -mtune=haswell -march=haswell -mavx2 -mfma -DOMPI_SKIP_MPICXX -fopenmp" \ + -DCMAKE_CXX_FLAGS="-O2 -mtune=haswell -march=haswell -mavx2 -mfma -DOMPI_SKIP_MPICXX -fopenmp" \ + -DCMAKE_C_FLAGS="-O2 -mtune=haswell -march=haswell -mavx2 -mfma -DOMPI_SKIP_MPICXX -fopenmp" \ -DQPHIX_DIR="${{github.workspace}}/qphix/build/install_dir" \ + -DQMP_DIR="${{github.workspace}}/qmp/build/install_dir" \ .. 
make -j > config.log diff --git a/CMakeLists.txt b/CMakeLists.txt index a375ad14b..803feeef7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -100,7 +100,7 @@ option(TM_USE_SHMEM "Use shmem API" OFF) option(TM_USE_QUDA "Enable QUDA support" OFF) option(TM_ENABLE_WARNINGS "Enable all warnings" ON) option(TM_ENABLE_TESTS "Enable tests" OFF) - +set(TM_QPHIX_SOALEN "4" CACHE STRING "QPhiX specific parameter") # MPI dependent options cmake_dependent_option( TM_PERSISTENT_MPI "Use persistent MPI calls for halfspinor [default=no]" OFF @@ -123,12 +123,6 @@ cmake_dependent_option(TM_USE_LEMON "Use the lemon io library" OFF "TM_USE_MPI" ON) # GPU dependent options -cmake_dependent_option(TM_USE_QUDA_EXPERIMENTAL "Enable QUDA support" ON - "TM_USE_QUDA" OFF) -cmake_dependent_option( - TM_QUDA_FERMIONIC_FORCES "Enable support for fermionic forces using QUDA" ON - "TM_USE_QUDA" OFF) - cmake_dependent_option(TM_USE_NVHPC "Enable Nvidia HPC toolkit" OFF "TM_USE_CUDA" OFF) @@ -253,15 +247,7 @@ if(TM_USE_HIP OR QUDA_TARGET_HIP) endif() if(TM_USE_QPHIX) - find_package(QPhiX REQUIRED CONFIG) - message("${QPhiX_LIBRARIES}") - if(NOT TARGET tmlqcd::qphix) - add_library(tmlqcd::qphix INTERFACE IMPORTED) - set_target_properties(tmlqcd::qphix PROPERTIES INTERFACE_LINK_LIBRARIES - "${QPhiX_LIBRARIES}") - set_target_properties(tmlqcd::qphix PROPERTIES INTERFACE_INCLUDE_DIRECTORIES - "${QPhiX_INCLUDE_DIRS}") - endif() + find_package(Qphix REQUIRED) endif() # check for fftw3 (rely on pkgconfig). 
diff --git a/cmake/FindQphix.cmake b/cmake/FindQphix.cmake new file mode 100644 index 000000000..15ab2d47a --- /dev/null +++ b/cmake/FindQphix.cmake @@ -0,0 +1,39 @@ +find_library(TM_QMP_LIBS NAMES qmp PATH_SUFFIXES "lib" "lib64") +find_library(TM_QPHIX_LIBS_CODEGEN NAMES "qphix_codegen" PATH_SUFFIXES "lib" "lib64") +find_library(TM_QPHIX_LIBS_SOLVER NAMES "qphix_solver" PATH_SUFFIXES "lib" "lib64") + +message("${QMP_DIR}") + +find_path( + TM_QMP_INCLUDE_DIRS + NAMES qmp.h + PATH_SUFFIXES "include" + PATHS "${QMP_DIR}") + +find_path( + TM_QPHIX_INCLUDE_DIRS + NAMES qphix_config.h + PATH_SUFFIXES "qphix" + PATHS "${QPHIX_DIR}") +find_path( + TM_QPHIX_CODEGEN_INCLUDE_DIRS + NAMES qpx_utils.h + PATH_SUFFIXES "qphix_codegen" + PATHS "${QPHIX_DIR}") + +message("${TM_QMP_INCLUDE_DIRS} ${TM_QPHIX_INCLUDE_DIRS} ${TM_QMP_LIBS} ${TM_QPHIX_LIBS_CODEGEN} ${TM_QPHIX_LIBS_SOLVER}") + +find_package_handle_standard_args( + Qphix DEFAULT_MSG TM_QPHIX_LIBS_CODEGEN TM_QPHIX_LIBS_SOLVER TM_QPHIX_INCLUDE_DIRS TM_QMP_LIBS TM_QMP_INCLUDE_DIRS TM_QPHIX_CODEGEN_INCLUDE_DIRS) + +if(TM_QMP_LIBS + AND TM_QPHIX_INCLUDE_DIRS + AND NOT TARGET tmlqcd::qphix) + add_library(tmlqcd::qphix INTERFACE IMPORTED) + set_target_properties(tmlqcd::qphix PROPERTIES INTERFACE_LINK_LIBRARIES + "${TM_QPHIX_LIBS_CODEGEN};${TM_QPHIX_LIBS_SOLVER};${TM_QMP_LIBS}") + set_target_properties(tmlqcd::qphix PROPERTIES INTERFACE_INCLUDE_DIRECTORIES + "${TM_QMP_INCLUDE_DIRS};${TM_QPHIX_INCLUDE_DIRS}/..;${TM_QPHIX_INCLUDE_DIRS};${TM_QPHIX_CODEGEN_INCLUDE_DIRS}") +endif() + +mark_as_advanced(TM_QPHIX_LIBRARIES TM_QPHIX_INCLUDE_DIRS TM_QMP_LIBS TM_QMP_INCLUDE_DIRS) diff --git a/cmake/tmlqcd_config_internal.h.in b/cmake/tmlqcd_config_internal.h.in index fb8d7d818..145df156a 100644 --- a/cmake/tmlqcd_config_internal.h.in +++ b/cmake/tmlqcd_config_internal.h.in @@ -66,7 +66,7 @@ #define ALIGN_BASE @ALIGN_BASE@ /* Alignment compiler hint macro */ -#cmakedefine ALIGN @ALIGN@ +#define ALIGN @ALIGN@ /* Alignment for 32bit arrays -- 
necessary for SSE and automated vectorization */ #define ALIGN_BASE32 @ALIGN_BASE32@ @@ -76,10 +76,10 @@ /* Define to 1 if `lex' declares `yytext' as a `char *' by default, not a `char[]'. */ -#cmakedefine YYTEXT_POINTER +//#cmakedefine YYTEXT_POINTER /* Number of bits in a file offset, on hosts where this is settable. */ -#cmakedefine TM_FILE_OFFSET_BITS @TMLQCD_FILE_OFFSET_BITS@ +#define TM_FILE_OFFSET_BITS @TMLQCD_FILE_OFFSET_BITS@ /* Construct an extra copy of the gauge fields */ #cmakedefine TM_USE_GAUGE_COPY @@ -111,17 +111,14 @@ /* Using QUDA GPU */ #cmakedefine TM_USE_QUDA -/* Using experimental QUDA version */ -#cmakedefine TM_QUDA_EXPERIMENTAL - -/* Using QUDA fermionic forces */ -#cmakedefine TM_QUDA_FERMIONIC_FORCES - /* Using DDalphaAMG */ #cmakedefine TM_USE_DDalphaAMG /* Using QPHIX */ #cmakedefine TM_USE_QPHIX +#ifdef TM_USE_QPHIX /* Structure of Array length to use with QPhiX */ -#cmakedefine QPHIX_SOALEN @TMLQCD_QPHIX_SOALEN@ +#define QPHIX_SOALEN @TM_QPHIX_SOALEN@ +#endif + diff --git a/src/bin/deriv_mg_tune.c b/src/bin/deriv_mg_tune.c index f65b22c48..7c45524de 100644 --- a/src/bin/deriv_mg_tune.c +++ b/src/bin/deriv_mg_tune.c @@ -98,11 +98,6 @@ int main(int argc, char *argv[]) { init_critical_globals(TM_PROGRAM_DERIV_MG_TUNE); -#ifdef TM_KOJAK_INST -#pragma pomp inst init -#pragma pomp inst begin(main) -#endif - verbose = 1; g_use_clover_flag = 0; @@ -367,9 +362,6 @@ int main(int argc, char *argv[]) { #endif return (0); -#ifdef TM_KOJAK_INST -#pragma pomp inst end(main) -#endif } static void usage(const tm_ExitCode_t exit_code) { diff --git a/src/bin/hmc_tm.c b/src/bin/hmc_tm.c index 399362d0b..b68a5250f 100644 --- a/src/bin/hmc_tm.c +++ b/src/bin/hmc_tm.c @@ -113,11 +113,6 @@ int main(int argc, char *argv[]) { init_critical_globals(TM_PROGRAM_HMC_TM); -#ifdef TM_KOJAK_INST -#pragma pomp inst init -#pragma pomp inst begin(main) -#endif - strcpy(gauge_filename, "conf.save"); strcpy(nstore_filename, "nstore_counter"); strcpy(tmp_filename, 
".conf.tmp"); @@ -591,9 +586,6 @@ int main(int argc, char *argv[]) { #endif return (0); -#ifdef TM_KOJAK_INST -#pragma pomp inst end(main) -#endif } static void usage(const tm_ExitCode_t exit_code) { diff --git a/src/bin/invert.c b/src/bin/invert.c index bb6f15c10..b5040ba88 100644 --- a/src/bin/invert.c +++ b/src/bin/invert.c @@ -114,11 +114,6 @@ int main(int argc, char *argv[]) { init_critical_globals(TM_PROGRAM_INVERT); -#ifdef TM_KOJAK_INST -#pragma pomp inst init -#pragma pomp inst begin(main) -#endif - DUM_DERI = 8; DUM_MATRIX = DUM_DERI + 5; NO_OF_SPINORFIELDS = DUM_MATRIX + 4; @@ -457,9 +452,6 @@ int main(int argc, char *argv[]) { MPI_Finalize(); #endif return (0); -#ifdef TM_KOJAK_INST -#pragma pomp inst end(main) -#endif } static void usage(tm_ExitCode_t exit_code) { diff --git a/src/bin/offline_measurement.c b/src/bin/offline_measurement.c index c1422858f..b6cbc13fa 100644 --- a/src/bin/offline_measurement.c +++ b/src/bin/offline_measurement.c @@ -83,11 +83,6 @@ int main(int argc, char *argv[]) { init_critical_globals(TM_PROGRAM_OFFLINE_MEASUREMENT); -#ifdef TM_KOJAK_INST -#pragma pomp inst init -#pragma pomp inst begin(main) -#endif - DUM_DERI = 8; DUM_MATRIX = DUM_DERI + 5; NO_OF_SPINORFIELDS = DUM_MATRIX + 3; @@ -306,10 +301,6 @@ int main(int argc, char *argv[]) { MPI_Finalize(); #endif return (0); - -#ifdef TM_KOJAK_INST -#pragma pomp inst end(main) -#endif } static void usage(const tm_ExitCode_t exit_code) { diff --git a/src/lib/deriv_Sb.c b/src/lib/deriv_Sb.c index 1427c4af0..c48c8db38 100644 --- a/src/lib/deriv_Sb.c +++ b/src/lib/deriv_Sb.c @@ -83,10 +83,6 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field #ifdef TM_USE_OMP #undef static -#endif - -#ifdef TM_KOJAK_INST -#pragma pomp inst begin(derivSb) #endif if (ieo == 0) { @@ -292,7 +288,4 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field } /* OpenMP closing brace */ #endif tm_stopwatch_pop(&g_timers, 0, 1, ""); -#ifdef 
TM_KOJAK_INST -#pragma pomp inst end(derivSb) -#endif } diff --git a/src/lib/deriv_Sb_D_psi.c b/src/lib/deriv_Sb_D_psi.c index 61da4b9d2..3f3319efc 100644 --- a/src/lib/deriv_Sb_D_psi.c +++ b/src/lib/deriv_Sb_D_psi.c @@ -61,10 +61,6 @@ void deriv_Sb_D_psi(spinor* const l, spinor* const k, hamiltonian_field_t* const #ifdef TM_USE_OMP #undef static -#endif - -#ifdef TM_KOJAK_INST -#pragma pomp inst begin(derivSb) #endif /************** loop over all lattice sites ****************/ @@ -225,9 +221,6 @@ void deriv_Sb_D_psi(spinor* const l, spinor* const k, hamiltonian_field_t* const /****************** end of loop ************************/ } -#ifdef TM_KOJAK_INST -#pragma pomp inst end(derivSb) -#endif #ifdef TM_USE_OMP } /*OpenMP closing brace */ diff --git a/src/lib/get_rectangle_staples.c b/src/lib/get_rectangle_staples.c index eab6b9d9e..c8f69596b 100644 --- a/src/lib/get_rectangle_staples.c +++ b/src/lib/get_rectangle_staples.c @@ -34,9 +34,6 @@ void get_rectangle_staples_general(su3 *const v, const int x, const int mu, const su3 *const *const gf) { su3 ALIGN tmp1, tmp2; const su3 *a, *b, *c, *d, *e; -#ifdef TM_KOJAK_INST -#pragma pomp inst begin(rectstaples) -#endif _su3_zero((*v)); for (int nu = 0; nu < 4; nu++) { if (mu != nu) { @@ -178,7 +175,4 @@ void get_rectangle_staples_general(su3 *const v, const int x, const int mu, _su3_times_su3_acc((*v), tmp2, tmp1); } } -#ifdef TM_KOJAK_INST -#pragma pomp inst end(rectstaples) -#endif } diff --git a/src/lib/get_staples.c b/src/lib/get_staples.c index b33010f2c..ae7f19d09 100644 --- a/src/lib/get_staples.c +++ b/src/lib/get_staples.c @@ -35,10 +35,6 @@ void get_staples(su3* const staple, const int x, const int mu, const su3** in_ga su3 ALIGN st; const su3 *w1, *w2, *w3; -#ifdef TM_KOJAK_INST -#pragma pomp inst begin(staples) -#endif - _su3_zero(*staple); for (int k = 0; k < 4; k++) { if (k != mu) { @@ -61,9 +57,6 @@ void get_staples(su3* const staple, const int x, const int mu, const su3** in_ga 
_su3d_times_su3_acc(*staple, *w1, st); } } -#ifdef TM_KOJAK_INST -#pragma pomp inst end(staples) -#endif } void get_spacelike_staples(su3* const staple, const int x, const int mu, @@ -72,10 +65,6 @@ void get_spacelike_staples(su3* const staple, const int x, const int mu, su3 ALIGN st; const su3 *w1, *w2, *w3; -#ifdef TM_KOJAK_INST -#pragma pomp inst begin(staples) -#endif - _su3_zero(*staple); for (int k = 1; k < 4; k++) { if (k != mu) { @@ -98,9 +87,6 @@ void get_spacelike_staples(su3* const staple, const int x, const int mu, _su3d_times_su3_acc(*staple, *w1, st); } } -#ifdef TM_KOJAK_INST -#pragma pomp inst end(staples) -#endif } void get_timelike_staples(su3* const staple, const int x, const int mu, @@ -109,10 +95,6 @@ void get_timelike_staples(su3* const staple, const int x, const int mu, su3 ALIGN st; const su3 *w1, *w2, *w3; -#ifdef TM_KOJAK_INST -#pragma pomp inst begin(staples) -#endif - _su3_zero(*staple); int k = 0; if (k != mu) { @@ -134,7 +116,4 @@ void get_timelike_staples(su3* const staple, const int x, const int mu, /* v = v + w1^d * st */ _su3d_times_su3_acc(*staple, *w1, st); } -#ifdef TM_KOJAK_INST -#pragma pomp inst end(staples) -#endif } diff --git a/src/lib/operator/Hopping_Matrix_nocom.c b/src/lib/operator/Hopping_Matrix_nocom.c index c7814bbb0..00c34c38b 100644 --- a/src/lib/operator/Hopping_Matrix_nocom.c +++ b/src/lib/operator/Hopping_Matrix_nocom.c @@ -48,8 +48,5 @@ #define Hopping_Matrix Hopping_Matrix_nocom #define _NO_COMM 1 -#ifdef TM_KOJAK_INST -#undef TM_KOJAK_INST -#endif #include "Hopping_Matrix.c" diff --git a/src/lib/operator/halfspinor_body.c b/src/lib/operator/halfspinor_body.c index 3be906764..8286c89f3 100644 --- a/src/lib/operator/halfspinor_body.c +++ b/src/lib/operator/halfspinor_body.c @@ -30,10 +30,6 @@ halfspinor* restrict* phi ALIGN; halfspinor32* restrict* phi32 ALIGN; _declare_hregs(); -#ifdef TM_KOJAK_INST -#pragma pomp inst begin(hoppingmatrix) -#endif - #ifndef TM_USE_OMP s = k; _prefetch_spinor(s); @@ -320,6 
+316,3 @@ if (g_sloppy_precision == 1 && g_sloppy_precision_flag == 1) { #endif } } -#ifdef TM_KOJAK_INST -#pragma pomp inst end(hoppingmatrix) -#endif diff --git a/src/lib/quda_interface.c b/src/lib/quda_interface.c index a746a7261..0e55f5cb9 100644 --- a/src/lib/quda_interface.c +++ b/src/lib/quda_interface.c @@ -2059,9 +2059,7 @@ void _setQudaMultigridParam(QudaMultigridParam *mg_param) { // this is needed after QUDA commit // https://github.com/lattice/quda/commit/7903288629f0fcc474989fec5a1393ecc17a4b42 -#ifdef TM_QUDA_EXPERIMENTAL mg_param->n_vec_batch[level] = 1; -#endif // set the MG EigSolver parameters, almost equivalent to // setEigParam from QUDA's multigrid_invert_test, except @@ -3031,7 +3029,6 @@ void quda_mg_tune_params(void *spinorOut, void *spinorIn, const int max_iter) { free(tunable_params); } -#ifdef TM_QUDA_FERMIONIC_FORCES void compute_cloverdet_derivative_quda(monomial *const mnl, hamiltonian_field_t *const hf, spinor *const X_o, spinor *const phi, int detratio) { tm_stopwatch_push(&g_timers, __func__, ""); @@ -3131,23 +3128,6 @@ void compute_ndcloverrat_derivative_quda(monomial *const mnl, hamiltonian_field_ tm_stopwatch_pop(&g_timers, 0, 1, "TM_QUDA"); } -#else -void compute_cloverdet_derivative_quda(monomial *const mnl, hamiltonian_field_t *const hf, - spinor *const X_o, spinor *const phi, int detratio) { - tm_debug_printf(0, 0, - "Error: UseExternalLibrary = quda requires that tmLQCD is compiled with " - "--enable-quda_fermionic=yes\n"); - exit(1); -} -void compute_ndcloverrat_derivative_quda(monomial *const mnl, hamiltonian_field_t *const hf, - spinor **const Qup, spinor **const Qdn, - solver_params_t *solver_params, int detratio) { - tm_debug_printf(0, 0, - "Error: UseExternalLibrary = quda requires that tmLQCD is compiled with " - "--enable-quda_fermionic=yes\n"); - exit(1); -} -#endif void compute_WFlow_quda(const double eps, const double tmax, const int traj, FILE *outfile) { tm_stopwatch_push(&g_timers, __func__, ""); diff --git 
a/src/lib/test/check_overlap.c b/src/lib/test/check_overlap.c index b032f8cdd..d34e2ae5b 100644 --- a/src/lib/test/check_overlap.c +++ b/src/lib/test/check_overlap.c @@ -105,11 +105,6 @@ int main(int argc, char *argv[]) { char *gaugecksum = NULL; double plaquette_energy; -#ifdef TM_KOJAK_INST -#pragma pomp inst init -#pragma pomp inst begin(main) -#endif - #ifdef TM_USE_LEMON MPI_File fh; LemonWriter *lemonWriter; @@ -389,7 +384,4 @@ int main(int argc, char *argv[]) { free_chi_dn_spinor_field(); } return (0); -#ifdef TM_KOJAK_INST -#pragma pomp inst end(main) -#endif } diff --git a/src/lib/update_gauge.c b/src/lib/update_gauge.c index af4730e01..7a7dd34a1 100644 --- a/src/lib/update_gauge.c +++ b/src/lib/update_gauge.c @@ -56,43 +56,23 @@ void update_gauge(const double step, hamiltonian_field_t *const hf) { #endif #ifdef TM_USE_OMP -#define static -#pragma omp parallel - { +#pragma omp parallel for #endif - int i, mu; - static su3 v, w; - su3 *z; - static su3adj deriv; - su3adj *xm; -#ifdef TM_KOJAK_INST -#pragma pomp inst begin(updategauge) -#endif - -#ifdef TM_USE_OMP -#undef static -#endif - -#ifdef TM_USE_OMP -#pragma omp for -#endif - for (i = 0; i < VOLUME; i++) { - for (mu = 0; mu < 4; mu++) { - /* moment[i][mu] = h_{i,mu}^{alpha} */ - xm = &hf->momenta[i][mu]; - z = &hf->gaugefield[i][mu]; - _su3adj_assign_const_times_su3adj(deriv, step, *xm); - exposu3(&w, &deriv); - restoresu3(&v, &w); - _su3_times_su3(w, v, *z); - restoresu3(&v, &w); - _su3_assign(*z, v); - } + for (int i = 0; i < VOLUME; i++) { + for (int mu = 0; mu < 4; mu++) { + /* moment[i][mu] = h_{i,mu}^{alpha} */ + su3 v, w; + su3adj *xm = &hf->momenta[i][mu]; + su3 *z = &hf->gaugefield[i][mu]; + su3adj deriv; + _su3adj_assign_const_times_su3adj(deriv, step, *xm); + exposu3(&w, &deriv); + restoresu3(&v, &w); + _su3_times_su3(w, v, *z); + restoresu3(&v, &w); + _su3_assign(*z, v); } - -#ifdef TM_USE_OMP - } /* OpenMP parallel closing brace */ -#endif + } #ifdef TM_USE_MPI /* for parallelization */ 
@@ -115,7 +95,4 @@ void update_gauge(const double step, hamiltonian_field_t *const hf) { tm_stopwatch_pop(&g_timers, 0, 1, ""); return; -#ifdef TM_KOJAK_INST -#pragma pomp inst end(updategauge) -#endif } diff --git a/src/lib/xchange/xchange_2fields.c b/src/lib/xchange/xchange_2fields.c index c311bf908..46496a0ba 100644 --- a/src/lib/xchange/xchange_2fields.c +++ b/src/lib/xchange/xchange_2fields.c @@ -52,10 +52,6 @@ void xchange_2fields(spinor* const l, spinor* const k, const int ieo) { int ix = 0; #endif -#ifdef TM_KOJAK_INST -#pragma pomp inst begin(xchange2fields) -#endif - #ifdef TM_USE_MPI /* send the data to the neighbour on the left */ @@ -237,8 +233,5 @@ void xchange_2fields(spinor* const l, spinor* const k, const int ieo) { MPI_Waitall(reqcount, requests, status); #endif return; -#ifdef TM_KOJAK_INST -#pragma pomp inst end(xchange2fields) -#endif } #endif /* TM_NON_BLOCKING */ diff --git a/src/lib/xchange/xchange_halffield.c b/src/lib/xchange/xchange_halffield.c index 3948aa1ca..0dd1effca 100644 --- a/src/lib/xchange/xchange_halffield.c +++ b/src/lib/xchange/xchange_halffield.c @@ -176,9 +176,6 @@ void xchange_halffield() { int reqcount = 16; #endif -#ifdef TM_KOJAK_INST -#pragma pomp inst begin(xchangehalf) -#endif /* send the data to the neighbour on the right in t direction */ /* recieve the data from the neighbour on the left in t direction */ MPI_Isend((void*)(sendBuffer), LX * LY * LZ * 12 / 2, MPI_DOUBLE, g_nb_t_up, 81, g_cart_grid, @@ -245,10 +242,6 @@ void xchange_halffield() { MPI_Waitall(reqcount, requests, status); #endif /* MPI */ return; - -#ifdef TM_KOJAK_INST -#pragma pomp inst end(xchangehalf) -#endif } #endif /* def (TM_USE_SHMEM || TM_PERSISTENT) */ @@ -265,9 +258,6 @@ void xchange_halffield32() { int reqcount = 12; #elif defined TM_PARALLELXYZT int reqcount = 16; -#endif -#ifdef TM_KOJAK_INST -#pragma pomp inst begin(xchangehalf32) #endif /* send the data to the neighbour on the right in t direction */ @@ -336,8 +326,5 @@ void 
xchange_halffield32() { MPI_Waitall(reqcount, requests, status); #endif /* MPI */ return; -#ifdef TM_KOJAK_INST -#pragma pomp inst end(xchangehalf32) -#endif } #endif /* defined TM_USE_HALFSPINOR */ diff --git a/src/lib/xchange/xchange_lexicfield.c b/src/lib/xchange/xchange_lexicfield.c index 56cc4315c..282ca8dfa 100644 --- a/src/lib/xchange/xchange_lexicfield.c +++ b/src/lib/xchange/xchange_lexicfield.c @@ -60,12 +60,8 @@ void xchange_lexicfield(spinor* const l) { #elif defined TM_PARALLELXYZT int reqcount = 16; #endif -#ifdef TM_KOJAK_INST -#pragma pomp inst begin(xchange_lexicfield) -#endif #ifdef TM_USE_MPI - /* send the data to the neighbour on the left */ /* recieve the data from the neighbour on the right */ MPI_Isend((void*)l, 1, lfield_time_slice_cont, g_nb_t_dn, 5081, g_cart_grid, &requests[0]); @@ -135,9 +131,6 @@ void xchange_lexicfield(spinor* const l) { #endif return; -#ifdef TM_KOJAK_INST -#pragma pomp inst end(xchange_lexicfield) -#endif } /* Here comes the naive version */ @@ -149,12 +142,8 @@ void xchange_lexicfield(spinor* const l) { #ifdef TM_PARALLELXYZT int x0 = 0, x1 = 0, x2 = 0, ix = 0; #endif -#ifdef TM_KOJAK_INST -#pragma pomp inst begin(xchange_lexicfield) -#endif #ifdef TM_USE_MPI - MPI_Status status; /* send the data to the neighbour on the left */ /* recieve the data from the neighbour on the right */ @@ -214,9 +203,6 @@ void xchange_lexicfield(spinor* const l) { #endif #endif return; -#ifdef TM_KOJAK_INST -#pragma pomp inst end(xchange_lexicfield) -#endif } #endif @@ -239,12 +225,8 @@ void xchange_lexicfield32(spinor32* const l) { #elif defined TM_PARALLELXYZT int reqcount = 16; #endif -#ifdef TM_KOJAK_INST -#pragma pomp inst begin(xchange_lexicfield32) -#endif #ifdef TM_USE_MPI - /* send the data to the neighbour on the left */ /* recieve the data from the neighbour on the right */ MPI_Isend((void*)l, 1, lfield_time_slice_cont32, g_nb_t_dn, 5081, g_cart_grid, &requests[0]); @@ -314,9 +296,6 @@ void xchange_lexicfield32(spinor32* 
const l) { #endif return; -#ifdef TM_KOJAK_INST -#pragma pomp inst end(xchange_lexicfield32) -#endif } /* Here comes the naive version */ @@ -328,12 +307,8 @@ void xchange_lexicfield32(spinor32* const l) { #ifdef TM_PARALLELXYZT int x0 = 0, x1 = 0, x2 = 0, ix = 0; #endif -#ifdef TM_KOJAK_INST -#pragma pomp inst begin(xchange_lexicfield32) -#endif #ifdef TM_USE_MPI - MPI_Status status; /* send the data to the neighbour on the left */ /* recieve the data from the neighbour on the right */ @@ -394,9 +369,6 @@ void xchange_lexicfield32(spinor32* const l) { #endif #endif return; -#ifdef TM_KOJAK_INST -#pragma pomp inst end(xchange_lexicfield32) -#endif } #endif From a24536ddff5eb622b1d87ca821adc70b482b9435 Mon Sep 17 00:00:00 2001 From: Mathieu Taillefumier Date: Thu, 19 Feb 2026 14:39:07 +0100 Subject: [PATCH 07/19] Added basic documentation --- .ci/include/cscs/01-test-templates.yml | 37 ++-- .../repo/packages/lemonio/package.py | 2 +- .../repo/packages/tmlqcd/package.py | 113 ++++++++++ CMakeLists.txt | 19 +- README.md | 131 ++++++++++++ cmake/FindCLime.cmake | 19 +- cmake/tmlqcd_config_internal.h.in | 2 +- doc/install.tex | 201 ++++++++++-------- install-sh | 0 quda_gauge_paths.inc | 158 -------------- 10 files changed, 397 insertions(+), 285 deletions(-) create mode 100644 .ci/uenv-recipes/tmlqcd/daint-gh200/repo/packages/tmlqcd/package.py create mode 100644 README.md delete mode 100644 install-sh delete mode 100644 quda_gauge_paths.inc diff --git a/.ci/include/cscs/01-test-templates.yml b/.ci/include/cscs/01-test-templates.yml index 9a4a8da45..9b3a1c414 100644 --- a/.ci/include/cscs/01-test-templates.yml +++ b/.ci/include/cscs/01-test-templates.yml @@ -8,30 +8,29 @@ include: image: ${UENV_NAME}/${UENV_VERSION}:${UENV_TAG} variables: WITH_UENV_VIEW: "default" - CFLAGS: "-O3 -fopenmp -mtune=neoverse-v2 -mcpu=neoverse-v2" - CXXFLAGS: "-O3 -fopenmp -mtune=neoverse-v2 -mcpu=neoverse-v2" - LDFLAGS: "-fopenmp" +# CFLAGS: "-O3 -fopenmp -mtune=neoverse-v2 
-mcpu=neoverse-v2" +# CXXFLAGS: "-O3 -fopenmp -mtune=neoverse-v2 -mcpu=neoverse-v2" +# LDFLAGS: "-fopenmp" before_script: - | if test "${SLURM_PROCID}" -eq "0"; then export CC="$(which mpicc)" export CXX="$(which mpicxx)" - mkdir -p install_dir - autoconf - ./configure \ - --enable-quda_experimental \ - --enable-mpi \ - --enable-omp \ - --with-mpidimension=4 \ - --disable-sse2 \ - --disable-sse3 \ - --enable-alignment=32 \ - --with-qudadir="/user-environment/env/default" \ - --with-limedir="/user-environment/env/default" \ - --with-lemondir="/user-environment/env/default" \ - --with-lapack="-lopenblas -L/user-environment/env/default/lib" \ - --with-cudadir="/user-environment/env/default/lib64" \ - --prefix="$(pwd)/install_dir" + mkdir -p build_dir + cd build_dir + cmake -DCMAKE_PREFIX_PATH="/user-environment/env/default" \ + -DTM_USE_MPI=ON \ + -DTM_USE_CUDA=ON \ + -DCMAKE_C_FLAGS="-O3 -mtune=neoverse-v2 -mcpu=neoverse-v2" \ + -DCMAKE_CXX_FLAGS="-O3 -mtune=neoverse-v2 -mcpu=neoverse-v2" \ + -DCMAKE_CUDA_ARCHITECTURES=90a \ + -DTM_USE_OMP=ON \ + -DTM_USE_QUDA=ON \ + -DTM_USE_LEMON=ON \ + -DTM_ENABLE_ALIGNMENT=32 \ + -DTM_USE_GAUGE_COPY=ON \ + -DTM_USE_HALFSPINOR=ON \ + -DCMAKE_INSTALL_PREFIX=../install_dir .. 
make make install touch preparation-done-${CI_JOB_ID} diff --git a/.ci/uenv-recipes/tmlqcd/daint-gh200/repo/packages/lemonio/package.py b/.ci/uenv-recipes/tmlqcd/daint-gh200/repo/packages/lemonio/package.py index 7508b4b79..4d7340a03 100755 --- a/.ci/uenv-recipes/tmlqcd/daint-gh200/repo/packages/lemonio/package.py +++ b/.ci/uenv-recipes/tmlqcd/daint-gh200/repo/packages/lemonio/package.py @@ -28,7 +28,7 @@ class CMakeBuilder(cmake.CMakeBuilder): def cmake_args(self): spec = self.spec args = [ - self.define_from_variant("DBUILD_SHARED_LIBS" "shared"), + self.define_from_variant("DBUILD_SHARED_LIBS", "shared"), ] return args diff --git a/.ci/uenv-recipes/tmlqcd/daint-gh200/repo/packages/tmlqcd/package.py b/.ci/uenv-recipes/tmlqcd/daint-gh200/repo/packages/tmlqcd/package.py new file mode 100644 index 000000000..13fb3238e --- /dev/null +++ b/.ci/uenv-recipes/tmlqcd/daint-gh200/repo/packages/tmlqcd/package.py @@ -0,0 +1,113 @@ +# Copyright Spack Project Developers. See COPYRIGHT file for details. 
+# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +from spack_repo.builtin.build_systems.cmake import CmakePackage +from spack_repo.builtin.build_systems.rocm import ROCmPackage +from spack_repo.builtin.build_systems.cuda import CudaPackage + +from spack.package import * + +class Tmlqcd(CmakePackage, CudaPackage, ROCmPackage): +"""Base class for building tmlQCD.""" + + homepage = "https://www.itkp.uni-bonn.de/~urbach/software.html" + url = "https://github.com/etmc/tmLQCD/archive/refs/tags/rel-5-1-6.tar.gz" + git = "https://github.com/etmc/tmLQCD.git" + license("GPL-3.0-or-later") + + maintainers("mtaillefumier") + version("master", branch="master") + + variant("lemon", default=False, description="Enable the lemon backend") + variant("mpi", default=True, description="Enable mpi support") + variant("DDalphaAMG", default=False, description="Enable DAlphaAMG support") + variant("openmp", default=True, description="Enable OpenMP") + variant("fftw", default=True, description="Enable FFTW interface") + variant( + "persistent_mpi", + default=True, + description="Enable persistent mpi calls for spinor and gauge fields", + when="+mpi", + ) + variant( + "nonblocking_mpi", + default=True, + description="Enable non-blocking mpi calls for spinor and gauge fields", + when="+mpi", + ) + variant("fixedvolume", default=True, description="Enable fixed volume at compile time") + variant( + "alignment", + default="auto", + values=("none", "auto", "16", "32", "64"), + description="Automatically or expliclty align arrays", + ) + variant("gauge_copy", default=True, description="Enable gauge field copy") + variant("half_spinor", default=True, description="Use a Dirac operator with half-spinor") + variant("shared", default=False, description="Enable shared library") + variant("shmem", default=False, description="Use shmem API") + variant("quda", default=True, description="Enable the QUDA library", when="+cuda",) + variant("quda", default=True, description="Enable the QUDA library", 
when="+rocm",) + variant( + "QPhiX", default=False, description="Enable the QPhiX library for Intel Xeon and Xeon Phis" + ) + variant( + "mpi_dimensions", + default="4", + values=("1", "2", "3", "4", "x", "xy", "xyz"), + description="number of dimensions the mpi processes are distributed. the default is parallelization over all four dimensions txyz", + when="+mpi", + ) + + generator("ninja") + + # language dependencies + depends_on("c", type="build") + depends_on("cxx", type="build") + depends_on("fortran", type="build") + + # conflicts + conflicts("+cuda", when="cuda_arch=none") + conflicts("+rocm", when="amdgpu_target=none") + + # hard dependencies + depends_on("c-lime") + depends_on("blas") + depends_on("lapack") + depends_on("pkgconfig", type="build") + + # dependencies + depends_on("mpi", when="+mpi") + depends_on("lemon-io", when="+lemon") + + with when("+quda"): + depends_on( + "quda+twisted_mass+twisted_clover+clover+ndeg_twisted_clover+ndeg_twisted_mass+wilson+qdp+staggered+usqcd+multigrid" + ) + + depends_on("quda+mpi", when="+mpi") + depends_on("quda+cuda", when="+cuda") + depends_on("quda+rocm", when="+rocm") + depends_on("quda+nvshmem", when="+shmem") + + depends_on("fftw-api@3", when="+fftw") + +class CMakeBuilder(cmake.CMakeBuilder): + def cmake_args(self): + spec = self.spec + args = [ + self.define_from_variant("BUILD_SHARED_LIBS", "shared"), + self.define_from_variant("TM_USE_LEMON", "lemon"), + self.define_from_variant("TM_USE_MPI", "mpi"), + self.define_from_variant("TM_USE_QUDA", "quda"), + self.define_from_variant("TM_USE_CUDA", "cuda"), + self.define_from_variant("TM_USE_HIP", "rocm"), + self.define_from_variant("TM_USE_FFTW", "fftw"), + self.define_from_variant("TM_FIXEDVOLUME", "fixedvolume"), + self.define_from_variant("TM_USE_OMP", "openmp"), + self.define_from_variant("TM_USE_SHMEM", "shmem"), + self.define_from_variant("TM_USE_GAUGE_COPY", "gauge_copy"), + self.define_from_variant("TM_USE_HALFSPINOR", "half_spinor"), + ] + return args 
diff --git a/CMakeLists.txt b/CMakeLists.txt index 803feeef7..d363e407c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -80,8 +80,8 @@ option(TM_USE_MPI "Enable MPI support" OFF) option(TM_USE_CUDA "Enable QUDA support" OFF) option(TM_USE_HIP "Enable HIP support" OFF) option(TM_USE_DDalphaAMG "Enable DDalphaAMG support" OFF) -option(TM_USE_OMP "Enable openMP" ON) -option(TM_FIXEDVOLUME "fix volume at compile time" OFF) +option(TM_USE_OMP "Enable OpenMP" ON) +option(TM_FIXEDVOLUME "Fix volume at compile time" OFF) set(TM_ENABLE_ALIGNMENT "auto" CACHE @@ -95,7 +95,7 @@ set_property(CACHE TM_ENABLE_ALIGNMENT PROPERTY STRINGS "auto" "none" "16" "32" option(TM_USE_OPTIMIZATION "enable optimisation" ON) option(TM_USE_GAUGE_COPY "Enable use of a copy of the gauge field" ON) option(TM_USE_HALFSPINOR "Use a Dirac Op. with halfspinor exchange" ON) -option(TM_USE_QPHIX "enable QPhiX" OFF) +option(TM_USE_QPHIX "Enable QPhiX" OFF) option(TM_USE_SHMEM "Use shmem API" OFF) option(TM_USE_QUDA "Enable QUDA support" OFF) option(TM_ENABLE_WARNINGS "Enable all warnings" ON) @@ -106,7 +106,7 @@ cmake_dependent_option( TM_PERSISTENT_MPI "Use persistent MPI calls for halfspinor [default=no]" OFF "TM_USE_MPI" OFF) cmake_dependent_option( - TM_NONBLOCKING_MPI "Use non-blocking MPI calls for spinor and gaug" ON + TM_NONBLOCKING_MPI "Use non-blocking MPI calls for spinor and gauge" ON "TM_USE_MPI" OFF) # need to do it properly. Just a place holder @@ -132,8 +132,6 @@ find_package(BLAS REQUIRED) find_package(LAPACK REQUIRED) set(TM_LAPACK ON) find_package(FLEX REQUIRED) -# do we need bison ? 
-find_package(BISON REQUIRED) set(PACKAGE_NAME ${PROJECT_DESCRIPTION}) set(PACKAGE_VERSION ${PROJECT_VERSION}) @@ -201,12 +199,6 @@ set(TM_USE_LIME ON) if(TM_USE_QUDA) find_package(QUDA REQUIRED config) - if(TM_USE_QUDA_EXPERIMENTAL) - set(TM_QUDA_EXPERIMENTAL ON) - endif() - if(TM_QUDA_FERMIONIC_FORCES) - set(TM_QUDA_FERMIONIC_FORCES ON) - endif() endif() if(TM_USE_SHMEM) @@ -293,6 +285,9 @@ if(TM_USE_MPI) endif() endif() +if (TM_USE_HALFSPINOR AND NOT TM_USE_GAUGE_COPY) + message(FATAL_ERROR "The TM_USE_GAUGE_COPY option should also be set to ON when TM_USE_HALFSPINOR is ON") +endif() # keep the autotool config.h header. configure_file("${PROJECT_SOURCE_DIR}/cmake/tmlqcd_config_internal.h.in" "${PROJECT_BINARY_DIR}/tmlqcd_config_internal.h" @ONLY) diff --git a/README.md b/README.md new file mode 100644 index 000000000..84f1e2172 --- /dev/null +++ b/README.md @@ -0,0 +1,131 @@ +The software ships with a CMake environment, which will configure and build the +programmes. It is recommended to configure and build the executables in a +separate build directory. This also allows to have several builds with different +options from the same source code directory. + +## Prerequisites + +In order to compile the programmes the `LAPACK` library (fortran version) needs to be installed. CMake will search for the +library in all default directories. Also the latest version (tested is version +1.2.3) of `C-LIME` must be available, which is used as +a packaging scheme to read and write gauge configurations and propagators to +files. + +## Configuring the hmc package +:label{sec:config} + +The build system uses CMake to configure and build the hmc package. The +following list gives all options (OFF by default unless specified): +- `CMAKE_POSITION_INDEPENDENT_CODE`: Build a position independent + code. **ON** by default. +- `BUILD_SHARED_LIBS`: Build the shared version of the hmc library. +- `TM_USE_FFTW`: Enable fftw support. +- `TM_USE_CUDA`: Enable CUDA support. 
+- `TM_USE_HIP`: Enable HIP support (AMD or NVIDIA GPUs). +- `TM_USE_DDalphaAMG`: Enable DDalphaAMG support. +- `TM_USE_LEMON`: Use the lemon io library. +- `TM_USE_OMP`: Enable OpenMP (**ON** by default). +- `TM_FIXEDVOLUME`: Fix volume at compile time. +- `TM_ENABLE_ALIGNMENT`: Automatically or explicitly align arrays to a given + byte boundary. Allowed values: auto, none, 16, 32, 64. +- `TM_USE_GAUGE_COPY`: Enable use of a copy of the gauge field (**ON** + by default). See the Dirac operator section of the documentation for details on this option. It will + increase the memory requirement of the code. +- `TM_USE_HALFSPINOR`: Use a Dirac Op. with halfspinor exchange (**ON** + by default). See the Dirac operator section of the documentation for details. +- `TM_USE_QUDA`: Enable QUDA support. +- `TM_USE_SHMEM`: Use shmem API. +- `TM_ENABLE_WARNINGS`: Enable all warnings (**ON** by default). +- `TM_ENABLE_TESTS`: Enable tests. +- `TM_USE_QPHIX`: Enable QPhiX. + - `TM_QPHIX_SOALEN`: QPhiX specific parameter (default is 4) + - **QPHIX_DIR**: Directory where QPhiX is installed. + The current QPhiX CMake build system does not export all the information ( + include and lib directories) that is needed to compile hmc. + - **QMP_DIR**: Directory where QMP is installed ( + QPhiX dependency). + The current QPhiX CMake build system does not export information about the + include and lib directories nor about its dependencies (QMP in that case). +- `TM_USE_MPI`: Enable MPI support. + - `TM_PERSISTENT_MPI`: Use persistent MPI calls for halfspinor. + - `TM_NONBLOCKING_MPI`: Use non-blocking MPI calls for spinor and + gauge. + - `TM_MPI_DIMENSION`: Use $n$ dimensional parallelisation ($XYZT$) + [default=4]. The number of parallel directions can be specified. $1, 2, 3$ and $4$ + dimensional parallelisation is supported. 
+ - `TM_USE_LEMON`: Use the lemon io library. + +The following minimal list of commands will configure and build the hmc package with +minimal dependencies: + +```bash +mkdir build +cd build +cmake -DCMAKE_INSTALL_PREFIX=/my_path -DCMAKE_PREFIX_PATH=/my_c_line_path .. +make -j +make install +``` + +These instructions assume that the `c-lime` package is installed in `/my_c_line_path`. The `CMAKE_PREFIX_PATH` variable is a list +of paths separated by semicolons, containing the installation prefixes of all +dependencies. + +Adding `-DTM_USE_MPI=ON` will enable MPI support with parallelization +over spatial and temporal dimensions. The command line is then + +```bash +cmake -DCMAKE_INSTALL_PREFIX=/my_path -DCMAKE_PREFIX_PATH=/my_c_line_path -DTM_USE_MPI=ON .. +``` + +We can combine it with the lemon-io library (installed in `/my_lemon_path`) + +```bash +cmake -DCMAKE_INSTALL_PREFIX=/my_path \ + -DCMAKE_PREFIX_PATH="/my_c_line_path;/my_lemon_path" \ + -DTM_USE_MPI=ON \ + -DTM_USE_LEMON=ON .. +``` + +`QUDA` support (installed in `/my_quda_path`) can be added with + +```bash +cmake -DCMAKE_INSTALL_PREFIX=/my_path \ + -DCMAKE_PREFIX_PATH="/my_c_line_path;/my_lemon_path;/my_quda_path" \ + -DTM_USE_MPI=ON \ + -DTM_USE_LEMON=ON \ + -DTM_USE_QUDA=ON \ + -DTM_USE_CUDA=ON \ + -DCMAKE_CUDA_ARCHITECTURES=90 .. +``` + +Note that the command assumes that QUDA is compiled with `CUDA` support. AMD GPUs +are also supported after replacing `-DTM_USE_CUDA=ON` with +`-DTM_USE_HIP=ON` and compiling `QUDA` with `HIP` support. The ROCm architecture is defined by the variable +`CMAKE_HIP_ARCHITECTURES=gfxxxx`. 
+ +`QPhiX` and/or `DDalphaAMG` support can be added with + +```bash +cmake -DCMAKE_INSTALL_PREFIX=/my_path \ + -DCMAKE_PREFIX_PATH="/my_c_line_path;/my_lemon_path;/my_quda_path;/my_path_ddalphaamg" \ + -DTM_USE_MPI=ON \ + -DTM_USE_LEMON=ON \ + -DTM_USE_QUDA=ON \ + -DTM_USE_CUDA=ON \ + -DCMAKE_CUDA_ARCHITECTURES=90 \ + -DTM_USE_QPHIX=ON \ + -DQPHIX_DIR=/my_qphix_dir \ + -DTM_USE_DDalphaAMG=ON \ + -DQMP_DIR=/my_qmp_dir \ + -DTM_USE_OMP=ON .. +``` + +`QPhiX` cmake config support is incomplete and requires both the QPhiX +and QMP installation directories to work properly. + +`CMake` has several relevant specific options that control the build. Compiler +options are defined by the variables `CMAKE_C_FLAGS` and `CMAKE_CXX_FLAGS`. CUDA and HIP compilation options are controlled by their +equivalent `CMAKE_{CUDA/HIP}_FLAGS`. + +Adding for instance `-GNinja` to the `CMake` command line will use +ninja instead of make. diff --git a/cmake/FindCLime.cmake b/cmake/FindCLime.cmake index 0c3eabe48..c9d94ea95 100644 --- a/cmake/FindCLime.cmake +++ b/cmake/FindCLime.cmake @@ -1,27 +1,26 @@ include(FindPackageHandleStandardArgs) find_library( - TMLQCD_CLIME_LIBRARIES + TM_CLIME_LIBRARIES NAMES lime PATH_SUFFIXES "lib" "lib64") find_path( - TMLQCD_CLIME_INCLUDE_DIRS + TM_CLIME_INCLUDE_DIRS NAMES lime.h PATH_SUFFIXES "include" "include/${_pacakge_name}" "${_package_name}") -message("${TMLQCD_CLIME_INCLUDE_DIRS}") -find_package_handle_standard_args(CLime DEFAULT_MSG TMLQCD_CLIME_LIBRARIES - TMLQCD_CLIME_INCLUDE_DIRS) +find_package_handle_standard_args(CLime DEFAULT_MSG TM_CLIME_LIBRARIES + TM_CLIME_INCLUDE_DIRS) if(NOT TARGET tmlqcd::clime) add_library(tmlqcd::clime INTERFACE IMPORTED) set_target_properties(tmlqcd::clime PROPERTIES INTERFACE_LINK_LIBRARIES - "${TMLQCD_CLIME_LIBRARIES}") + "${TM_CLIME_LIBRARIES}") set_target_properties(tmlqcd::clime PROPERTIES INTERFACE_INCLUDE_DIRECTORIES - "${TMLQCD_CLIME_INCLUDE_DIRS}") + "${TM_CLIME_INCLUDE_DIRS}") endif() -set(TMLQCD_CLIME_FOUND 
ON) -mark_as_advanced(TMLQCD_CLIME_FOUND TMLQCD_CLIME_LIBRARIES - TMLQCD_CLIME_INCLUDE_DIRS) +set(TM_CLIME_FOUND ON) +mark_as_advanced(TM_CLIME_FOUND TM_CLIME_LIBRARIES + TM_CLIME_INCLUDE_DIRS) diff --git a/cmake/tmlqcd_config_internal.h.in b/cmake/tmlqcd_config_internal.h.in index 145df156a..7c11d0446 100644 --- a/cmake/tmlqcd_config_internal.h.in +++ b/cmake/tmlqcd_config_internal.h.in @@ -79,7 +79,7 @@ //#cmakedefine YYTEXT_POINTER /* Number of bits in a file offset, on hosts where this is settable. */ -#define TM_FILE_OFFSET_BITS @TMLQCD_FILE_OFFSET_BITS@ +#define TM_FILE_OFFSET_BITS @TM_FILE_OFFSET_BITS@ /* Construct an extra copy of the gauge fields */ #cmakedefine TM_USE_GAUGE_COPY diff --git a/doc/install.tex b/doc/install.tex index e4d86c2da..9d5e6f887 100644 --- a/doc/install.tex +++ b/doc/install.tex @@ -1,103 +1,136 @@ -The software ships with a GNU autoconf environment and a configure -script, which will generate GNU Makefiles to build the programmes. It -is supported and recommended to configure and build the executables in -a separate build directory. This also allows to have several builds with -different options from the same source code directory. +The software ships with a CMake environment, which will configure and build the +programmes. It is recommended to configure and build the executables in a +separate build directory. This also allows to have several builds with different +options from the same source code directory. \subsection{Prerequisites} -In order to compile the programmes the {\ttfamily - LAPACK}~\cite{lapack:web} library (fortran version) needs to be -installed. In addition it must be known which linker options are -needed to link against {\ttfamily LAPACK}, e.g. {\ttfamily - -Lpath-to-lapack -llapack -lblas}. Also a the latest -version (tested is version 1.2.3) of {\ttfamily - C-LIME}~\cite{lime:web} must be available, which is used as a -packaging scheme to read and write gauge configurations and -propagators to files. 
+In order to compile the programmes the {\ttfamily LAPACK}~\cite{lapack:web} +library (fortran version) needs to be installed. CMake will search for the +library in all default directories. Also the latest version (tested is version +1.2.3) of {\ttfamily C-LIME}~\cite{lime:web} must be available, which is used as +a packaging scheme to read and write gauge configurations and propagators to +files. \subsection{Configuring the hmc package} \label{sec:config} -In order to get a simple configuration of the hmc package it is enough -to just type -\begin{verbatim} -path-to-src-code/configure --with-lime= \ - --with-lapack= CC= \ - F77= CFLAGS= -\end{verbatim} -in the build directory. If -{\ttfamily CC, F77} and {\ttfamily CFLGAS} are not specified, -{\ttfamily configure} will guess them. - -The code was successfully compiled and run at least on the following -platforms: i686 and compatible, x64 and compatible, IBM Regatta -systems, IBM Blue Gene/L, IBM Blue Gene/P, SGI Altix and SGI PC -clusters, powerpc clusters. - -The configure script accepts certain options to influence the building -procedure. One can get an overview over all supported options with -{\ttfamily configure --help}. There are {\ttfamily enable|disable} -options switching on and off optional features and {\ttfamily - with|without} switches usually related to optional packages. In the -following we describe the most important of them (check {\ttfamily - configure --help} for the defaults and more options): - +The build system uses CMake to configure and build the hmc package. The +following list gives all options (OFF by default unless specified): \begin{itemize} -\item {\ttfamily --enable-mpi}:\\ - This option switches on the support for MPI. On certain platforms it - automatically chooses the correct parallel compiler or searches for - a command {\ttfamily mpicc} in the search path. - -\item {\ttfamily --enable-gaugecopy}:\\ - See section \ref{sec:dirac} for details on this option. 
It will +\item {\ttfamily CMAKE\_POSITION\_INDEPENDENT\_CODE}: Build position-independent + code. ON by default. +\item {\ttfamily BUILD\_SHARED\_LIBS}: Build the shared version of the hmc library. +\item {\ttfamily TM\_USE\_FFTW}: Enable fftw support. +\item {\ttfamily TM\_USE\_CUDA}: Enable CUDA support. +\item {\ttfamily TM\_USE\_HIP}: Enable HIP support (AMD or NVIDIA GPUs) +\item {\ttfamily TM\_USE\_DDalphaAMG}: Enable DDalphaAMG support. +\item {\ttfamily TM\_USE\_LEMON}: Use the lemon io library. +\item {\ttfamily TM\_USE\_OMP}: Enable OpenMP ({\bf ON} by default) +\item {\ttfamily TM\_FIXEDVOLUME}: Fix volume at compile time. +\item {\ttfamily TM\_ENABLE\_ALIGNMENT}: Automatically or explicitly align arrays to a + byte boundary. Allowed values: auto, none, 16, 32, 64. +\item {\ttfamily TM\_USE\_GAUGE\_COPY}: Enable use of a copy of the gauge field (ON + by default). See section \ref{sec:dirac} for details on this option. It will increase the memory requirement of the code. +\item {\ttfamily TM\_USE\_HALFSPINOR}: Use a Dirac Op. with halfspinor exchange (ON + by default). See sub-section \ref{sec:dirac} for details. +\item {\ttfamily TM\_USE\_QUDA}: Enable QUDA support. +\item {\ttfamily TM\_USE\_SHMEM}: Use shmem API. +\item {\ttfamily TM\_ENABLE\_WARNINGS}: Enable all warnings (ON by default). +\item {\ttfamily TM\_ENABLE\_TESTS}: Enable tests. +\item {\ttfamily TM\_USE\_QPHIX}: Enable QPhiX. + \begin{itemize} + \item {\ttfamily TM\_QPHIX\_SOALEN}: QPhiX specific parameter (default is 4) + \item \textcolor{red}{{\ttfamily QPHIX\_DIR}}: Directory where QPhiX is installed. + The current QPhiX CMake build system does not export all the information ( + include and lib directories) that is needed to compile hmc. + \item \textcolor{red}{\ttfamily QMP\_DIR}: Directory where QMP is installed ( + QPhiX dependency). + The current QPhiX CMake build system does not export all information about its + include and lib directories or its dependencies (QMP in this case). 
+ \end{itemize} +\item {\ttfamily TM\_USE\_MPI}: Enable MPI support. + \begin{itemize} + \item {\ttfamily TM\_PERSISTENT\_MPI}: Use persistent MPI calls for halfspinor. + \item {\ttfamily TM\_NONBLOCKING\_MPI}: Use non-blocking MPI calls for spinor and + gauge. + \item {\ttfamily TM\_MPI\_DIMENSION}: Use $n$ dimensional parallelisation ($XYZT$) + [default=4]. The number of parallel directions can be specified. $1, 2, 3$ and $4$ + dimensional parallelisation is supported. + \item {\ttfamily TM\_USE\_LEMON} Use the lemon io library + \end{itemize} +\end{itemize} -\item {\ttfamily --enable-halfspinor}:\\ - If this option is enabled the Dirac operator using half spinor - fields is used. See sub-section \ref{sec:dirac} for details. If this - feature is switched on, also the gauge copy feature is switched - on automatically. - -%\item {\ttfamily --enable-shmem}:\\ -% Use shared memory API instead of MPI for the communication of spinor -% fields. This is currently only usable on the Munich Altix machine. - -\item {\ttfamily --with-mpidimension=n}:\\ - This option has only effect if the preceding one is switched - on. The number of parallel directions can be specified. 1,2,3 and 4 - dimensional parallelisation is supported. - -\item {\ttfamily --with-lapack=""}:\\ - the code requires lapack to be linked. All linker flags necessary - to do so must be specified here. Note, that {\ttfamily LIBS="..."} - works similar. +The following minimal list of commands will configure and build the hmc package with +minimal dependencies +\begin{verbatim} +mkdir build +cd build +cmake -DCMAKE_INSTALL_PREFIX=/my_path -DCMAKE_PREFIX_PATH=/my_c_line_path .. +make -j +make install +\end{verbatim} -\item {\ttfamily --with-limedir=}:\\ - Tells configure where to find the lime package, which is required for - the build of the HMC. It is used for the ILDG file format. - -\end{itemize} +These instructions assume that the {\ttfamily c-lime} package is installed in {\ttfamily + /my\_c\_line\_path}. 
The {\ttfamily CMAKE\_PREFIX\_PATH} variable is a list +of paths separated by semicolons, containing the installation paths of all +dependencies. -The configure script will guess at the very beginning on which -platform the build is done. In case this fails or a cross compilation -must be performed please use the option {\ttfamily --host=HOST}. For -instance in order to compile for the BG/P one needs to specify -{\ttfamily --host=ppc-ibm-bprts --build=ppc64-ibm-linux}. +Adding {\ttfamily -DTM\_USE\_MPI=ON} will enable MPI support with parallelization +over spatial and temporal dimensions. The command line is then +\begin{verbatim} +cmake -DCMAKE_INSTALL_PREFIX=/my_path -DCMAKE_PREFIX_PATH=/my_c_line_path -DTM_USE_MPI=ON .. +\end{verbatim} +We can combine it with the lemon-io library (installed in {\ttfamily /my\_lemon\_path}) +\begin{verbatim} +cmake -DCMAKE_INSTALL_PREFIX=/my_path \ + -DCMAKE_PREFIX_PATH="/my_c_line_path;/my_lemon_path" \ + -DTM_USE_MPI=ON \ + -DTM_USE_LEMON=ON .. +\end{verbatim} -For certain architectures like the Blue Gene systems there are -{\ttfamily README.arch} files in the top source directory with -example configure calls. +{\ttfamily QUDA} support (installed in {\ttfamily /my\_quda\_path}) can be added with +\begin{verbatim} +cmake -DCMAKE_INSTALL_PREFIX=/my_path \ + -DCMAKE_PREFIX_PATH="/my_c_line_path;/my_lemon_path;/my_quda_path" \ + -DTM_USE_MPI=ON \ + -DTM_USE_LEMON=ON \ + -DTM_USE_QUDA=ON \ + -DTM_USE_CUDA=ON \ + -DCMAKE_CUDA_ARCHITECTURES=90 .. +\end{verbatim} +Note that the command assumes that QUDA is compiled with CUDA support. AMD GPUs +are also supported after replacing {\ttfamily -DTM\_USE\_CUDA=ON} with +{\ttfamily -DTM\_USE\_HIP=ON} and compiling {\ttfamily QUDA} with {\ttfamily + HIP} support. The {\ttfamily ROCM} architecture is defined by the variable +{\ttfamily CMAKE\_HIP\_ARCHITECTURES=gfxxxx}. 
-\subsection{Building and Installing} +{\ttfamily QPhiX} and/or {\ttfamily DDalphaAMG} support can be added with +\begin{verbatim} +cmake -DCMAKE_INSTALL_PREFIX=/my_path \ + -DCMAKE_PREFIX_PATH="/my_c_line_path;/my_lemon_path;/my_quda_path;/my_path_ddalphaamg" \ + -DTM_USE_MPI=ON \ + -DTM_USE_LEMON=ON \ + -DTM_USE_QUDA=ON \ + -DTM_USE_CUDA=ON \ + -DCMAKE_CUDA_ARCHITECTURES=90 \ + -DTM_USE_QPHIX=ON \ + -DQPHIX_DIR=/my_qphix_dir \ + -DTM_USE_DDalphaAMG=ON \ + -DQMP_DIR=/my_qmp_dir \ + -DTM_USE_OMP=ON .. +\end{verbatim} +{\ttfamily QPhiX} cmake config support is incomplete and requires both the {\ttfamily QPhiX} +and {\ttfamily QMP} installation directories to work properly. -After successfully configuring the package the code can be build by -simply typing {\ttfamily make} in the build directory. This will -compile the standard executables. Typing {\ttfamily make install} will -copy these executables into the install directory. The default install -directory is {\ttfamily \$HOME/bin}, which can be influenced e.g. with -the {\ttfamily --prefix} option to {\ttfamily configure}. +CMake has several relevant specific options that control the build. Compiler +options are defined by the variable {\ttfamily CMAKE\_C\_FLAGS} and {\ttfamily + CMAKE\_CXX\_FLAGS}. CUDA and HIP compilations options are controlled by their +equivalent {\ttfamily CMAKE\_\{CUDA/HIP\}\_FLAGS}. +Adding for instance {\ttfamily -GNinja} to the {\ttfamily CMake} command line will use +{\ttfamily ninja} instead of {\ttfamily make}. %%% Local Variables: %%% mode: latex diff --git a/install-sh b/install-sh deleted file mode 100644 index e69de29bb..000000000 diff --git a/quda_gauge_paths.inc b/quda_gauge_paths.inc deleted file mode 100644 index d2c898e6c..000000000 --- a/quda_gauge_paths.inc +++ /dev/null @@ -1,158 +0,0 @@ -/*********************************************************************** - * - * Copyright (C) 2021 Bartosz Kostrzewa, Ferenc Pittler, Simone Bacchio - * - * This file is part of tmLQCD. 
- * - * tmLQCD is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * tmLQCD is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with tmLQCD. If not, see . - * - * - ***********************************************************************/ - -const int plaq_rect_length[24] = { - 3, 3, 3, 3, 3, 3, - 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, - }; - -const int plaq_rect_path[4][24][5] = { - { {1, 7, 6 }, - {6, 7, 1 }, - {2, 7, 5 }, - {5, 7, 2 }, - {3, 7, 4 }, - {4, 7, 3 }, - {1, 1, 7, 6, 6 }, - {6, 6, 7, 1, 1 }, - {2, 2, 7, 5, 5 }, - {5, 5, 7, 2, 2 }, - {3, 3, 7, 4, 4 }, - {4, 4, 7, 3, 3 }, - {0, 1, 7, 7, 6 }, - {6, 7, 7, 1, 0 }, - {0, 2, 7, 7, 5 }, - {5, 7, 7, 2, 0 }, - {0, 3, 7, 7, 4 }, - {4, 7, 7, 3, 0 }, - {0, 4, 7, 7, 3 }, - {3, 7, 7, 4, 0 }, - {0, 5, 7, 7, 2 }, - {2, 7, 7, 5, 0 }, - {0, 6, 7, 7, 1 }, - {1, 7, 7, 6, 0 } }, - { { 2, 6, 5 }, - { 5, 6, 2 }, - { 3, 6, 4 }, - { 4, 6, 3 }, - { 0, 6, 7 }, - { 7, 6, 0 }, - { 1, 2, 6, 6, 5 }, - { 2, 6, 6, 5, 1 }, - { 5, 6, 6, 2, 1 }, - { 1, 5, 6, 6, 2 }, - { 1, 3, 6, 6, 4 }, - { 3, 6, 6, 4, 1 }, - { 4, 6, 6, 3, 1 }, - { 1, 4, 6, 6, 3 }, - { 1, 0, 6, 6, 7 }, - { 0, 6, 6, 7, 1 }, - { 7, 6, 6, 0, 1 }, - { 1, 7, 6, 6, 0 }, - { 5, 5, 6, 2, 2 }, - { 2, 2, 6, 5, 5 }, - { 4, 4, 6, 3, 3 }, - { 3, 3, 6, 4, 4 }, - { 7, 7, 6, 0, 0 }, - { 0, 0, 6, 7, 7 } }, - { {3, 5, 4}, - {4, 5, 3}, - {0, 5, 7}, - {7, 5, 0}, - {1, 5, 6}, - {6, 5, 1}, - {2, 3, 5, 5, 4}, - {3, 5, 5, 4, 2}, - {4, 5, 5, 3, 2}, - {2, 4, 5, 5, 3}, - {2, 0, 5, 5, 7}, - {0, 5, 5, 7, 2}, - {7, 5, 5, 0, 2}, - {2, 
7, 5, 5, 0}, - {2, 1, 5, 5, 6}, - {1, 5, 5, 6, 2}, - {6, 5, 5, 1, 2}, - {2, 6, 5, 5, 1}, - {4, 4, 5, 3, 3}, - {3, 3, 5, 4, 4}, - {7, 7, 5, 0, 0}, - {0, 0, 5, 7, 7}, - {6, 6, 5, 1, 1}, - {1, 1, 5, 6, 6} }, - { { 0, 4, 7 }, - { 7, 4, 0 }, - { 1, 4, 6 }, - { 6, 4, 1 }, - { 2, 4, 5 }, - { 5, 4, 2 }, - { 3, 0, 4, 4, 7 }, - { 0, 4, 4, 7, 3 }, - { 7, 4, 4, 0, 3 }, - { 3, 7, 4, 4, 0 }, - { 3, 1, 4, 4, 6 }, - { 1, 4, 4, 6, 3 }, - { 6, 4, 4, 1, 3 }, - { 3, 6, 4, 4, 1 }, - { 3, 2, 4, 4, 5 }, - { 2, 4, 4, 5, 3 }, - { 5, 4, 4, 2, 3 }, - { 3, 5, 4, 4, 2 }, - { 7, 7, 4, 0, 0 }, - { 0, 0, 4, 7, 7 }, - { 6, 6, 4, 1, 1 }, - { 1, 1, 4, 6, 6 }, - { 5, 5, 4, 2, 2 }, - { 2, 2, 4, 5, 5 } } - }; - -const int plaq_length[] = { - 3, 3, 3, 3, 3, 3 }; - -const int plaq_path[4][6][3] = { - { { 1, 7, 6 }, - { 6, 7, 1 }, - { 2, 7, 5 }, - { 5, 7, 2 }, - { 3, 7, 4 }, - { 4, 7, 3 } }, - { { 2, 6, 5 }, - { 5, 6, 2 }, - { 3, 6, 4 }, - { 4, 6, 3 }, - { 0, 6, 7 }, - { 7, 6, 0 } }, - { { 3, 5, 4}, - { 4, 5, 3}, - { 0, 5, 7}, - { 7, 5, 0}, - { 1, 5, 6}, - { 6, 5, 1} }, - { { 0, 4, 7 }, - { 7, 4, 0 }, - { 1, 4, 6 }, - { 6, 4, 1 }, - { 2, 4, 5 }, - { 5, 4, 2 } } - }; - From 065d6d0abfe0eeebba099623a8268e9482fec785 Mon Sep 17 00:00:00 2001 From: Mathieu Taillefumier Date: Tue, 24 Feb 2026 09:58:58 +0100 Subject: [PATCH 08/19] Add alignment detection at configuration time --- CMakeLists.txt | 10 +- cmake/DetectSimdAndAlignment.cmake | 288 +++++++++++++++++++++++++++++ 2 files changed, 295 insertions(+), 3 deletions(-) create mode 100644 cmake/DetectSimdAndAlignment.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index d363e407c..82880ef60 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -146,21 +146,25 @@ set(ALIGN32 " ") message("${TM_ENABLE_ALIGNMENT}") if(${TM_ENABLE_ALIGNMENT} STREQUAL "auto") + include(cmake/DetectSimdAndAlignment.cmake) + message(STATUS "SIMD: ${SIMD_LEVEL} (${SIMD_ARCH_FAMILY}), align=${SIMD_ALIGNMENT}") +endif() +if (${TM_ENABLE_ALIGNMENT} STREQUAL "none") set(ALIGN_BASE 
"0x00") set(ALIGN " ") set(ALIGN_BASE32 "0x00") set(ALIGN32 " ") -elseif(TM_ENABLE_ALIGNMENT EQUAL 16) +elseif((TM_ENABLE_ALIGNMENT STREQUAL "16") OR ((TM_ENABLE_ALIGNMENT STREQUAL "auto") AND (SIMD_ALIGNMENT EQUAL 16))) set(ALIGN_BASE "0x0F") set(ALIGN "__attribute__ ((aligned (16)))") set(ALIGN_BASE32 "0x0F") set(ALIGN32 "__attribute__ ((aligned (16)))") -elseif(TM_ENABLE_ALIGNMENT EQUAL 32) +elseif((TM_ENABLE_ALIGNMENT STREQUAL "32") OR ((TM_ENABLE_ALIGNMENT STREQUAL "auto") AND (SIMD_ALIGNMENT EQUAL 32))) set(ALIGN_BASE "0x2F") set(ALIGN "__attribute__ ((aligned (32)))") set(ALIGN_BASE32 "0x2F") set(ALIGN32 "__attribute__ ((aligned (32)))") -elseif(TM_ENABLE_ALIGNMENT EQUAL 64) +elseif((TM_ENABLE_ALIGNMENT STREQUAL "64") OR ((TM_ENABLE_ALIGNMENT STREQUAL "auto") AND (SIMD_ALIGNMENT EQUAL 64))) set(ALIGN_BASE "0x3F") set(ALIGN "__attribute__ ((aligned (64)))") set(ALIGN_BASE32 "0x3F") diff --git a/cmake/DetectSimdAndAlignment.cmake b/cmake/DetectSimdAndAlignment.cmake new file mode 100644 index 000000000..707b9b65b --- /dev/null +++ b/cmake/DetectSimdAndAlignment.cmake @@ -0,0 +1,288 @@ +# DetectSimdAndAlignment.cmake +# +# Detect SIMD architecture family, SIMD level and a reasonable alignment value. +# +# Exposed cache variables: +# SIMD_ARCH_FAMILY : x86 / ARM / PPC / NVIDIA / UNKNOWN +# SIMD_LEVEL : AVX512 / AVX2 / SSE2 / SVE / NEON_AARCH64 / NEON_ARMv8 / NEON_ARMv7 / POWER10 / VSX_POWER9 / VSX_POWER7 / ALTIVEC / CUDA_SM89 / SCALAR +# SIMD_ALIGNMENT : integer, in bytes (16, 32, 64, ...) +# +# Optional (if you want a configured header): +# SIMD_CONFIG_HEADER : path to the generated header (see bottom). +# +# Usage: +# include(cmake/DetectSimdAndAlignment.cmake) +# message(STATUS "SIMD: ${SIMD_ARCH_FAMILY} ${SIMD_LEVEL}, alignment=${SIMD_ALIGNMENT}") +# +# # Example: propagate as defines +# target_compile_definitions(my_target PRIVATE +# SIMD_ALIGNMENT=${SIMD_ALIGNMENT} +# SIMD_LEVEL_${SIMD_LEVEL} +# ) +# DetectSimdAndAlignment.cmake - COMPLETE: x86 + ARM NEON + NVIDIA + PowerPC + + +include_guard(GLOBAL) # + +include(CheckCXXSourceCompiles) +include(CheckCXXSourceRuns) # For runtime CPU detection fallback + +# ------------------------------ +# 1. 
Detect architecture family +# ------------------------------ +if(NOT DEFINED SIMD_ARCH_FAMILY) + string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" _simd_proc) + + if(_simd_proc MATCHES "x86_64|amd64|i[3-6]86") + set(_detected_arch "x86") + elseif(_simd_proc MATCHES "armv[0-9]+|aarch64|arm64") + set(_detected_arch "ARM") + elseif(_simd_proc MATCHES "ppc64(le|el)?|powerpc|ppc") + set(_detected_arch "PPC") + elseif(_simd_proc MATCHES "nvcl|sm_89|sm_90") + set(_detected_arch "NVIDIA") + else() + set(_detected_arch "UNKNOWN") + endif() + + set(SIMD_ARCH_FAMILY "${_detected_arch}" CACHE STRING "SIMD architecture family") +endif() + +# Defaults +set(SIMD_LEVEL "SCALAR" CACHE STRING "Detected SIMD level") +set(SIMD_ALIGNMENT 16 CACHE STRING "Alignment in bytes") +set(SIMD_HAS_FLOAT ON CACHE BOOL "Float SIMD support") +set(SIMD_HAS_DOUBLE ON CACHE BOOL "Double SIMD support") + +# Save/restore flags helper +set(_SIMD_SAVED_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}") +macro(_simd_restore_flags) + if(DEFINED _SIMD_SAVED_REQUIRED_FLAGS) + set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS}") + endif() +endmacro() + +# ------------------------------------------------ +# 2. 
x86: SSE2 → AVX2 → AVX512 +# ------------------------------------------------ +if(SIMD_ARCH_FAMILY STREQUAL "x86") + # AVX512 double (64-byte) + set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -mavx512f -mavx512dq") + check_cxx_source_compiles(" + #include + int main() { __m512d v = _mm512_set1_pd(1.0); (void)v; return 0; } + " _HAVE_AVX512_DOUBLE) + + if(_HAVE_AVX512_DOUBLE) + set(SIMD_LEVEL "AVX512" CACHE STRING "" FORCE) + set(SIMD_ALIGNMENT 64 CACHE STRING "" FORCE) + _simd_restore_flags() + return() + endif() + + # AVX2 double (32-byte) + set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -mavx2") + check_cxx_source_compiles(" + #include + int main() { __m256d v = _mm256_set1_pd(1.0); (void)v; return 0; } + " _HAVE_AVX2_DOUBLE) + + if(_HAVE_AVX2_DOUBLE) + set(SIMD_LEVEL "AVX2" CACHE STRING "" FORCE) + set(SIMD_ALIGNMENT 32 CACHE STRING "" FORCE) + _simd_restore_flags() + return() + endif() + + # SSE2 double minimum (16-byte) + set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -msse2") + check_cxx_source_compiles(" + #include + int main() { __m128d v = _mm_set1_pd(1.0); (void)v; return 0; } + " _HAVE_SSE2_DOUBLE) + + if(_HAVE_SSE2_DOUBLE) + set(SIMD_LEVEL "SSE2" CACHE STRING "" FORCE) + set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE) + _simd_restore_flags() + return() + endif() + +# -------------------------------------- +# 3. 
ARM NEON - ALL FAMILIES +# -------------------------------------- +elseif(SIMD_ARCH_FAMILY STREQUAL "ARM") + string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" _arm_proc) + + # AArch64 + SVE + if(_arm_proc MATCHES "aarch64|arm64") + set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -march=armv8-a+sve") + check_cxx_source_compiles(" + #include + int main() { svfloat32_t v = svdup_f32(1.0f); (void)v; return 0; } + " _HAVE_SVE) + + if(_HAVE_SVE) + set(SIMD_LEVEL "SVE" CACHE STRING "" FORCE) + set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE) + _simd_restore_flags() + return() + endif() + + # AArch64 NEON (double safe) + check_cxx_source_compiles(" + #include + int main() { + float64x2_t vd = vdupq_n_f64(1.0); + float32x4_t vf = vdupq_n_f32(1.0f); + (void)vd; (void)vf; return 0; + }" _HAVE_NEON_AARCH64) + + if(_HAVE_NEON_AARCH64) + set(SIMD_LEVEL "NEON_AARCH64" CACHE STRING "" FORCE) + set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE) + _simd_restore_flags() + return() + endif() + + # ARMv8 32-bit + elseif(_arm_proc MATCHES "armv8") + set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -march=armv8-a+simd") + check_cxx_source_compiles(" + #include + int main() { float32x4_t v = vdupq_n_f32(1.0f); (void)v; return 0; } + " _HAVE_ARMv8_NEON) + + if(_HAVE_ARMv8_NEON) + set(SIMD_LEVEL "NEON_ARMv8" CACHE STRING "" FORCE) + set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE) + set(SIMD_HAS_DOUBLE OFF CACHE BOOL "" FORCE) + _simd_restore_flags() + return() + endif() + + # ARMv7 NEON + elseif(_arm_proc MATCHES "armv7") + set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -mfpu=neon -march=armv7-a") + check_cxx_source_compiles(" + #include + int main() { float32x4_t v = vdupq_n_f32(1.0f); (void)v; return 0; } + " _HAVE_ARMv7_NEON) + + if(_HAVE_ARMv7_NEON) + set(SIMD_LEVEL "NEON_ARMv7" CACHE STRING "" FORCE) + set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE) + set(SIMD_HAS_DOUBLE OFF CACHE BOOL "" FORCE) + _simd_restore_flags() + return() + endif() + endif() + +# 
-------------------------------------- +# 4. POWERPC - COMPLETE COVERAGE (NEW!) +# -------------------------------------- +elseif(SIMD_ARCH_FAMILY STREQUAL "PPC") + + string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" _ppc_proc) + + # === Power10+ (512-bit vectors, POWER10) + # Note: Power10 needs -mcpu=power10 or -mtune=power10 + set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -mcpu=power10") + check_cxx_source_compiles(" + #include + int main() { + vector double vd = {1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0}; // 512-bit + vector float vf = {1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,1.0f}; + (void)vd; (void)vf; return 0; + }" _HAVE_POWER10) + + if(_HAVE_POWER10) + set(SIMD_LEVEL "POWER10" CACHE STRING "" FORCE) + set(SIMD_ALIGNMENT 64 CACHE STRING "" FORCE) # 512-bit = 64 bytes + set(SIMD_HAS_FLOAT ON CACHE BOOL "" FORCE) + set(SIMD_HAS_DOUBLE ON CACHE BOOL "" FORCE) + _simd_restore_flags() + return() + endif() + + # === Power9 VSX (256-bit, POWER8+) + set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -mcpu=power9 -mvsx") + check_cxx_source_compiles(" + #include + int main() { + vector double vd = {1.0,1.0,1.0,1.0}; // 256-bit VSX double + vector float vf = {1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,1.0f}; // 256-bit + (void)vd; (void)vf; return 0; + }" _HAVE_VSX_POWER9) + + if(_HAVE_VSX_POWER9) + set(SIMD_LEVEL "VSX_POWER9" CACHE STRING "" FORCE) + set(SIMD_ALIGNMENT 32 CACHE STRING "" FORCE) # 256-bit = 32 bytes + set(SIMD_HAS_FLOAT ON CACHE BOOL "" FORCE) + set(SIMD_HAS_DOUBLE ON CACHE BOOL "" FORCE) + _simd_restore_flags() + return() + endif() + + # === Power7+ VSX (128-bit double, POWER7+) + set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -mcpu=power7 -mvsx") + check_cxx_source_compiles(" + #include + int main() { + vector double vd = {1.0,1.0}; // VSX 128-bit double + (void)vd; return 0; + }" _HAVE_VSX_POWER7) + + if(_HAVE_VSX_POWER7) + set(SIMD_LEVEL "VSX_POWER7" CACHE STRING "" FORCE) + set(SIMD_ALIGNMENT 16 CACHE STRING "" 
FORCE) + set(SIMD_HAS_FLOAT ON CACHE BOOL "" FORCE) + set(SIMD_HAS_DOUBLE ON CACHE BOOL "" FORCE) + _simd_restore_flags() + return() + endif() + + # === Classic AltiVec/VMX (PowerPC baseline, 128-bit) + set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -maltivec -mabi=altivec") + check_cxx_source_compiles(" + #include + int main() { + vector float vf = (vector float){1.0f,1.0f,1.0f,1.0f}; + (void)vf; return 0; + }" _HAVE_ALTIVEC) + + if(_HAVE_ALTIVEC) + set(SIMD_LEVEL "ALTIVEC" CACHE STRING "" FORCE) + set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE) + set(SIMD_HAS_FLOAT ON CACHE BOOL "" FORCE) + set(SIMD_HAS_DOUBLE OFF CACHE BOOL "" FORCE) # AltiVec: float primary + _simd_restore_flags() + return() + endif() + +# -------------------------------------- +# 5. NVIDIA GH200 (sm_89) +# -------------------------------------- +elseif(SIMD_ARCH_FAMILY STREQUAL "NVIDIA") + set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} --gpu-arch=sm_89") + check_cxx_source_compiles(" + #include + int main() { double d = 1.0; (void)d; return 0; } + " _HAVE_CUDA_SM89) + + if(_HAVE_CUDA_SM89) + set(SIMD_LEVEL "CUDA_SM89" CACHE STRING "" FORCE) + set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE) + _simd_restore_flags() + return() + endif() + +# -------------------------------------- +# 6. 
Fallback +# -------------------------------------- +else() + _simd_restore_flags() + return() +endif() + +_simd_restore_flags() From e8b187daba85b3924a28b1db632b81e836a4f4e5 Mon Sep 17 00:00:00 2001 From: Mathieu Taillefumier Date: Tue, 24 Feb 2026 17:02:41 +0100 Subject: [PATCH 09/19] Build DDalphaAMG automatically when DDalphaAMG is enabled --- CMakeLists.txt | 62 ++++++++++-- DDalphaAMG/CMakeLists.txt | 193 ++++++++++++++++++++++++++++++++++++++ src/lib/CMakeLists.txt | 6 +- 3 files changed, 251 insertions(+), 10 deletions(-) create mode 100644 DDalphaAMG/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 82880ef60..5e587a1a9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,6 +24,7 @@ endif() # PROJECT AND VERSION include(CMakeDependentOption) include(GNUInstallDirs) +include(FetchContent) cmake_policy(SET CMP0048 NEW) @@ -100,7 +101,9 @@ option(TM_USE_SHMEM "Use shmem API" OFF) option(TM_USE_QUDA "Enable QUDA support" OFF) option(TM_ENABLE_WARNINGS "Enable all warnings" ON) option(TM_ENABLE_TESTS "Enable tests" OFF) -set(TM_QPHIX_SOALEN "4" CACHE STRING "QPhiX specific parameter") +set(TM_QPHIX_SOALEN + "4" + CACHE STRING "QPhiX specific parameter") # MPI dependent options cmake_dependent_option( TM_PERSISTENT_MPI "Use persistent MPI calls for halfspinor [default=no]" OFF @@ -126,6 +129,44 @@ cmake_dependent_option(TM_USE_LEMON "Use the lemon io library" OFF "TM_USE_MPI" cmake_dependent_option(TM_USE_NVHPC "Enable Nvidia HPC toolkit" OFF "TM_USE_CUDA" OFF) +# DDAlphaAMG specific options + +cmake_dependent_option( + DDalphaAMG_ENABLE_PARAMOUNT_OUTPUT "Enable paramount output support" ON + "TM_USE_DDalphaAMG" OFF) + +cmake_dependent_option(DDalphaAMG_ENABLE_FGMRES_RESTEST "Enable GMRES test" ON + "TM_USE_DDalphaAMG" OFF) + +cmake_dependent_option( + DDalphaAMG_ENABLE_PROFILING "Enable profiling support" OFF + "TM_USE_DDalphaAMG" OFF) + +cmake_dependent_option(DDalphaAMG_ENABLE_TRACK_RES "Enable track res support" + ON 
"TM_USE_DDalphaAMG" OFF) + +cmake_dependent_option( + DDalphaAMG_ENABLE_SINGLE_ALLREDUCE_ARNOLDI + "Enable SINGLE_ALLREDUCE_ARNOLDI support" OFF "TM_USE_DDalphaAMG" OFF) + +cmake_dependent_option( + DDalphaAMG_ENABLE_COARSE_RES "Enable COARSE_RES support" OFF + "TM_USE_DDalphaAMG" OFF) + +cmake_dependent_option( + DDalphaAMG_ENABLE_SCHWARZ_RES "Enable SCHWARZ_RES support" OFF + "TM_USE_DDalphaAMG" OFF) + +cmake_dependent_option(DDalphaAMG_ENABLE_OMP "Enable OpenMP support" ON + "TM_USE_DDalphaAMG" OFF) + +cmake_dependent_option( + DDalphaAMG_ENABLE_TESTVECTOR_ANALYSIS "Enable vector analysis support" OFF + "TM_USE_DDalphaAMG" OFF) + +cmake_dependent_option(DDalphaAMG_ENABLE_HDF5 "Enable HDF5 support" OFF + "TM_USE_DDalphaAMG" OFF) + # search for blas and lapack find_package(BLAS REQUIRED) # @@ -144,12 +185,16 @@ set(ALIGN_BASE "0") set(ALIGN_BASE32 "0") set(ALIGN32 " ") -message("${TM_ENABLE_ALIGNMENT}") +# DO NOT MERGE the two if statements as otherwise the automatic alignment will +# not be taken into account + if(${TM_ENABLE_ALIGNMENT} STREQUAL "auto") include(cmake/DetectSimdAndAlignment.cmake) - message(STATUS "SIMD: ${SIMD_LEVEL} (${SIMD_ARCH_FAMILY}), align=${SIMD_ALIGNMENT}") + message( + STATUS "SIMD: ${SIMD_LEVEL} (${SIMD_ARCH_FAMILY}), align=${SIMD_ALIGNMENT}") endif() -if (${TM_ENABLE_ALIGNMENT} STREQUAL "none") + +if(${TM_ENABLE_ALIGNMENT} STREQUAL "none") set(ALIGN_BASE "0x00") set(ALIGN " ") set(ALIGN_BASE32 "0x00") @@ -255,7 +300,7 @@ if(TM_USE_FFTW) endif() if(TM_USE_DDalphaAMG) - find_package(DDalphaAMG REQUIRED) + add_subdirectory(DDalphaAMG) endif() if(TM_ENABLE_WARNINGS) @@ -289,8 +334,11 @@ if(TM_USE_MPI) endif() endif() -if (TM_USE_HALFSPINOR AND NOT TM_USE_GAUGE_COPY) - message(FATAL_ERROR "The TM_USE_GAUGE_COPY option should also be set to ON when TM_USE_HALFSPINOR is ON") +if(TM_USE_HALFSPINOR AND NOT TM_USE_GAUGE_COPY) + message( + FATAL_ERROR + "The TM_USE_GAUGE_COPY option should also be set to ON when 
TM_USE_HALFSPINOR is ON" + ) endif() # keep the autotool config.h header. configure_file("${PROJECT_SOURCE_DIR}/cmake/tmlqcd_config_internal.h.in" diff --git a/DDalphaAMG/CMakeLists.txt b/DDalphaAMG/CMakeLists.txt new file mode 100644 index 000000000..5b54acc24 --- /dev/null +++ b/DDalphaAMG/CMakeLists.txt @@ -0,0 +1,193 @@ +# there is a lot of custom directories to circonvent the deletion of the +# CMakeLists.txt contained in the DDalphaAMG directory. CMake will clone the +# source code and build it with the default options used in the ci/cd. More +# options are available in the main CMakeLists.txt. + +set(DDalphaAMG_SRC_DIR ${CMAKE_SOURCE_DIR}/DDalphaAMG/deps) + +FetchContent_Declare( + DDalphaAMG + GIT_REPOSITORY https://github.com/etmc/DDalphaAMG.git + SOURCE_DIR ${DDalphaAMG_SRC_DIR}) + +FetchContent_MakeAvailable(DDalphaAMG) + +list( + APPEND + DDalphaAMG_SRC_GENERIC + interpolation_generic.c + gathering_generic.c + sse_interpolation_generic.c + coarse_oddeven_generic.c + operator_generic.c + oddeven_generic.c + linalg_generic.c + init_generic.c + vcycle_generic.c + dirac_generic.c + coarse_operator_generic.c + coarsening_generic.c + schwarz_generic.c + ghost_generic.c + vectorization_dirac_generic.c + linsolve_generic.c + sse_coarse_operator_generic.c + data_generic.c + setup_generic.c + sse_linalg_generic.c) + +list( + APPEND + DDalphaAMG_HEADER_GENERIC + interpolation_generic.h + gathering_generic.h + sse_interpolation_generic.h + coarse_oddeven_generic.h + operator_generic.h + oddeven_generic.h + linalg_generic.h + init_generic.h + vcycle_generic.h + dirac_generic.h + coarse_operator_generic.h + coarsening_generic.h + schwarz_generic.h + ghost_generic.h + vectorization_dirac_generic.h + linsolve_generic.h + sse_coarse_operator_generic.h + data_generic.h + setup_generic.h + sse_linalg_generic.h + main_pre_def_generic.h + main_post_def_generic.h) + +list( + APPEND + DDalphaAMG_SRC_GENERAL + ${DDalphaAMG_SRC_DIR}/src/preconditioner.c + 
${DDalphaAMG_SRC_DIR}/src/threading.c + ${DDalphaAMG_SRC_DIR}/src/main.c + ${DDalphaAMG_SRC_DIR}/src/sse_dirac.c + ${DDalphaAMG_SRC_DIR}/src/var_table.c + ${DDalphaAMG_SRC_DIR}/src/data_layout.c + ${DDalphaAMG_SRC_DIR}/src/linsolve.c + ${DDalphaAMG_SRC_DIR}/src/ghost.c + ${DDalphaAMG_SRC_DIR}/src/top_level.c + ${DDalphaAMG_SRC_DIR}/src/dirac.c + ${DDalphaAMG_SRC_DIR}/src/linalg.c + ${DDalphaAMG_SRC_DIR}/src/init.c + ${DDalphaAMG_SRC_DIR}/src/DDalphaAMG_interface.c + ${DDalphaAMG_SRC_DIR}/src/lime_io.c + ${DDalphaAMG_SRC_DIR}/src/sse_linalg.c + ${DDalphaAMG_SRC_DIR}/src/solver_analysis.c + ${DDalphaAMG_SRC_DIR}/src/io.c) + +message(STATUS "${DDalphaAMG_SRC_GENERAL}") +foreach(f IN LISTS DDalphaAMG_SRC_GENERIC) + string(REPLACE "_generic" "_float" f_float "${f}") + message(STATUS "${f_float}") + add_custom_command( + OUTPUT "${CMAKE_BINARY_DIR}/DDalphaAMG/${f_float}.sed-done" + COMMAND + sed -f "${DDalphaAMG_SRC_DIR}/float.sed" "${DDalphaAMG_SRC_DIR}/src/${f}" + > "${CMAKE_BINARY_DIR}/DDalphaAMG/${f_float}" + COMMAND ${CMAKE_COMMAND} -E touch + "${CMAKE_BINARY_DIR}/DDalphaAMG/${f_float}.sed-done" + DEPENDS "${DDalphaAMG_SRC_DIR}/src/${f}" "${DDalphaAMG_SRC_DIR}/float.sed" + VERBATIM) + list(APPEND SED_MARKERS "${CMAKE_BINARY_DIR}/DDalphaAMG/${f_float}.sed-done") + list(APPEND DDalphaAMG_SRC_SINGLE_DOUBLE ${f_float}) + + string(REPLACE "_generic" "_double" f_double "${f}") + add_custom_command( + OUTPUT "${CMAKE_BINARY_DIR}/DDalphaAMG/${f_double}.sed-done" + COMMAND + sed -f "${DDalphaAMG_SRC_DIR}/double.sed" "${DDalphaAMG_SRC_DIR}/src/${f}" + > "${CMAKE_BINARY_DIR}/DDalphaAMG/${f_double}" + COMMAND ${CMAKE_COMMAND} -E touch + "${CMAKE_BINARY_DIR}/DDalphaAMG/${f_double}.sed-done" + DEPENDS "${DDalphaAMG_SRC_DIR}/src/${f}" "${DDalphaAMG_SRC_DIR}/double.sed" + VERBATIM) + list(APPEND SED_MARKERS "${CMAKE_BINARY_DIR}/DDalphaAMG/${f_double}.sed-done") + list(APPEND DDalphaAMG_SRC_SINGLE_DOUBLE ${f_double}) +endforeach() + +# now parse the header +foreach(f IN LISTS 
DDalphaAMG_HEADER_GENERIC) + string(REPLACE "_generic" "_float" f_float "${f}") + message(STATUS "${f_float}") + add_custom_command( + OUTPUT "${CMAKE_BINARY_DIR}/DDalphaAMG/${f_float}.sed-done" + COMMAND + sed -f "${DDalphaAMG_SRC_DIR}/float.sed" "${DDalphaAMG_SRC_DIR}/src/${f}" + > "${CMAKE_BINARY_DIR}/DDalphaAMG/${f_float}" + COMMAND ${CMAKE_COMMAND} -E touch + "${CMAKE_BINARY_DIR}/DDalphaAMG/${f_float}.sed-done" + DEPENDS "${DDalphaAMG_SRC_DIR}/src/${f}" "${DDalphaAMG_SRC_DIR}/float.sed" + VERBATIM) + list(APPEND SED_MARKERS "${CMAKE_BINARY_DIR}/DDalphaAMG/${f_float}.sed-done") + list(APPEND DDalphaAMG_HEADER_SINGLE_DOUBLE ${f_float}) + + string(REPLACE "_generic" "_double" f_double "${f}") + add_custom_command( + OUTPUT "${CMAKE_BINARY_DIR}/DDalphaAMG/${f_double}.sed-done" + COMMAND + sed -f "${DDalphaAMG_SRC_DIR}/double.sed" "${DDalphaAMG_SRC_DIR}/src/${f}" + > "${CMAKE_BINARY_DIR}/DDalphaAMG/${f_double}" + COMMAND ${CMAKE_COMMAND} -E touch + "${CMAKE_BINARY_DIR}/DDalphaAMG/${f_double}.sed-done" + DEPENDS "${DDalphaAMG_SRC_DIR}/src/${f}" "${DDalphaAMG_SRC_DIR}/double.sed" + VERBATIM) + + list(APPEND SED_MARKERS "${CMAKE_BINARY_DIR}/DDalphaAMG/${f_double}.sed-done") + list(APPEND DDalphaAMG_HEADER_SINGLE_DOUBLE ${f_double}) +endforeach() + +foreach(outfile IN LISTS DDalphaAMG_SRC_SINGLE_DOUBLE + DDalphaAMG_HEADER_SINGLE_DOUBLE) + set_source_files_properties("${CMAKE_BINARY_DIR}/DDalphaAMG/${outfile}" + PROPERTIES GENERATED TRUE) +endforeach() + +# message(STATUS "${DDalphaAMG_SRC_SINGLE_DOUBLE}") + +add_custom_target(run_sed ALL DEPENDS ${SED_MARKERS}) + +add_library(DDalphaAMG ${DDalphaAMG_SRC_GENERAL} + ${DDalphaAMG_SRC_SINGLE_DOUBLE}) + +target_compile_options(DDalphaAMG + PRIVATE "$<$:-mavx2;-mfma>") + +add_dependencies(DDalphaAMG run_sed) + +target_link_libraries( + DDalphaAMG + PUBLIC MPI::MPI_C $<$:hdf5:hdf5> + $<$:OpenMP::OpenMP_C> tmlqcd::clime) + +target_include_directories( + DDalphaAMG + PUBLIC $ + $ + $ + $) + +target_compile_definitions( + 
DDalphaAMG + PUBLIC + $<$:PARAMOUNTOUTPUT> + $<$:FGMRES_RESTEST> + $<$:PROFILING> + $<$:SINGLE_ALLREDUCE_ARNOLDI> + $<$:COARSE_RES> + $<$:SCHWARZ_RES> + $<$:OPENMP> + $<$:TRACK_RES> + $<$:TESTVECTOR_ANALYSIS> + $<$:HAVE_HDF5> + $<$:DEBUG> + SSE) + +# add_library(tmlqcd::DDalphaAMG alias DDalphaAMG) diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index ebed35308..b70e7a80d 100644 --- a/src/lib/CMakeLists.txt +++ b/src/lib/CMakeLists.txt @@ -407,8 +407,8 @@ if(CMAKE_MAJOR_VERSION LESS 4) flex_target(tmlqcd_input_read read_input.l ${CMAKE_BINARY_DIR}/read_input.c COMPILE_FLAGS "-Ca -Ptmlqcd -i") else() - flex_target(tmlqcd_input_read read_input.l ${CMAKE_BINARY_DIR}/read_input.c OPTIONS - "-Ca -Ptmlqcd -i") + flex_target(tmlqcd_input_read read_input.l ${CMAKE_BINARY_DIR}/read_input.c + OPTIONS "-Ca -Ptmlqcd -i") endif() # create a target library with namespacing because cmake does not know name @@ -425,7 +425,7 @@ set_target_properties(hmc PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION 1) # define a library and add the dependencies target_link_libraries( hmc - PUBLIC $<$:tmlqcd::DDalphaAMG> + PUBLIC $<$:DDalphaAMG> $<$:tmlqcd::qphix> $<$:tmlqcd::fftw3> $<$:QUDA::quda> From d6284c018d934abb9209bbf5210236aeafe60660 Mon Sep 17 00:00:00 2001 From: Roman Gruber Date: Wed, 25 Feb 2026 14:50:17 +0100 Subject: [PATCH 10/19] first draft for a global application context --- src/lib/app_context/app_context.c | 59 +++++++++++++++++++++++ src/lib/include/mpi.h | 77 +++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 src/lib/app_context/app_context.c create mode 100644 src/lib/include/mpi.h diff --git a/src/lib/app_context/app_context.c b/src/lib/app_context/app_context.c new file mode 100644 index 000000000..00d5d94b2 --- /dev/null +++ b/src/lib/app_context/app_context.c @@ -0,0 +1,59 @@ +/*********************************************************************** + * + * Copyright (C) 2026 Roman Gruber + * + * This file is part of 
tmLQCD. + * + * tmLQCD is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * tmLQCD is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with tmLQCD. If not, see <http://www.gnu.org/licenses/>. + * + * App context module + * + * Author: Roman Gruber + * roman.gruber@unibe.ch + * + *******************************************************************************/ + +#include <mpi.h> +#include <stdbool.h> +#include "fatal_error.h" + + +static AppContext app_instance = { + .mpi = { + .comm = MPI_COMM_WORLD // default communicator + } +}; + + +const AppContext* app(void) +{ + return &app_instance; +} + + +void app_context_init(const MPI_Comm comm) +{ + static bool initialized = false; + + if (initialized) fatal_error("Application context already initialized", __func__); + + app_instance.mpi.comm = comm; + initialized = true; +} + + +void app_context_finalize(void) +{ + +} diff --git a/src/lib/include/mpi.h b/src/lib/include/mpi.h new file mode 100644 index 000000000..0d5b02993 --- /dev/null +++ b/src/lib/include/mpi.h @@ -0,0 +1,77 @@ +/*********************************************************************** + * + * Copyright (C) 2026 Roman Gruber + * + * This file is part of tmLQCD. + * + * tmLQCD is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version.
+ * + * tmLQCD is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with tmLQCD. If not, see <http://www.gnu.org/licenses/>. + * + * Simple MPI header wrapper + * + * Author: Roman Gruber + * roman.gruber@unibe.ch + * + *******************************************************************************/ + +#ifndef MY_MPI_WRAPPER_H +#define MY_MPI_WRAPPER_H + + +// include *real* MPI header +#include_next <mpi.h> + + +/** + * @brief MPI context + * + * @var comm MPI communicator + */ +typedef struct { + MPI_Comm comm; +} MPIContext; + + +/** + * @brief The global application context struct + * + * @var mpi MPI context + */ +typedef struct { + MPIContext mpi; +} AppContext; + + +/** + * @brief Return the global application context struct + * + * @return Global application context struct + */ +const AppContext* app(void); + + +/** + * @brief Initialize application context + * + * @param[in] comm The MPI communicator to use throughout the application + */ +void app_context_init(const MPI_Comm comm); + + +/** + * @brief Finalize application context + */ +void app_context_finalize(void); + + +#endif From 6822bb3e348052c68d39d2e1d674d569e8d1b700 Mon Sep 17 00:00:00 2001 From: Roman Gruber Date: Wed, 25 Feb 2026 14:59:20 +0100 Subject: [PATCH 11/19] add app_context to cmake build --- src/lib/CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index b70e7a80d..367f33d5f 100644 --- a/src/lib/CMakeLists.txt +++ b/src/lib/CMakeLists.txt @@ -68,6 +68,11 @@ list( # init/init_stout_smear_vars.c init/init_moment_field.c) +list( + APPEND + APP_CONTEXT_SRC_C + app_context/app_context.c) + list( APPEND SOLVER_SRC_C @@ -390,6 +395,7 @@ list( ${LINALG_SRC_C} ${IO_SRC_C} ${INIT_SRC_C} +
${APP_CONTEXT_SRC_C} ${SOLVER_SRC_C} ${TEST_SRC_C} ${MEAS_SRC_C} From a4250d97de84a6f07e9b303b832c5a7ca02274a9 Mon Sep 17 00:00:00 2001 From: Roman Gruber Date: Wed, 25 Feb 2026 15:00:08 +0100 Subject: [PATCH 12/19] search/replace on whole codebase: MPI_COMM_WORLD -> app()->mpi.comm --- src/bin/benchmark.c | 28 +- src/bin/deriv_mg_tune.c | 2 +- src/bin/hmc_tm.c | 6 +- src/bin/invert.c | 2 +- src/bin/offline_measurement.c | 2 +- src/bin/tests/hopping_test.c | 2 +- src/bin/tests/qphix_test_Dslash.c | 16 +- src/bin/tests/test_eigenvalues.c | 8 +- src/lib/DDalphaAMG_interface.c | 8 +- src/lib/compare_derivative.c | 4 +- src/lib/fatal_error.c | 2 +- src/lib/init/init_parallel.c | 2 +- src/lib/io/deri_write_stdout.c | 2 +- src/lib/io/dml.c | 2 +- src/lib/io/eospinor_read.c | 2 +- src/lib/io/eospinor_write.c | 8 +- src/lib/io/gauge_read_binary.c | 2 +- src/lib/io/gauge_write.c | 2 +- src/lib/io/gauge_write_binary.c | 2 +- src/lib/io/io_cm.c | 2 +- src/lib/io/spinor_read_binary.c | 4 +- src/lib/io/spinor_write_binary.c | 4 +- src/lib/io/spinor_write_stdout.c | 2 +- src/lib/io/sw_write_stdout.c | 2 +- src/lib/linalg/assign_mul_add_r_and_square.c | 2 +- src/lib/linalg/diff_and_square_norm.c | 2 +- src/lib/linalg/scalar_prod_body.c | 4 +- src/lib/linalg/scalar_prod_i.c | 2 +- src/lib/linalg/scalar_prod_r.c | 2 +- src/lib/linalg/scalar_prod_r_32.c | 2 +- src/lib/linalg/square_and_minmax.c | 32 +- src/lib/linalg/square_and_prod_r.c | 4 +- src/lib/linalg/square_norm.c | 4 +- src/lib/linalg/square_norm_32.c | 4 +- src/lib/meas/correlators.c | 4 +- ...easure_clover_field_strength_observables.c | 4 +- src/lib/meas/oriented_plaquettes.c | 2 +- src/lib/meas/pion_norm.c | 2 +- src/lib/meas/polyakov_loop.c | 4 +- src/lib/measure_gauge_action.c | 4 +- src/lib/measure_rectangles.c | 2 +- src/lib/monomial/moment_energy.c | 2 +- src/lib/monomial/monitor_forces.c | 4 +- src/lib/mpi_init.c | 10 +- src/lib/operator/clover_det.c | 4 +- src/lib/prepare_source.c | 8 +- 
src/lib/qphix/qphix_interface.cpp | 4 +- src/lib/quda_interface.c | 14 +- src/lib/reweighting_factor.c | 2 +- src/lib/sighandler.c | 2 +- src/lib/solver/dfl_projector.c | 6 +- src/lib/solver/gcr4complex_body.c | 6 +- src/lib/solver/little_project_eo_body.c | 2 +- src/lib/start.c | 12 +- src/lib/test/check_xchange.c | 400 +++++++++--------- src/lib/update_tm.c | 4 +- src/lib/wrapper/lib_wrapper.c | 4 +- tests/test_buffers.c | 2 +- 58 files changed, 341 insertions(+), 341 deletions(-) diff --git a/src/bin/benchmark.c b/src/bin/benchmark.c index b2f4ee68c..f0b15ce24 100644 --- a/src/bin/benchmark.c +++ b/src/bin/benchmark.c @@ -102,7 +102,7 @@ int main(int argc, char *argv[]) { #else MPI_Init(&argc, &argv); #endif - MPI_Comm_rank(MPI_COMM_WORLD, &g_proc_id); + MPI_Comm_rank(app()->mpi.comm, &g_proc_id); #else g_proc_id = 0; @@ -234,7 +234,7 @@ int main(int argc, char *argv[]) { antioptaway = 0.0; /* compute approximately how many applications we need to do to get a reliable measurement */ #ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); #endif t1 = gettime(); for (j = 0; j < j_max; j++) { @@ -248,14 +248,14 @@ int main(int argc, char *argv[]) { // division by g_nproc because we will average over processes j = (int)(ceil(j_max * 31.0 / dt / g_nproc)); #ifdef TM_USE_MPI - MPI_Allreduce(&j, &j_max, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&j, &j_max, 1, MPI_INT, MPI_SUM, app()->mpi.comm); #else j_max = j; #endif /* perform the actual benchmark */ #ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); #endif t1 = gettime(); antioptaway = 0.0; @@ -268,14 +268,14 @@ int main(int argc, char *argv[]) { } dt = gettime() - t1; #ifdef TM_USE_MPI - MPI_Allreduce(&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); #else sdt = dt; #endif qdt = dt * dt; #ifdef TM_USE_MPI - MPI_Allreduce(&qdt, &sqdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + 
MPI_Allreduce(&qdt, &sqdt, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); #else sqdt = qdt; #endif @@ -321,9 +321,9 @@ int main(int argc, char *argv[]) { dt2 = t2 - t1; /* compute the bandwidth */ dt = dts - dt2; - MPI_Allreduce(&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); sdt = sdt / ((double)g_nproc); - MPI_Allreduce(&dt2, &dt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&dt2, &dt, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); dt = dt / ((double)g_nproc); dt = 1.0e6f * dt / ((double)(k_max * j_max * (VOLUME))); if (g_proc_id == 0) { @@ -365,7 +365,7 @@ int main(int argc, char *argv[]) { /* estimate a reasonable number of applications to get a reliable measurement */ #ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); #endif t1 = gettime(); for (j = 0; j < j_max; j++) { @@ -379,14 +379,14 @@ int main(int argc, char *argv[]) { // division by g_nproc because we will average over processes using MPI_SUM j = (int)(ceil(j_max * 31.0 / dt / g_nproc)); #ifdef TM_USE_MPI - MPI_Allreduce(&j, &j_max, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&j, &j_max, 1, MPI_INT, MPI_SUM, app()->mpi.comm); #else j_max = j; #endif /* perform the actual measurement */ #ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); #endif t1 = gettime(); for (j = 0; j < j_max; j++) { @@ -398,13 +398,13 @@ int main(int argc, char *argv[]) { t2 = gettime(); dt = t2 - t1; #ifdef TM_USE_MPI - MPI_Allreduce(&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&dt, &sdt, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); #else sdt = dt; #endif qdt = dt * dt; #ifdef TM_USE_MPI - MPI_Allreduce(&qdt, &sqdt, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&qdt, &sqdt, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); #else sqdt = qdt; #endif @@ -451,7 +451,7 @@ int main(int argc, char *argv[]) { free_spinor_field(); free_moment_field(); #ifdef TM_USE_MPI - 
MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); MPI_Finalize(); #endif return (0); diff --git a/src/bin/deriv_mg_tune.c b/src/bin/deriv_mg_tune.c index 7c45524de..2f5563337 100644 --- a/src/bin/deriv_mg_tune.c +++ b/src/bin/deriv_mg_tune.c @@ -357,7 +357,7 @@ int main(int argc, char *argv[]) { _endQuda(); #endif #ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); MPI_Finalize(); #endif diff --git a/src/bin/hmc_tm.c b/src/bin/hmc_tm.c index b68a5250f..2082358ee 100644 --- a/src/bin/hmc_tm.c +++ b/src/bin/hmc_tm.c @@ -479,7 +479,7 @@ int main(int argc, char *argv[]) { sleep(io_timeout); #ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); #endif } /* Now move .conf.tmp into place */ @@ -528,7 +528,7 @@ int main(int argc, char *argv[]) { } #ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); #endif if (ix == 0 && g_proc_id == 0) { countfile = fopen("history_hmc_tm", "a"); @@ -581,7 +581,7 @@ int main(int argc, char *argv[]) { _endQuda(); #endif #ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); MPI_Finalize(); #endif diff --git a/src/bin/invert.c b/src/bin/invert.c index b5040ba88..7e004188c 100644 --- a/src/bin/invert.c +++ b/src/bin/invert.c @@ -448,7 +448,7 @@ int main(int argc, char *argv[]) { _endQuda(); #endif #ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); MPI_Finalize(); #endif return (0); diff --git a/src/bin/offline_measurement.c b/src/bin/offline_measurement.c index b6cbc13fa..c42d51958 100644 --- a/src/bin/offline_measurement.c +++ b/src/bin/offline_measurement.c @@ -297,7 +297,7 @@ int main(int argc, char *argv[]) { free(input_filename); #ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); MPI_Finalize(); #endif return (0); diff --git a/src/bin/tests/hopping_test.c b/src/bin/tests/hopping_test.c index 0e5ff03e7..a09c0e46f 100644 --- a/src/bin/tests/hopping_test.c +++ 
b/src/bin/tests/hopping_test.c @@ -318,7 +318,7 @@ int main(int argc, char *argv[]) { } #ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); MPI_Finalize(); #endif } diff --git a/src/bin/tests/qphix_test_Dslash.c b/src/bin/tests/qphix_test_Dslash.c index 41e2602a4..b998fe52f 100644 --- a/src/bin/tests/qphix_test_Dslash.c +++ b/src/bin/tests/qphix_test_Dslash.c @@ -221,7 +221,7 @@ int main(int argc, char* argv[]) { source_location, 12345 /* seed */); #ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); #endif tm_t1 = gettime(); @@ -231,7 +231,7 @@ int main(int argc, char* argv[]) { tm_t2 = gettime(); #ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); #endif q_t1 = gettime(); Mfull_qphix(qphix_out_cb_spinors[0], qphix_out_cb_spinors[1], op->sr0, op->sr1, op->type); @@ -297,7 +297,7 @@ int main(int argc, char* argv[]) { free_spinor_field(); free_moment_field(); #ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); MPI_Finalize(); #endif return (failed); @@ -305,7 +305,7 @@ int main(int argc, char* argv[]) { double compare_spinors(spinor* s1, spinor* s2) { #ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); #endif int coords[4]; int x, y, z, t, id = 0; @@ -352,7 +352,7 @@ double compare_spinors(spinor* s1, spinor* s2) { } } #ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); #endif } // z } // y @@ -361,7 +361,7 @@ double compare_spinors(spinor* s1, spinor* s2) { } // if( SourceInfo.type == SRC_TYPE_POINT ) #ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); #endif if (g_proc_id == 0) { printf("\n"); @@ -376,7 +376,7 @@ double compare_spinors(spinor* s1, spinor* s2) { double squarenorm = diff_and_square_norm(s1, s2, VOLUME); #ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); #endif id = 0; for (int t_global = 0; t_global < g_nproc_t * T; 
t_global++) { @@ -408,7 +408,7 @@ double compare_spinors(spinor* s1, spinor* s2) { } } #ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); #endif } // z } // y diff --git a/src/bin/tests/test_eigenvalues.c b/src/bin/tests/test_eigenvalues.c index c52d29cf8..6d162117e 100644 --- a/src/bin/tests/test_eigenvalues.c +++ b/src/bin/tests/test_eigenvalues.c @@ -390,14 +390,14 @@ int main(int argc, char *argv[]) { } rlxd_get(rlxd_state); #ifdef TM_USE_MPI - MPI_Send(&rlxd_state[0], 105, MPI_INT, 1, 99, MPI_COMM_WORLD); - MPI_Recv(&rlxd_state[0], 105, MPI_INT, g_nproc - 1, 99, MPI_COMM_WORLD, &status); + MPI_Send(&rlxd_state[0], 105, MPI_INT, 1, 99, app()->mpi.comm); + MPI_Recv(&rlxd_state[0], 105, MPI_INT, g_nproc - 1, 99, app()->mpi.comm, &status); rlxd_reset(rlxd_state); #endif } #ifdef TM_USE_MPI else { - MPI_Recv(&rlxd_state[0], 105, MPI_INT, g_proc_id - 1, 99, MPI_COMM_WORLD, &status); + MPI_Recv(&rlxd_state[0], 105, MPI_INT, g_proc_id - 1, 99, app()->mpi.comm, &status); rlxd_reset(rlxd_state); /* hot */ if (startoption == 1) { @@ -408,7 +408,7 @@ int main(int argc, char *argv[]) { k = 0; } rlxd_get(rlxd_state); - MPI_Send(&rlxd_state[0], 105, MPI_INT, k, 99, MPI_COMM_WORLD); + MPI_Send(&rlxd_state[0], 105, MPI_INT, k, 99, app()->mpi.comm); } #endif diff --git a/src/lib/DDalphaAMG_interface.c b/src/lib/DDalphaAMG_interface.c index bf2da4bef..dda8a826e 100644 --- a/src/lib/DDalphaAMG_interface.c +++ b/src/lib/DDalphaAMG_interface.c @@ -350,7 +350,7 @@ static int MG_pre_solve(su3 **gf) { MG_init(); mg_initialized = 1; if (g_proc_id == 0) printf("TM_USE_DDalphaAMG initialized\n"); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); } if (mg_update_gauge == 1) { @@ -1243,7 +1243,7 @@ int MG_solver(spinor *const phi_new, spinor *const phi_old, const double precisi if (g_proc_id == 0) printf("ERROR: solver didn't converge after two trials!! Aborting... 
\n"); // TODO: handle abort DDalphaAMG_finalize(); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); MPI_Finalize(); exit(1); } @@ -1319,7 +1319,7 @@ int MG_solver_nd(spinor *const up_new, spinor *const dn_new, spinor *const up_ol if (g_proc_id == 0) printf("ERROR: solver didn't converge after two trials!! Aborting... \n"); // TODO: handle abort DDalphaAMG_finalize(); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); MPI_Finalize(); exit(1); } @@ -1393,7 +1393,7 @@ int MG_mms_solver_nd(spinor **const up_new, spinor **const dn_new, spinor *const if (g_proc_id == 0) printf("ERROR: solver didn't converge after two trials!! Aborting... \n"); // TODO: handle abort DDalphaAMG_finalize(); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); MPI_Finalize(); exit(1); } diff --git a/src/lib/compare_derivative.c b/src/lib/compare_derivative.c index 6ac39f222..b7802481f 100644 --- a/src/lib/compare_derivative.c +++ b/src/lib/compare_derivative.c @@ -64,8 +64,8 @@ void compare_derivative(monomial *mnl, su3adj **ext_lib, su3adj **native, const int red_n_diff = 0; #ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); - MPI_Reduce(&n_diff, &red_n_diff, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); + MPI_Reduce(&n_diff, &red_n_diff, 1, MPI_INT, MPI_MAX, 0, app()->mpi.comm); #else red_n_diff = n_diff; #endif diff --git a/src/lib/fatal_error.c b/src/lib/fatal_error.c index b4f5d5be9..8e00dfbe7 100644 --- a/src/lib/fatal_error.c +++ b/src/lib/fatal_error.c @@ -46,7 +46,7 @@ void fatal_error(char const *error, char const *function) { } #ifdef TM_USE_MPI - MPI_Abort(MPI_COMM_WORLD, 1); + MPI_Abort(app()->mpi.comm, 1); MPI_Finalize(); #endif diff --git a/src/lib/init/init_parallel.c b/src/lib/init/init_parallel.c index 194404d7d..9dfdbb0c5 100644 --- a/src/lib/init/init_parallel.c +++ b/src/lib/init/init_parallel.c @@ -71,7 +71,7 @@ void init_parallel_and_read_input(int argc, char *argv[], const char input_filen #endif // 
QPHIX_QMP_COMMS #if defined(TM_USE_MPI) || defined(QPHIX_QMP_COMMS) - MPI_Comm_rank(MPI_COMM_WORLD, &g_proc_id); + MPI_Comm_rank(app()->mpi.comm, &g_proc_id); #else g_proc_id = 0; #endif diff --git a/src/lib/io/deri_write_stdout.c b/src/lib/io/deri_write_stdout.c index 6b095d22e..db1c4f98e 100644 --- a/src/lib/io/deri_write_stdout.c +++ b/src/lib/io/deri_write_stdout.c @@ -66,7 +66,7 @@ void deri_write_stdout(su3adj** const df) { } } #ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); #endif } } diff --git a/src/lib/io/dml.c b/src/lib/io/dml.c index 2650e108d..0465f1206 100644 --- a/src/lib/io/dml.c +++ b/src/lib/io/dml.c @@ -31,7 +31,7 @@ int DML_global_xor(uint32_t *x) { int status; status = - MPI_Allreduce((void *)&work, (void *)&dest, 1, MPI_UNSIGNED_LONG, MPI_BXOR, MPI_COMM_WORLD); + MPI_Allreduce((void *)&work, (void *)&dest, 1, MPI_UNSIGNED_LONG, MPI_BXOR, app()->mpi.comm); if (status == MPI_SUCCESS) { *x = (uint32_t)dest; diff --git a/src/lib/io/eospinor_read.c b/src/lib/io/eospinor_read.c index 2767d8b13..d23623595 100644 --- a/src/lib/io/eospinor_read.c +++ b/src/lib/io/eospinor_read.c @@ -99,7 +99,7 @@ int read_eospinor(spinor *const s, char *filename) { "LIME read error occured with status = %d while reading file %s!\n Aborting...\n", status, filename); #ifdef TM_USE_MPI - MPI_Abort(MPI_COMM_WORLD, 1); + MPI_Abort(app()->mpi.comm, 1); MPI_Finalize(); #endif exit(500); diff --git a/src/lib/io/eospinor_write.c b/src/lib/io/eospinor_write.c index 619e11ac8..52c3e688d 100644 --- a/src/lib/io/eospinor_write.c +++ b/src/lib/io/eospinor_write.c @@ -67,7 +67,7 @@ int write_eospinor(spinor* const s, char* filename, const double evalue, const d if (limewriter == (LimeWriter*)NULL) { fprintf(stderr, "LIME error in file %s for writing!\n Aboring...\n", filename); #ifdef TM_USE_MPI - MPI_Abort(MPI_COMM_WORLD, 1); + MPI_Abort(app()->mpi.comm, 1); MPI_Finalize(); #endif exit(500); @@ -78,7 +78,7 @@ int write_eospinor(spinor* const s, char* 
filename, const double evalue, const d if (status < 0) { fprintf(stderr, "LIME write header (xlf-info) error %d\n", status); #ifdef TM_USE_MPI - MPI_Abort(MPI_COMM_WORLD, 1); + MPI_Abort(app()->mpi.comm, 1); MPI_Finalize(); #endif exit(500); @@ -94,7 +94,7 @@ int write_eospinor(spinor* const s, char* filename, const double evalue, const d if (status < 0) { fprintf(stderr, "LIME write header (eospinor-binary-data) error %d\n", status); #ifdef TM_USE_MPI - MPI_Abort(MPI_COMM_WORLD, 1); + MPI_Abort(app()->mpi.comm, 1); MPI_Finalize(); #endif exit(500); @@ -137,7 +137,7 @@ int write_eospinor(spinor* const s, char* filename, const double evalue, const d if (status < 0) { fprintf(stderr, "LIME write error %d\n", status); #ifdef TM_USE_MPI - MPI_Abort(MPI_COMM_WORLD, 1); + MPI_Abort(app()->mpi.comm, 1); MPI_Finalize(); #endif exit(500); diff --git a/src/lib/io/gauge_read_binary.c b/src/lib/io/gauge_read_binary.c index 473e4d9c7..28cebd7f7 100644 --- a/src/lib/io/gauge_read_binary.c +++ b/src/lib/io/gauge_read_binary.c @@ -219,7 +219,7 @@ int read_binary_gauge_data(LimeReader *limereader, DML_Checksum *checksum, param "LIME read error occurred with status = %d while reading in gauge_read_binary.c!\n", status); #ifdef TM_USE_MPI - MPI_Abort(MPI_COMM_WORLD, 1); + MPI_Abort(app()->mpi.comm, 1); MPI_Finalize(); #endif return (-2); diff --git a/src/lib/io/gauge_write.c b/src/lib/io/gauge_write.c index 43ff7fd83..20605bc8a 100644 --- a/src/lib/io/gauge_write.c +++ b/src/lib/io/gauge_write.c @@ -50,7 +50,7 @@ int write_gauge_field(char *filename, const int prec, paramsXlfInfo const *xlfIn fflush(stdout); } #ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); #endif /* MPI */ destruct_writer(writer); diff --git a/src/lib/io/gauge_write_binary.c b/src/lib/io/gauge_write_binary.c index ad3c7882e..64f504264 100644 --- a/src/lib/io/gauge_write_binary.c +++ b/src/lib/io/gauge_write_binary.c @@ -222,7 +222,7 @@ int write_binary_gauge_data(LimeWriter* 
limewriter, const int prec, DML_Checksum fprintf(stderr, "id = %d, bytes = %lu, size = %d\n", g_cart_id, bytes, (int)(4 * sizeof(su3) / 8)); #ifdef TM_USE_MPI - MPI_Abort(MPI_COMM_WORLD, 1); + MPI_Abort(app()->mpi.comm, 1); MPI_Finalize(); #endif exit(500); diff --git a/src/lib/io/io_cm.c b/src/lib/io/io_cm.c index e08fd37c6..2f4bfa5a4 100644 --- a/src/lib/io/io_cm.c +++ b/src/lib/io/io_cm.c @@ -104,7 +104,7 @@ int read_spinorfield_cm_swap_single(spinor *const s, spinor *const r, char *file if (ifs == (FILE *)NULL) { fprintf(stderr, "Could not open file %s\n Aborting...\n", filename); #ifdef TM_USE_MPI - MPI_Abort(MPI_COMM_WORLD, 1); + MPI_Abort(app()->mpi.comm, 1); MPI_Finalize(); #endif exit(500); diff --git a/src/lib/io/spinor_read_binary.c b/src/lib/io/spinor_read_binary.c index 81607a700..fbf97c32c 100644 --- a/src/lib/io/spinor_read_binary.c +++ b/src/lib/io/spinor_read_binary.c @@ -198,7 +198,7 @@ int read_binary_spinor_data(spinor *const s, spinor *const r, LimeReader *limere "spinor_read_binary.c!\n", status); #ifdef TM_USE_MPI - MPI_Abort(MPI_COMM_WORLD, 1); + MPI_Abort(app()->mpi.comm, 1); MPI_Finalize(); #endif return (-2); @@ -376,7 +376,7 @@ int read_binary_spinor_data_l(spinor *const s, LimeReader *limereader, DML_Check "spinor_read_binary.c!\n", status); #ifdef TM_USE_MPI - MPI_Abort(MPI_COMM_WORLD, 1); + MPI_Abort(app()->mpi.comm, 1); MPI_Finalize(); #endif return (-2); diff --git a/src/lib/io/spinor_write_binary.c b/src/lib/io/spinor_write_binary.c index 560b5ce65..b8718cc9e 100644 --- a/src/lib/io/spinor_write_binary.c +++ b/src/lib/io/spinor_write_binary.c @@ -217,7 +217,7 @@ int write_binary_spinor_data(spinor *const s, spinor *const r, LimeWriter *limew "write_binary_spinor_data (spinor_write_binary.c)!\n", status); #ifdef TM_USE_MPI - MPI_Abort(MPI_COMM_WORLD, 1); + MPI_Abort(app()->mpi.comm, 1); MPI_Finalize(); #endif exit(500); @@ -458,7 +458,7 @@ int write_binary_spinor_data_l(spinor *const s, LimeWriter *limewriter, DML_Chec 
"write_binary_spinor_data_l (spinor_write_binary.c)!\n", status); #ifdef TM_USE_MPI - MPI_Abort(MPI_COMM_WORLD, 1); + MPI_Abort(app()->mpi.comm, 1); MPI_Finalize(); #endif exit(500); diff --git a/src/lib/io/spinor_write_stdout.c b/src/lib/io/spinor_write_stdout.c index 9c13abf47..6c1fa1034 100644 --- a/src/lib/io/spinor_write_stdout.c +++ b/src/lib/io/spinor_write_stdout.c @@ -57,7 +57,7 @@ void spinor_write_stdout(spinor* const s) { fflush(stdout); } #ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); #endif } } diff --git a/src/lib/io/sw_write_stdout.c b/src/lib/io/sw_write_stdout.c index 027edd172..b2cec0b29 100644 --- a/src/lib/io/sw_write_stdout.c +++ b/src/lib/io/sw_write_stdout.c @@ -66,7 +66,7 @@ void sw_write_stdout(su3** u) { } } #ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); #endif } } diff --git a/src/lib/linalg/assign_mul_add_r_and_square.c b/src/lib/linalg/assign_mul_add_r_and_square.c index f9fb60f72..fc45c313f 100644 --- a/src/lib/linalg/assign_mul_add_r_and_square.c +++ b/src/lib/linalg/assign_mul_add_r_and_square.c @@ -99,7 +99,7 @@ double assign_mul_add_r_and_square(spinor *const R, const double c, const spinor #ifdef TM_USE_MPI if (parallel) { - MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); return (mres); } #endif diff --git a/src/lib/linalg/diff_and_square_norm.c b/src/lib/linalg/diff_and_square_norm.c index 01a134fc8..0b7d6612a 100644 --- a/src/lib/linalg/diff_and_square_norm.c +++ b/src/lib/linalg/diff_and_square_norm.c @@ -72,7 +72,7 @@ double diff_and_square_norm(spinor *const Q, spinor *const R, const int N) { } kc = ks + kc; #ifdef TM_USE_MPI - MPI_Allreduce(&kc, &ks, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&kc, &ks, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); return ks; #else return kc; diff --git a/src/lib/linalg/scalar_prod_body.c b/src/lib/linalg/scalar_prod_body.c 
index 9a38cf5fb..3114d6119 100644 --- a/src/lib/linalg/scalar_prod_body.c +++ b/src/lib/linalg/scalar_prod_body.c @@ -71,7 +71,7 @@ _Complex double _PSWITCH(scalar_prod)(const _PTSWITCH(spinor) *const S, #ifdef TM_USE_MPI if (parallel == 1) { - MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE_COMPLEX, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE_COMPLEX, MPI_SUM, app()->mpi.comm); return (mres); } #endif @@ -113,7 +113,7 @@ _Complex double _PSWITCH(scalar_prod_ts)(const _PTSWITCH(spinor) *const S, #ifdef TM_USE_MPI if (parallel == 1) { - MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE_COMPLEX, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE_COMPLEX, MPI_SUM, app()->mpi.comm); return (mres); } #endif diff --git a/src/lib/linalg/scalar_prod_i.c b/src/lib/linalg/scalar_prod_i.c index ca59c65d3..ca1d378b7 100644 --- a/src/lib/linalg/scalar_prod_i.c +++ b/src/lib/linalg/scalar_prod_i.c @@ -64,7 +64,7 @@ double scalar_prod_i(spinor *const S, spinor *const R, const int N, const int pa #if defined TM_USE_MPI if (parallel == 1) { - MPI_Allreduce(&kc, &ks, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&kc, &ks, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); kc = ks; } #endif diff --git a/src/lib/linalg/scalar_prod_r.c b/src/lib/linalg/scalar_prod_r.c index c5288aa34..8e1c07af1 100644 --- a/src/lib/linalg/scalar_prod_r.c +++ b/src/lib/linalg/scalar_prod_r.c @@ -91,7 +91,7 @@ double scalar_prod_r(const spinor *const S, const spinor *const R, const int N, #if defined TM_USE_MPI if (parallel) { - MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); return mres; } #endif diff --git a/src/lib/linalg/scalar_prod_r_32.c b/src/lib/linalg/scalar_prod_r_32.c index 5bc512806..23f673ee2 100644 --- a/src/lib/linalg/scalar_prod_r_32.c +++ b/src/lib/linalg/scalar_prod_r_32.c @@ -64,7 +64,7 @@ float scalar_prod_r_32(const spinor32 *const S, const spinor32 *const R, const 
i #if defined TM_USE_MPI if (parallel) { - MPI_Allreduce(&res, &mres, 1, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&res, &mres, 1, MPI_FLOAT, MPI_SUM, app()->mpi.comm); return mres; } #endif diff --git a/src/lib/linalg/square_and_minmax.c b/src/lib/linalg/square_and_minmax.c index 3a97c1ede..6432d2602 100644 --- a/src/lib/linalg/square_and_minmax.c +++ b/src/lib/linalg/square_and_minmax.c @@ -75,12 +75,12 @@ void square_and_minmax(double *const sum, double *const min, double *const max, #if defined TM_USE_MPI - MPI_Allreduce(&kc, sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&kc, sum, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); - MPI_Allreduce(min, &kc, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); + MPI_Allreduce(min, &kc, 1, MPI_DOUBLE, MPI_MIN, app()->mpi.comm); *min = kc; - MPI_Allreduce(max, &kc, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + MPI_Allreduce(max, &kc, 1, MPI_DOUBLE, MPI_MAX, app()->mpi.comm); *max = kc; #endif @@ -129,12 +129,12 @@ void square_and_minmax_rel(double *const sum, double *const min, double *const m #if defined TM_USE_MPI - MPI_Allreduce(&kc, sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&kc, sum, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); - MPI_Allreduce(min, &kc, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); + MPI_Allreduce(min, &kc, 1, MPI_DOUBLE, MPI_MIN, app()->mpi.comm); *min = kc; - MPI_Allreduce(max, &kc, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + MPI_Allreduce(max, &kc, 1, MPI_DOUBLE, MPI_MAX, app()->mpi.comm); *max = kc; #endif @@ -256,18 +256,18 @@ void square_and_minmax_abs(double *const sum, double *const min, double *const m #if defined TM_USE_MPI - MPI_Allreduce(&kc, sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&kc, sum, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); - MPI_Allreduce(min, &kc, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); + MPI_Allreduce(min, &kc, 1, MPI_DOUBLE, MPI_MIN, app()->mpi.comm); *min = kc; - MPI_Allreduce(max, &kc, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + 
MPI_Allreduce(max, &kc, 1, MPI_DOUBLE, MPI_MAX, app()->mpi.comm); *max = kc; - MPI_Allreduce(min_abs, &kc, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); + MPI_Allreduce(min_abs, &kc, 1, MPI_DOUBLE, MPI_MIN, app()->mpi.comm); *min_abs = kc; - MPI_Allreduce(max_abs, &kc, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + MPI_Allreduce(max_abs, &kc, 1, MPI_DOUBLE, MPI_MAX, app()->mpi.comm); *max_abs = kc; #endif @@ -428,18 +428,18 @@ void square_and_minmax_rel_abs(double *const sum, double *const min, double *con #if defined TM_USE_MPI - MPI_Allreduce(&kc, sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&kc, sum, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); - MPI_Allreduce(min, &kc, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); + MPI_Allreduce(min, &kc, 1, MPI_DOUBLE, MPI_MIN, app()->mpi.comm); *min = kc; - MPI_Allreduce(max, &kc, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + MPI_Allreduce(max, &kc, 1, MPI_DOUBLE, MPI_MAX, app()->mpi.comm); *max = kc; - MPI_Allreduce(min_abs, &kc, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); + MPI_Allreduce(min_abs, &kc, 1, MPI_DOUBLE, MPI_MIN, app()->mpi.comm); *min_abs = kc; - MPI_Allreduce(max_abs, &kc, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + MPI_Allreduce(max_abs, &kc, 1, MPI_DOUBLE, MPI_MAX, app()->mpi.comm); *max_abs = kc; #endif diff --git a/src/lib/linalg/square_and_prod_r.c b/src/lib/linalg/square_and_prod_r.c index 212b46593..b02babf42 100644 --- a/src/lib/linalg/square_and_prod_r.c +++ b/src/lib/linalg/square_and_prod_r.c @@ -80,7 +80,7 @@ void square_and_prod_r(double *const x1, double *const x2, spinor *const S, spin #if defined TM_USE_MPI - MPI_Allreduce(&xkc, x1, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&xkc, x1, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); #endif kc = ks + kc; @@ -88,7 +88,7 @@ void square_and_prod_r(double *const x1, double *const x2, spinor *const S, spin #if defined TM_USE_MPI - MPI_Allreduce(&kc, x2, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&kc, x2, 1, MPI_DOUBLE, MPI_SUM, 
app()->mpi.comm); #endif } diff --git a/src/lib/linalg/square_norm.c b/src/lib/linalg/square_norm.c index fd4b77781..7c871f41b 100644 --- a/src/lib/linalg/square_norm.c +++ b/src/lib/linalg/square_norm.c @@ -91,7 +91,7 @@ double square_norm(const spinor *const P, const int N, const int parallel) { #ifdef TM_USE_MPI if (parallel) { - MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); return mres; } #endif @@ -142,7 +142,7 @@ double square_norm_ts(const spinor *const P, const int N, const int parallel) { #ifdef TM_USE_MPI if (parallel) { - MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); return mres; } #endif diff --git a/src/lib/linalg/square_norm_32.c b/src/lib/linalg/square_norm_32.c index b331620cd..53207f454 100644 --- a/src/lib/linalg/square_norm_32.c +++ b/src/lib/linalg/square_norm_32.c @@ -65,7 +65,7 @@ float square_norm_32(const spinor32 *const P, const int N, const int parallel) { #ifdef TM_USE_MPI if (parallel) { - MPI_Allreduce(&res, &mres, 1, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&res, &mres, 1, MPI_FLOAT, MPI_SUM, app()->mpi.comm); return mres; } #endif @@ -113,7 +113,7 @@ float square_norm_ts_32(const spinor32 *const P, const int N, const int parallel #ifdef TM_USE_MPI if (parallel) { - MPI_Allreduce(&res, &mres, 1, MPI_FLOAT, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&res, &mres, 1, MPI_FLOAT, MPI_SUM, app()->mpi.comm); return mres; } #endif diff --git a/src/lib/meas/correlators.c b/src/lib/meas/correlators.c index 36c8d2776..ed50eba3f 100644 --- a/src/lib/meas/correlators.c +++ b/src/lib/meas/correlators.c @@ -125,7 +125,7 @@ void correlators_measurement(const int traj, const int id, const int ieo) { t0 = (int)(measurement_list[id].max_source_slice * tmp); } #ifdef TM_USE_MPI - MPI_Bcast(&t0, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&t0, 1, MPI_INT, 0, 
app()->mpi.comm); #endif if (g_debug_level > 1 && g_proc_id == 0) { printf("# timeslice set to %d (T=%d) for online measurement\n", t0, g_nproc_t * T); @@ -252,7 +252,7 @@ void correlators_measurement(const int traj, const int id, const int ieo) { free(sCpp); free(sCpa); free(sCp4); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); #else free(Cpp); free(Cpa); diff --git a/src/lib/meas/measure_clover_field_strength_observables.c b/src/lib/meas/measure_clover_field_strength_observables.c index 3d33ae999..b95a2390a 100644 --- a/src/lib/meas/measure_clover_field_strength_observables.c +++ b/src/lib/meas/measure_clover_field_strength_observables.c @@ -210,9 +210,9 @@ void measure_clover_field_strength_observables(const su3 **const gf, #endif #ifdef TM_USE_MPI - MPI_Allreduce(&Eres, &mres, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&Eres, &mres, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); Eres = mres; - MPI_Allreduce(&Qres, &mres, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&Qres, &mres, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); Qres = mres; #endif fso->E = energy_density_normalization * Eres; diff --git a/src/lib/meas/oriented_plaquettes.c b/src/lib/meas/oriented_plaquettes.c index 51e73acf2..1fe9207fd 100644 --- a/src/lib/meas/oriented_plaquettes.c +++ b/src/lib/meas/oriented_plaquettes.c @@ -80,7 +80,7 @@ void measure_oriented_plaquettes(const su3 **const gf, double *plaq) { } #ifdef TM_USE_MPI - MPI_Allreduce(plaq, mplaq, 6, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(plaq, mplaq, 6, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); for (int j = 0; j < 6; j++) plaq[j] = mplaq[j]; #endif tm_stopwatch_pop(&g_timers, 0, 2, ""); diff --git a/src/lib/meas/pion_norm.c b/src/lib/meas/pion_norm.c index 1ee756145..38d13320b 100644 --- a/src/lib/meas/pion_norm.c +++ b/src/lib/meas/pion_norm.c @@ -68,7 +68,7 @@ void pion_norm_measurement(const int traj, const int id, const int ieo) { ranlxs(&tmp, 1); z0 = 
(int)(measurement_list[id].max_source_slice * tmp); #ifdef TM_USE_MPI - MPI_Bcast(&z0, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&z0, 1, MPI_INT, 0, app()->mpi.comm); #endif Cpp = (double *)calloc(g_nproc_z * LZ, sizeof(double)); diff --git a/src/lib/meas/polyakov_loop.c b/src/lib/meas/polyakov_loop.c index 25deea402..14d7c9ba5 100644 --- a/src/lib/meas/polyakov_loop.c +++ b/src/lib/meas/polyakov_loop.c @@ -66,7 +66,7 @@ void polyakov_loop(_Complex double *pl_, const int mu) { fprintf(stderr, "Only direction %d and %d are allowed.\n", 2, 3); fprintf(stderr, "Actual value is %d! Aborting...\n", mu); #ifdef TM_USE_MPI - MPI_Abort(MPI_COMM_WORLD, 10); + MPI_Abort(app()->mpi.comm, 10); MPI_Finalize(); #endif exit(0); @@ -137,7 +137,7 @@ void polyakov_loop(_Complex double *pl_, const int mu) { /* Collect the results and return:*/ #ifdef TM_USE_MPI - MPI_Allreduce(&pl, &pls, 2, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&pl, &pls, 2, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); pl = pls; #endif diff --git a/src/lib/measure_gauge_action.c b/src/lib/measure_gauge_action.c index ecbe7a888..654990166 100644 --- a/src/lib/measure_gauge_action.c +++ b/src/lib/measure_gauge_action.c @@ -96,7 +96,7 @@ double measure_plaquette(const su3 *const *const gf) { for (int i = 0; i < omp_num_threads; ++i) res += g_omp_acc_re[i]; #endif #ifdef TM_USE_MPI - MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); res = mres; #endif return res; @@ -178,7 +178,7 @@ double measure_gauge_action(const su3 *const *const gf, const double lambda) { for (int i = 0; i < omp_num_threads; ++i) res += g_omp_acc_re[i]; #endif #ifdef TM_USE_MPI - MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); res = mres; #endif GaugeInfo.plaquetteEnergy = res; diff --git a/src/lib/measure_rectangles.c b/src/lib/measure_rectangles.c index 
e6f59b8b2..4f0ca07fd 100644 --- a/src/lib/measure_rectangles.c +++ b/src/lib/measure_rectangles.c @@ -129,7 +129,7 @@ double measure_rectangles(const su3 **const gf) { #else #endif #ifdef TM_USE_MPI - MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); res = mres; #endif diff --git a/src/lib/monomial/moment_energy.c b/src/lib/monomial/moment_energy.c index 01776edfa..c9dfe16e6 100644 --- a/src/lib/monomial/moment_energy.c +++ b/src/lib/monomial/moment_energy.c @@ -69,7 +69,7 @@ double moment_energy(su3adj **const momenta) { kc = 0.5 * (ks + kc); #ifdef TM_USE_MPI ks = kc; - MPI_Allreduce(&ks, &kc, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&ks, &kc, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); #endif etime = gettime(); if (g_proc_id == 0) { diff --git a/src/lib/monomial/monitor_forces.c b/src/lib/monomial/monitor_forces.c index dc186ac05..ea64ddcfa 100644 --- a/src/lib/monomial/monitor_forces.c +++ b/src/lib/monomial/monitor_forces.c @@ -89,9 +89,9 @@ void monitor_forces(hamiltonian_field_t* const hf) { // output for force monitoring #ifdef TM_USE_MPI - MPI_Reduce(&sum, &sum2, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&sum, &sum2, 1, MPI_DOUBLE, MPI_SUM, 0, app()->mpi.comm); sum = sum2; - MPI_Reduce(&max, &sum2, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); + MPI_Reduce(&max, &sum2, 1, MPI_DOUBLE, MPI_MAX, 0, app()->mpi.comm); max = sum2; #endif if (g_proc_id == 0) { diff --git a/src/lib/mpi_init.c b/src/lib/mpi_init.c index f245f0556..6a8c81e40 100644 --- a/src/lib/mpi_init.c +++ b/src/lib/mpi_init.c @@ -159,7 +159,7 @@ void reduce_su3_ray(void *u_i /* in */, void *u_io /* in/out */, int *len /* in if (*dt != mpi_su3) { fprintf(stderr, "\nInvalid datatype for reduce_su3_ray(); abort.\n"); - MPI_Abort(MPI_COMM_WORLD, 1); + MPI_Abort(app()->mpi.comm, 1); } for (n = 0; n < *len; n++) { _su3_times_su3(tmp, *(u + n), *(v + n)) _su3_assign(*(v + n), tmp) @@ 
-249,8 +249,8 @@ void tmlqcd_mpi_init(int argc, char *argv[]) { dims[2] = N_PROC_Y; dims[3] = N_PROC_Z; - MPI_Comm_size(MPI_COMM_WORLD, &g_nproc); - MPI_Comm_rank(MPI_COMM_WORLD, &g_proc_id); + MPI_Comm_size(app()->mpi.comm, &g_nproc); + MPI_Comm_rank(app()->mpi.comm, &g_proc_id); MPI_Get_processor_name(processor_name, &namelen); MPI_Dims_create(g_nproc, nalldims, dims); if (g_proc_id == 0) { @@ -273,7 +273,7 @@ void tmlqcd_mpi_init(int argc, char *argv[]) { fprintf(stderr, "Please check your number of processors and the Nr?Procs input variables\n"); fprintf(stderr, "Aborting...!\n"); } - MPI_Abort(MPI_COMM_WORLD, 1); + MPI_Abort(app()->mpi.comm, 1); MPI_Finalize(); exit(-1); } @@ -332,7 +332,7 @@ void tmlqcd_mpi_init(int argc, char *argv[]) { halffield_buffer_z2 = (halfspinor *)malloc(T * LX * LY / 2 * sizeof(halfspinor)); #endif - MPI_Cart_create(MPI_COMM_WORLD, nalldims, dims, periods, reorder, &g_cart_grid); + MPI_Cart_create(app()->mpi.comm, nalldims, dims, periods, reorder, &g_cart_grid); MPI_Comm_rank(g_cart_grid, &g_cart_id); MPI_Cart_coords(g_cart_grid, g_cart_id, nalldims, g_proc_coords); if (g_debug_level > 1) { diff --git a/src/lib/operator/clover_det.c b/src/lib/operator/clover_det.c index 20984ba7f..f7f3d28cb 100644 --- a/src/lib/operator/clover_det.c +++ b/src/lib/operator/clover_det.c @@ -169,7 +169,7 @@ double sw_trace(const int ieo, const double mu) { #endif #ifdef TM_USE_MPI - MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); tm_stopwatch_pop(&g_timers, 0, 1, ""); return (mres); #else @@ -259,7 +259,7 @@ double sw_trace_nd(const int ieo, const double mu, const double eps) { #endif #ifdef TM_USE_MPI - MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); tm_stopwatch_pop(&g_timers, 0, 1, ""); return (mres); #else diff --git a/src/lib/prepare_source.c b/src/lib/prepare_source.c 
index 3b45c35a1..8f15c9c7b 100644 --- a/src/lib/prepare_source.c +++ b/src/lib/prepare_source.c @@ -91,7 +91,7 @@ void prepare_source(const int nstore, const int isample, const int ix, const int } } #ifdef TM_USE_MPI - MPI_Bcast(&t, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&t, 1, MPI_INT, 0, app()->mpi.comm); #endif SourceInfo.t = t; } @@ -180,7 +180,7 @@ void prepare_source(const int nstore, const int isample, const int ix, const int t = (int)(u * g_nproc_t * T); } #ifdef TM_USE_MPI - MPI_Bcast(&t, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&t, 1, MPI_INT, 0, app()->mpi.comm); #endif SourceInfo.t = t; } @@ -206,7 +206,7 @@ void prepare_source(const int nstore, const int isample, const int ix, const int } } #ifdef TM_USE_MPI - MPI_Bcast(&t, 1, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(&t, 1, MPI_INT, 0, app()->mpi.comm); #endif SourceInfo.t = t; } @@ -315,7 +315,7 @@ void prepare_source(const int nstore, const int isample, const int ix, const int if (read_spinor(g_spinor_field[2], g_spinor_field[3], source_filename, 0) != 0) { fprintf(stderr, "Error reading source! 
Aborting...\n"); #ifdef TM_USE_MPI - MPI_Abort(MPI_COMM_WORLD, 1); + MPI_Abort(app()->mpi.comm, 1); MPI_Finalize(); #endif exit(-1); diff --git a/src/lib/qphix/qphix_interface.cpp b/src/lib/qphix/qphix_interface.cpp index 2c61427dd..4469d3109 100644 --- a/src/lib/qphix/qphix_interface.cpp +++ b/src/lib/qphix/qphix_interface.cpp @@ -224,9 +224,9 @@ void _initQphix(int argc, char **argv, tm_QPhiXParams_t params, int c12, QphixPr g_proc_coords[2], g_proc_coords[3], g_proc_coords[0]); free(qmp_coords); fflush(stdout); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); } else { - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); } } } diff --git a/src/lib/quda_interface.c b/src/lib/quda_interface.c index 0e55f5cb9..f2586c7cf 100644 --- a/src/lib/quda_interface.c +++ b/src/lib/quda_interface.c @@ -2926,20 +2926,20 @@ void quda_mg_tune_params(void *spinorOut, void *spinorIn, const int max_iter) { copy_quda_mg_tunable_params(&tunable_params[0], &cur_params); print_tunable_params_pair(&cur_params, &tunable_params[0], mg_n_level); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); tm_stopwatch_push(&g_timers, "updateMultigridQuda", ""); updateMultigridQuda(quda_mg_preconditioner, &quda_mg_param); tm_stopwatch_pop(&g_timers, 0, 1, "TM_QUDA"); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); } - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); tm_stopwatch_push(&g_timers, "invertQuda", ""); invertQuda(spinorOut, spinorIn, &inv_param); tunable_params[0].tts = inv_param.secs; tunable_params[0].iter = inv_param.iter; tm_stopwatch_pop(&g_timers, 0, 1, "TM_QUDA"); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); for (i = 1; i < quda_mg_tuning_plan.mg_tuning_iterations; i++) { // the best params from all previous iterations @@ -2984,16 +2984,16 @@ void quda_mg_tune_params(void *spinorOut, void *spinorIn, const int max_iter) { print_tunable_params_pair(&cur_params, &tunable_params[i], mg_n_level); - 
MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); tm_stopwatch_push(&g_timers, "updateMultigridQuda", ""); updateMultigridQuda(quda_mg_preconditioner, &quda_mg_param); tm_stopwatch_pop(&g_timers, 0, 1, "TM_QUDA"); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); tm_stopwatch_push(&g_timers, "invertQuda", ""); invertQuda(spinorOut, spinorIn, &inv_param); tm_stopwatch_pop(&g_timers, 0, 1, "TM_QUDA"); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); tunable_params[i].tts = inv_param.secs; tunable_params[i].iter = inv_param.iter; diff --git a/src/lib/reweighting_factor.c b/src/lib/reweighting_factor.c index df283f6c0..8e39d3148 100644 --- a/src/lib/reweighting_factor.c +++ b/src/lib/reweighting_factor.c @@ -163,6 +163,6 @@ void reweighting_factor(const int N, const int nstore) { free(data); free(trlog); #ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); #endif // TM_USE_MPI } diff --git a/src/lib/sighandler.c b/src/lib/sighandler.c index 7bcb36d15..1447a678e 100644 --- a/src/lib/sighandler.c +++ b/src/lib/sighandler.c @@ -49,7 +49,7 @@ void catch_ill_inst(int s) { fprintf(stderr, "Aborting...\n"); fflush(stdout); #ifdef TM_USE_MPI - MPI_Abort(MPI_COMM_WORLD, 1); + MPI_Abort(app()->mpi.comm, 1); MPI_Finalize(); #endif exit(0); diff --git a/src/lib/solver/dfl_projector.c b/src/lib/solver/dfl_projector.c index b840aabfa..f54e0b998 100644 --- a/src/lib/solver/dfl_projector.c +++ b/src/lib/solver/dfl_projector.c @@ -428,7 +428,7 @@ void little_project(_Complex double *const out, _Complex double *const in, const } #ifdef TM_USE_MPI - MPI_Allreduce(phi, psi, N, MPI_DOUBLE_COMPLEX, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(phi, psi, N, MPI_DOUBLE_COMPLEX, MPI_SUM, app()->mpi.comm); #else memcpy(psi, phi, N * sizeof(_Complex double)); #endif @@ -487,7 +487,7 @@ void little_project2(_Complex double *const out, _Complex double *const in, cons phi[i] = lscalar_prod(little_dfl_fields[i], in, nb_blocks * N, 0); } 
#ifdef TM_USE_MPI - MPI_Allreduce(phi, psi, g_N_s, MPI_DOUBLE_COMPLEX, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(phi, psi, g_N_s, MPI_DOUBLE_COMPLEX, MPI_SUM, app()->mpi.comm); #else memcpy(psi, phi, g_N_s * sizeof(_Complex double)); #endif @@ -1023,7 +1023,7 @@ void check_little_D_inversion(const int repro) { little_D(result, invvec); /* This should be a proper inverse now */ #ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); #endif ldiff(invvec, result, inprod, nb_blocks * g_N_s); diff --git a/src/lib/solver/gcr4complex_body.c b/src/lib/solver/gcr4complex_body.c index 29351d87f..556838c83 100644 --- a/src/lib/solver/gcr4complex_body.c +++ b/src/lib/solver/gcr4complex_body.c @@ -218,7 +218,7 @@ _F_TYPE _PSWITCH(lsquare_norm)(_C_TYPE *const Q, const int N, const int parallel #ifdef TM_USE_MPI if (parallel) { double nrm2 = nrm; - MPI_Allreduce(&nrm2, &nrm, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&nrm2, &nrm, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); } #endif @@ -253,7 +253,7 @@ _C_TYPE _PSWITCH(lscalar_prod)(_C_TYPE *const R, _C_TYPE *const S, const int N, #ifdef TM_USE_MPI if (parallel) { _Complex double res2 = res; - MPI_Allreduce(&res2, &res, 1, MPI_DOUBLE_COMPLEX, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&res2, &res, 1, MPI_DOUBLE_COMPLEX, MPI_SUM, app()->mpi.comm); } #endif @@ -289,7 +289,7 @@ _F_TYPE _PSWITCH(lscalar_prod_r)(_C_TYPE *const R, _C_TYPE *const S, const int N #ifdef TM_USE_MPI if (parallel) { double res2 = res; - MPI_Allreduce(&res2, &res, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&res2, &res, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); } #endif diff --git a/src/lib/solver/little_project_eo_body.c b/src/lib/solver/little_project_eo_body.c index eaca17537..7d43100eb 100644 --- a/src/lib/solver/little_project_eo_body.c +++ b/src/lib/solver/little_project_eo_body.c @@ -37,7 +37,7 @@ void _PSWITCH(little_project_eo)(_Complex _F_TYPE *const out, _Complex _F_TYPE * } #ifdef TM_USE_MPI - 
MPI_Allreduce(phi, psi, N, _MPI_C_TYPE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(phi, psi, N, _MPI_C_TYPE, MPI_SUM, app()->mpi.comm); #else memcpy(psi, phi, N * sizeof(_Complex _F_TYPE)); #endif diff --git a/src/lib/start.c b/src/lib/start.c index 7316ec9bd..5504a83a2 100644 --- a/src/lib/start.c +++ b/src/lib/start.c @@ -218,7 +218,7 @@ void random_spinor_field_lexic(spinor *const k, const int repro, const enum RN_T } else if (g_proc_id == 0) { rlxd_get(rlxd_state); } - MPI_Bcast(rlxd_state, 105, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(rlxd_state, 105, MPI_INT, 0, app()->mpi.comm); if (g_proc_id != 0) { rlxd_reset(rlxd_state); } @@ -289,7 +289,7 @@ void random_spinor_field_eo(spinor *const k, const int repro, const enum RN_TYPE } else if (g_proc_id == 0) { rlxd_get(rlxd_state); } - MPI_Bcast(rlxd_state, 105, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(rlxd_state, 105, MPI_INT, 0, app()->mpi.comm); if (g_proc_id != 0) { rlxd_reset(rlxd_state); } @@ -432,7 +432,7 @@ void random_gauge_field(const int repro, su3 **const gf) { } else if (g_proc_id == 0) { rlxd_get(rlxd_state); } - MPI_Bcast(rlxd_state, 105, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(rlxd_state, 105, MPI_INT, 0, app()->mpi.comm); rlxd_reset(rlxd_state); #endif for (t0 = 0; t0 < g_nproc_t * T; t0++) { @@ -506,7 +506,7 @@ double random_su3adj_field(const int repro, su3adj **const momenta) { } else if (g_proc_id == 0) { rlxd_get(rlxd_state); } - MPI_Bcast(rlxd_state, 105, MPI_INT, 0, MPI_COMM_WORLD); + MPI_Bcast(rlxd_state, 105, MPI_INT, 0, app()->mpi.comm); rlxd_reset(rlxd_state); #endif for (int t0 = 0; t0 < g_nproc_t * T; t0++) { @@ -589,7 +589,7 @@ double random_su3adj_field(const int repro, su3adj **const momenta) { kc = 0.5 * (ks + kc); } #ifdef TM_USE_MPI - MPI_Allreduce(&kc, &ks, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&kc, &ks, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); return ks; #endif return kc; @@ -838,7 +838,7 @@ void start_ranlux(int level, int seed) { #ifdef TM_USE_MPI unsigned 
int *seeds = calloc(g_nproc, sizeof(unsigned int)); if (seeds == NULL) fatal_error("Memory allocation for seeds buffer failed!", "start_ranlux"); - MPI_Gather(&loc_seed, 1, MPI_UNSIGNED, seeds, 1, MPI_UNSIGNED, 0, MPI_COMM_WORLD); + MPI_Gather(&loc_seed, 1, MPI_UNSIGNED, seeds, 1, MPI_UNSIGNED, 0, app()->mpi.comm); if (g_proc_id == 0) { for (int i = 0; i < g_nproc; ++i) { for (int j = i + 1; j < g_nproc; ++j) { diff --git a/src/lib/test/check_xchange.c b/src/lib/test/check_xchange.c index a20f86df4..7b4c2067b 100644 --- a/src/lib/test/check_xchange.c +++ b/src/lib/test/check_xchange.c @@ -85,9 +85,9 @@ int check_xchange() { } #endif - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); xchange_field(g_spinor_field[0], 0); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); x = (double*)&g_spinor_field[0][VOLUME / 2]; for (i = 0; i < LX * LY * LZ / 2 * 24; i++, x++) { @@ -95,7 +95,7 @@ int check_xchange() { printf("The exchange up of fields in time direction\n"); printf("between %d and %d is not correct\n", g_cart_id, g_nb_t_up); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -107,7 +107,7 @@ int check_xchange() { printf("The exchange down of fields in time direction\n"); printf("between %d and %d is not correct\n", g_cart_id, g_nb_t_dn); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -120,7 +120,7 @@ int check_xchange() { printf("The exchange up of fields in x direction\n"); printf("between %d and %d is not correct\n", g_cart_id, g_nb_x_up); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -132,7 +132,7 @@ int check_xchange() { printf("The exchange down of fields in x direction\n"); printf("between %d and %d is not correct\n", g_cart_id, g_nb_x_dn); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + 
MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -146,7 +146,7 @@ int check_xchange() { printf("The exchange up of fields in y direction\n"); printf("between %d and %d is not correct\n", g_cart_id, g_nb_y_up); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -159,7 +159,7 @@ int check_xchange() { printf("The exchange down of fields in y direction\n"); printf("between %d and %d is not correct\n", g_cart_id, g_nb_y_dn); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -178,9 +178,9 @@ int check_xchange() { } } - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); xchange_field(g_spinor_field[0], 1); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); x = (double*)&g_spinor_field[0][VOLUME / 2 + 2 * LX * LY * LZ / 2 + 2 * T * LY * LZ / 2 + 2 * T * LX * LZ / 2]; @@ -189,7 +189,7 @@ int check_xchange() { printf("The exchange up of fields in z (1) direction up\n"); printf("between %d and %d is not correct\n", g_cart_id, g_nb_z_up); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -202,7 +202,7 @@ int check_xchange() { printf("The exchange down of fields in z (1) direction down\n"); printf("between %d and %d is not correct\n", g_cart_id, g_nb_z_dn); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -219,9 +219,9 @@ int check_xchange() { } } - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); xchange_field(g_spinor_field[0], 1); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); x = (double*)&g_spinor_field[0][VOLUME / 2 + 2 * LX * LY * LZ / 2 + 2 * T * LY * LZ / 2 + 2 * T * LX * LZ / 2]; @@ -230,7 +230,7 @@ int check_xchange() { printf("The exchange up of fields in z (0) direction up\n"); printf("between %d and %d is not 
correct\n", g_cart_id, g_nb_z_up); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -243,7 +243,7 @@ int check_xchange() { printf("The exchange down of fields in z (0) direction down\n"); printf("between %d and %d is not correct\n", g_cart_id, g_nb_z_dn); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -312,9 +312,9 @@ int check_xchange() { } #endif - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); xchange_gauge(g_gauge_field); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); x = (double*)&g_gauge_field[T * LX * LY * LZ][0]; for (i = 0; i < LX * LY * LZ * 72; i++, x++) { @@ -322,7 +322,7 @@ int check_xchange() { printf("The exchange up of gaugefields in time direction\n"); printf("between %d and %d is not correct\n", g_cart_id, g_nb_t_up); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -334,7 +334,7 @@ int check_xchange() { printf("The exchange down of gaugefields in time direction\n"); printf("between %d and %d is not correct\n", g_cart_id, g_nb_t_dn); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -348,7 +348,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, g_nb_x_up); printf("%d %d %d\n", g_cart_id, i, (int)(*x)); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -361,7 +361,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, g_nb_x_dn); printf("%d %d %d\n", g_cart_id, i, (int)(*x)); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -376,7 +376,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, 
g_nb_y_up); printf("%d %d %d\n", g_cart_id, i, (int)(*x)); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -389,7 +389,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, g_nb_y_dn); printf("%d %d %d\n", g_cart_id, i, (int)(*x)); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -404,7 +404,7 @@ int check_xchange() { printf("between %d and %d is not correct, down is %d\n", g_cart_id, g_nb_z_up, g_nb_z_dn); printf("%d %d %d\n", g_cart_id, i, (int)(*x)); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -418,7 +418,7 @@ int check_xchange() { printf("between %d and %d is not correct, up is %d\n", g_cart_id, g_nb_z_dn, g_nb_z_up); printf("%d %d %d\n", g_cart_id, i, (int)(*x)); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -499,9 +499,9 @@ int check_xchange() { } } - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); xchange_gauge(g_gauge_field); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); /* The edges */ #if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) @@ -531,7 +531,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -544,7 +544,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -557,7 +557,7 @@ int check_xchange() { printf("between %d and %d is not 
correct\n", g_cart_id, mp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -570,7 +570,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -600,7 +600,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -613,7 +613,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -626,7 +626,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -639,7 +639,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -667,7 +667,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -680,7 +680,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", 
g_cart_id, mp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -693,7 +693,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -706,7 +706,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -735,7 +735,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -748,7 +748,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -761,7 +761,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -774,7 +774,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -802,7 +802,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pp); 
printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -816,7 +816,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -830,7 +830,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -844,7 +844,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -873,7 +873,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -887,7 +887,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -901,7 +901,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -915,7 +915,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mm); printf("%d %d (%d 
!= %d)\n", g_cart_id, i, (int)(*x), mm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -973,9 +973,9 @@ int check_xchange() { } } - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); xchange_gauge(g_gauge_field); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); x = (double*)&g_gauge_field[VOLUMEPLUSRAND][0]; for (i = 0; i < LX * LY * LZ * 72; i++, x++) { @@ -983,7 +983,7 @@ int check_xchange() { printf("The exchange up of gaugefields in time direction\n"); printf("between %d and %d is not correct\n", g_cart_id, g_nb_t_up); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -995,7 +995,7 @@ int check_xchange() { printf("The exchange up of gaugefields in time direction\n"); printf("between %d and %d is not correct\n", g_cart_id, g_nb_t_up); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1009,7 +1009,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, g_nb_x_up); printf("%d %d %d\n", g_cart_id, i, (int)(*x)); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1022,7 +1022,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, g_nb_x_dn); printf("%d %d %d\n", g_cart_id, i, (int)(*x)); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1037,7 +1037,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, g_nb_y_up); printf("%d %d %d\n", g_cart_id, i, (int)(*x)); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1051,7 +1051,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, g_nb_y_dn); 
printf("%d %d %d\n", g_cart_id, i, (int)(*x)); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1067,7 +1067,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, g_nb_z_up); printf("%d %d %d\n", g_cart_id, i, (int)(*x)); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1081,7 +1081,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, g_nb_z_dn); printf("%d %d %d\n", g_cart_id, i, (int)(*x)); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1108,9 +1108,9 @@ int check_xchange() { } } - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); xchange_gauge(g_gauge_field); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); /* Now there should be in the t and t2 Rand certain values set */ @@ -1128,7 +1128,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, g_nb_t_up); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), g_nb_t_up); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1141,7 +1141,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, g_nb_t_up); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), g_nb_t_up); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1154,7 +1154,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, g_nb_t_dn); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), g_nb_t_dn); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1167,7 +1167,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, 
g_nb_t_dn); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), g_nb_t_dn); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1181,7 +1181,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, g_nb_t_up); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), g_nb_t_up); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1194,7 +1194,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, g_nb_t_up); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), g_nb_t_up); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1207,7 +1207,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, g_nb_t_dn); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), g_nb_t_dn); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1220,7 +1220,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, g_nb_t_dn); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), g_nb_t_dn); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1328,9 +1328,9 @@ int check_xchange() { } } #endif - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); xchange_gauge(g_gauge_field); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); #if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) di[0] = (g_proc_coords[0] - 1) % g_nproc_t; @@ -1355,7 +1355,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); 
MPI_Finalize(); exit(0); } @@ -1368,7 +1368,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1381,7 +1381,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1394,7 +1394,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1407,7 +1407,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1420,7 +1420,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1433,7 +1433,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1446,7 +1446,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); 
MPI_Finalize(); exit(0); } @@ -1477,7 +1477,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1490,7 +1490,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1503,7 +1503,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1516,7 +1516,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1529,7 +1529,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1542,7 +1542,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1555,7 +1555,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); 
MPI_Finalize(); exit(0); } @@ -1568,7 +1568,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1596,7 +1596,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1609,7 +1609,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1622,7 +1622,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1635,7 +1635,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1648,7 +1648,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1661,7 +1661,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); 
MPI_Finalize(); exit(0); } @@ -1674,7 +1674,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1687,7 +1687,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1717,7 +1717,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1730,7 +1730,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1744,7 +1744,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1758,7 +1758,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1772,7 +1772,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); 
MPI_Finalize(); exit(0); } @@ -1786,7 +1786,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1800,7 +1800,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1814,7 +1814,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1844,7 +1844,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1858,7 +1858,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1872,7 +1872,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1886,7 +1886,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); 
MPI_Finalize(); exit(0); } @@ -1900,7 +1900,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1914,7 +1914,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1928,7 +1928,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1942,7 +1942,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1972,7 +1972,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -1986,7 +1986,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2000,7 +2000,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); 
MPI_Finalize(); exit(0); } @@ -2014,7 +2014,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2028,7 +2028,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2042,7 +2042,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, pm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), pm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2056,7 +2056,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mp); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mp); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2070,7 +2070,7 @@ int check_xchange() { printf("between %d and %d is not correct\n", g_cart_id, mm); printf("%d %d (%d != %d)\n", g_cart_id, i, (int)(*x), mm); printf("Program aborted\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2190,9 +2190,9 @@ int check_xchange() { } #endif - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); xchange_deri(df0); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); #if defined TM_PARALLELT for (x1 = 0; x1 < LX; x1++) { @@ -2205,7 +2205,7 @@ int check_xchange() { if ((int)x[j] != g_nb_t_up) { printf("Exchange of derivatives is working not correctly (1u)!\n"); printf("Aborting program!"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2218,7 +2218,7 @@ int 
check_xchange() { if ((int)x[j] != g_nb_t_dn) { printf("Exchange of derivatives is working not correctly (1d)!\n"); printf("Aborting program!"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2239,7 +2239,7 @@ int check_xchange() { if ((int)x[j] != g_nb_t_up) { printf("Exchange of derivatives is working not correctly (2u)!\n"); printf("Aborting program!"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2252,7 +2252,7 @@ int check_xchange() { if ((int)x[j] != g_nb_t_dn) { printf("Exchange of derivatives is working not correctly (2d)!\n"); printf("Aborting program!"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2261,7 +2261,7 @@ int check_xchange() { } } } - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); for (x0 = 1; x0 < T - 1; x0++) { for (x2 = 0; x2 < LY; x2++) { for (x3 = 0; x3 < LZ; x3++) { @@ -2272,7 +2272,7 @@ int check_xchange() { if ((int)x[j] != g_nb_x_up) { printf("Exchange of derivatives is working not correctly (3u)!\n"); printf("Aborting program!"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2285,7 +2285,7 @@ int check_xchange() { if ((int)x[j] != g_nb_x_dn) { printf("Exchange of derivatives is working not correctly (3d)!\n"); printf("Aborting program!"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2303,7 +2303,7 @@ int check_xchange() { if ((int)x[j] != g_nb_x_up + g_nb_t_up) { printf("Exchange of derivatives is working not correctly (4uu)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2316,7 +2316,7 @@ int check_xchange() { if ((int)x[j] != g_nb_x_up + g_nb_t_dn) { printf("Exchange of derivatives is working not correctly (4ud)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + 
MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2329,7 +2329,7 @@ int check_xchange() { if ((int)x[j] != g_nb_x_dn + g_nb_t_up) { printf("Exchange of derivatives is working not correctly (4du)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2342,7 +2342,7 @@ int check_xchange() { if ((int)x[j] != g_nb_x_dn + g_nb_t_dn) { printf("Exchange of derivatives is working not correctly (4dd)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2362,7 +2362,7 @@ int check_xchange() { if ((int)x[j] != g_nb_t_up) { printf("Exchange of derivatives is working not correctly (5u)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2375,7 +2375,7 @@ int check_xchange() { if ((int)x[j] != g_nb_t_dn) { printf("Exchange of derivatives is working not correctly (5d)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2394,7 +2394,7 @@ int check_xchange() { if ((int)x[j] != g_nb_x_up) { printf("Exchange of derivatives is working not correctly (6u)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2407,7 +2407,7 @@ int check_xchange() { if ((int)x[j] != g_nb_x_dn) { printf("Exchange of derivatives is working not correctly (6d)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2426,7 +2426,7 @@ int check_xchange() { if ((int)x[j] != g_nb_y_up) { printf("Exchange of derivatives is working not correctly (7u)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2439,7 +2439,7 @@ int check_xchange() { if ((int)x[j] != 
g_nb_y_dn) { printf("Exchange of derivatives is working not correctly (7d)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2457,7 +2457,7 @@ int check_xchange() { if ((int)x[j] != g_nb_x_up + g_nb_t_up) { printf("Exchange of derivatives is working not correctly (8uu)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2470,7 +2470,7 @@ int check_xchange() { if ((int)x[j] != g_nb_x_up + g_nb_t_dn) { printf("Exchange of derivatives is working not correctly (8ud)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2483,7 +2483,7 @@ int check_xchange() { if ((int)x[j] != g_nb_x_dn + g_nb_t_up) { printf("Exchange of derivatives is working not correctly (8du)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2496,7 +2496,7 @@ int check_xchange() { if ((int)x[j] != g_nb_x_dn + g_nb_t_dn) { printf("Exchange of derivatives is working not correctly (8dd)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2513,7 +2513,7 @@ int check_xchange() { if ((int)x[j] != g_nb_t_up + g_nb_y_up) { printf("Exchange of derivatives is working not correctly (9uu)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2526,7 +2526,7 @@ int check_xchange() { if ((int)x[j] != g_nb_t_up + g_nb_y_dn) { printf("Exchange of derivatives is working not correctly (9ud)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2541,7 +2541,7 @@ int check_xchange() { printf("%d %d %d %d %d %d %d\n", (int)x[j], g_nb_t_dn, g_nb_t_up, g_nb_y_dn, 
g_nb_y_up, x1, x3); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2554,7 +2554,7 @@ int check_xchange() { if ((int)x[j] != g_nb_t_dn + g_nb_y_dn) { printf("Exchange of derivatives is working not correctly (9dd)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2571,7 +2571,7 @@ int check_xchange() { if ((int)x[j] != g_nb_x_up + g_nb_y_up) { printf("Exchange of derivatives is working not correctly (10uu)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2584,7 +2584,7 @@ int check_xchange() { if ((int)x[j] != g_nb_x_up + g_nb_y_dn) { printf("Exchange of derivatives is working not correctly (10ud)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2597,7 +2597,7 @@ int check_xchange() { if ((int)x[j] != g_nb_x_dn + g_nb_y_up) { printf("Exchange of derivatives is working not correctly (10du)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2610,7 +2610,7 @@ int check_xchange() { if ((int)x[j] != g_nb_x_dn + g_nb_y_dn) { printf("Exchange of derivatives is working not correctly (10dd)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2626,7 +2626,7 @@ int check_xchange() { if ((int)x[j] != g_nb_x_up + g_nb_y_up + g_nb_t_up) { printf("Exchange of derivatives is working not correctly (11uuu)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2639,7 +2639,7 @@ int check_xchange() { if ((int)x[j] != g_nb_x_dn + g_nb_y_up + g_nb_t_up) { printf("Exchange of derivatives is working not correctly (11duu)!\n"); 
printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2652,7 +2652,7 @@ int check_xchange() { if ((int)x[j] != g_nb_x_dn + g_nb_y_up + g_nb_t_dn) { printf("Exchange of derivatives is working not correctly (11dud)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2665,7 +2665,7 @@ int check_xchange() { if ((int)x[j] != g_nb_x_dn + g_nb_y_dn + g_nb_t_up) { printf("Exchange of derivatives is working not correctly (11ddu)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2678,7 +2678,7 @@ int check_xchange() { if ((int)x[j] != g_nb_x_up + g_nb_y_dn + g_nb_t_dn) { printf("Exchange of derivatives is working not correctly (11udd)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2691,7 +2691,7 @@ int check_xchange() { if ((int)x[j] != g_nb_x_up + g_nb_y_up + g_nb_t_dn) { printf("Exchange of derivatives is working not correctly (11uud)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2704,7 +2704,7 @@ int check_xchange() { if ((int)x[j] != g_nb_x_up + g_nb_y_dn + g_nb_t_up) { printf("Exchange of derivatives is working not correctly (11udu)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2717,7 +2717,7 @@ int check_xchange() { if ((int)x[j] != g_nb_x_dn + g_nb_y_dn + g_nb_t_dn) { printf("Exchange of derivatives is working not correctly (11ddd)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2735,7 +2735,7 @@ int check_xchange() { if ((int)x[j] != 0) { printf("Exchange of derivatives is working not correctly 
(bulk XYT)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2761,7 +2761,7 @@ int check_xchange() { printf("%d %d %d %d %d\n", x1, x2, x3, ix, g_proc_id); printf("%f %d %d\n", df0[ix][mu].d8, g_nb_t_up, g_nb_t_dn); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2780,7 +2780,7 @@ int check_xchange() { if ((int)x[j] != g_nb_x_up) { printf("Exchange of derivatives is working not correctly (13)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2800,7 +2800,7 @@ int check_xchange() { printf("Exchange of derivatives is working not correctly (14)!\n"); printf("%d %d %d %d %d\n", x0, x1, x3, ix, g_proc_id); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2820,7 +2820,7 @@ int check_xchange() { printf("Exchange of derivatives is working not correctly (15)!\n"); printf("%d %d %d %d %d\n", x0, x1, x3, ix, g_proc_id); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2838,7 +2838,7 @@ int check_xchange() { if ((int)x[j] != (g_nb_x_up + g_nb_t_up)) { printf("Exchange of derivatives is working not correctly (16)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2855,7 +2855,7 @@ int check_xchange() { if ((int)x[j] != (g_nb_y_up + g_nb_t_up)) { printf("Exchange of derivatives is working not correctly (17)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2872,7 +2872,7 @@ int check_xchange() { if ((int)x[j] != (g_nb_y_up + g_nb_x_up)) { printf("Exchange of derivatives is working not correctly (18)!\n"); printf("Aborting 
program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2890,7 +2890,7 @@ int check_xchange() { printf("Exchange of derivatives is working not correctly (19)!\n"); printf("%f %d %d %d\n", df0[ix][mu].d1, g_nb_x_up + g_nb_z_up, g_nb_x_up, g_nb_z_up); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2907,7 +2907,7 @@ int check_xchange() { if ((int)x[j] != (g_nb_y_up + g_nb_z_up)) { printf("Exchange of derivatives is working not correctly (20)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2924,7 +2924,7 @@ int check_xchange() { if ((int)x[j] != (g_nb_t_up + g_nb_z_up)) { printf("Exchange of derivatives is working not correctly (21)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2940,7 +2940,7 @@ int check_xchange() { if ((int)x[j] != (g_nb_t_up + g_nb_x_up + g_nb_y_up)) { printf("Exchange of derivatives is working not correctly (22)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2955,7 +2955,7 @@ int check_xchange() { if ((int)x[j] != (g_nb_t_up + g_nb_x_up + g_nb_z_up)) { printf("Exchange of derivatives is working not correctly (23)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2970,7 +2970,7 @@ int check_xchange() { if ((int)x[j] != (g_nb_t_up + g_nb_z_up + g_nb_y_up)) { printf("Exchange of derivatives is working not correctly (24)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2985,7 +2985,7 @@ int check_xchange() { if ((int)x[j] != (g_nb_z_up + g_nb_x_up + g_nb_y_up)) { printf("Exchange of derivatives is 
working not correctly (25)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -2999,7 +2999,7 @@ int check_xchange() { if ((int)x[j] != (g_nb_z_up + g_nb_x_up + g_nb_y_up + g_nb_t_up)) { printf("Exchange of derivatives is working not correctly (26)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -3135,9 +3135,9 @@ int check_xchange() { #endif - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); xchange_deri(df0); - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); #if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) @@ -3171,7 +3171,7 @@ int check_xchange() { printf("Exchange of derivatives is working not correctly (e5mm)!\n"); printf("%f %d %d %d %d\n", x[j], g_nb_t_up, g_nb_x_up, pp, mm); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -3185,7 +3185,7 @@ int check_xchange() { printf("Exchange of derivatives is working not correctly (e5mp)!\n"); printf("%f %d %d %d %d\n", x[j], g_nb_t_up, g_nb_x_up, pm, mp); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -3199,7 +3199,7 @@ int check_xchange() { printf("Exchange of derivatives is working not correctly (e5pm)!\n"); printf("%f %d %d %d %d\n", x[j], g_nb_t_up, g_nb_x_up, pm, mp); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -3213,7 +3213,7 @@ int check_xchange() { printf("Exchange of derivatives is working not correctly (e5pp)!\n"); printf("%f %d %d %d %d\n", x[j], g_nb_t_up, g_nb_x_up, pp, mm); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -3251,7 +3251,7 @@ int check_xchange() { if ((int)x[j] 
!= mm) { printf("Exchange of derivatives is working not correctly (e6mm)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -3265,7 +3265,7 @@ int check_xchange() { printf("Exchange of derivatives is working not correctly (e6pm)!\n"); printf("%f %d %d %d %d\n", x[j], g_nb_x_up, g_nb_y_up, pm, mp); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -3279,7 +3279,7 @@ int check_xchange() { printf("Exchange of derivatives is working not correctly (e6mp)!\n"); printf("%f %d %d %d %d\n", x[j], g_nb_x_up, g_nb_y_up, pm, mp); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -3293,7 +3293,7 @@ int check_xchange() { printf("Exchange of derivatives is working not correctly (e6pp)!\n"); printf("%f %d %d %d %d\n", x[j], g_nb_x_up, g_nb_y_up, pp, mm); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -3326,7 +3326,7 @@ int check_xchange() { if ((int)x[j] != mm) { printf("Exchange of derivatives is working not correctly (e7mm)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -3340,7 +3340,7 @@ int check_xchange() { printf("Exchange of derivatives is working not correctly (e7pm)!\n"); printf("%f %d %d %d %d\n", x[j], g_nb_t_up, g_nb_y_up, pm, pm); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -3354,7 +3354,7 @@ int check_xchange() { printf("Exchange of derivatives is working not correctly (e7mp)!\n"); printf("%f %d %d %d %d\n", x[j], g_nb_t_up, g_nb_y_up, pm, mp); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -3368,7 +3368,7 @@ int 
check_xchange() { printf("Exchange of derivatives is working not correctly (e7pp)!\n"); printf("%f %d %d %d %d\n", x[j], g_nb_t_up, g_nb_y_up, pp, mm); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -3402,7 +3402,7 @@ int check_xchange() { printf("Exchange of derivatives is working not correctly (e8mmm)!\n"); printf("%d %d %d %d %d\n", (int)x[j], mm, mp, pm, pp); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -3435,7 +3435,7 @@ int check_xchange() { printf("Exchange of derivatives is working not correctly (e8pmm)!\n"); printf("%d %d %d %d %d\n", (int)x[j], mm, mp, pm, pp); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -3468,7 +3468,7 @@ int check_xchange() { printf("Exchange of derivatives is working not correctly (e8pmp)!\n"); printf("%d %d %d %d %d\n", (int)x[j], mm, mp, pm, pp); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -3501,7 +3501,7 @@ int check_xchange() { printf("Exchange of derivatives is working not correctly (e8ppp)!\n"); printf("%d %d %d %d %d\n", (int)x[j], mm, mp, pm, pp); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -3534,7 +3534,7 @@ int check_xchange() { printf("Exchange of derivatives is working not correctly (e8mpm)!\n"); printf("%d %d %d %d %d\n", (int)x[j], mm, mp, pm, pp); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -3567,7 +3567,7 @@ int check_xchange() { printf("Exchange of derivatives is working not correctly (e8mmp)!\n"); printf("%d %d %d %d %d\n", (int)x[j], mm, mp, pm, pp); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + 
MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -3600,7 +3600,7 @@ int check_xchange() { printf("Exchange of derivatives is working not correctly (e8mpp)!\n"); printf("%d %d %d %d %d\n", (int)x[j], mm, mp, pm, pp); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -3633,7 +3633,7 @@ int check_xchange() { printf("Exchange of derivatives is working not correctly (e8ppm)!\n"); printf("%d %d %d %d %d\n", (int)x[j], mm, mp, pm, pp); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } @@ -3651,7 +3651,7 @@ int check_xchange() { if ((int)x[j] != 0) { printf("Exchange of derivatives is working not correctly (ebulk XYT)!\n"); printf("Aborting program!\n"); - MPI_Abort(MPI_COMM_WORLD, 5); + MPI_Abort(app()->mpi.comm, 5); MPI_Finalize(); exit(0); } diff --git a/src/lib/update_tm.c b/src/lib/update_tm.c index 3f1cdc5d5..df0e0fd54 100644 --- a/src/lib/update_tm.c +++ b/src/lib/update_tm.c @@ -174,7 +174,7 @@ int update_tm(double *plaquette_energy, double *rectangle_energy, char *filename the other sites */ ranlxd(yy, 1); #ifdef TM_USE_MPI - MPI_Bcast(&yy[0], 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); + MPI_Bcast(&yy[0], 1, MPI_DOUBLE, 0, app()->mpi.comm); #endif /* when acctest is 0 (i.e. 
do not perform acceptance test), the trajectory is accepted whatever @@ -277,7 +277,7 @@ int update_tm(double *plaquette_energy, double *rectangle_energy, char *filename #ifdef TM_USE_MPI tmp = ret_gauge_diff; - MPI_Reduce(&tmp, &ret_gauge_diff, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + MPI_Reduce(&tmp, &ret_gauge_diff, 1, MPI_DOUBLE, MPI_SUM, 0, app()->mpi.comm); #endif /* compute the total H */ tmp = enep; diff --git a/src/lib/wrapper/lib_wrapper.c b/src/lib/wrapper/lib_wrapper.c index 19d36ddc6..71d78bed4 100644 --- a/src/lib/wrapper/lib_wrapper.c +++ b/src/lib/wrapper/lib_wrapper.c @@ -98,7 +98,7 @@ int tmLQCD_invert_init(int argc, char* argv[], const int _verbose, const int ext g_use_clover_flag = 0; #ifdef TM_USE_MPI - MPI_Comm_rank(MPI_COMM_WORLD, &g_proc_id); + MPI_Comm_rank(app()->mpi.comm, &g_proc_id); #else g_proc_id = 0; #endif @@ -341,7 +341,7 @@ int tmLQCD_finalise() { free_moment_field(); free_chi_spinor_field(); #ifdef TM_USE_MPI - MPI_Barrier(MPI_COMM_WORLD); + MPI_Barrier(app()->mpi.comm); #endif return (0); } diff --git a/tests/test_buffers.c b/tests/test_buffers.c index d398c9768..7fedf2fe7 100644 --- a/tests/test_buffers.c +++ b/tests/test_buffers.c @@ -13,7 +13,7 @@ TEST_SUITES{TEST_SUITE_ADD(BUFFERS_GAUGE), TEST_SUITES_CLOSURE}; int main(int argc, char *argv[]) { #ifdef TM_USE_MPI MPI_Init(&argc, &argv); - MPI_Comm_rank(MPI_COMM_WORLD, &g_proc_id); + MPI_Comm_rank(app()->mpi.comm, &g_proc_id); #else g_proc_id = 0; #endif From 33da95cd4043949acf074f16894b0186332b4686 Mon Sep 17 00:00:00 2001 From: Roman Gruber Date: Wed, 25 Feb 2026 15:09:58 +0100 Subject: [PATCH 13/19] we have to give QUDA the current MPI commnicator --- src/lib/quda_interface.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lib/quda_interface.c b/src/lib/quda_interface.c index f2586c7cf..b52d4983d 100644 --- a/src/lib/quda_interface.c +++ b/src/lib/quda_interface.c @@ -448,6 +448,7 @@ void _initQuda() { // initialize the QUDA library #ifdef TM_USE_MPI 
initQuda(-1); // sets device numbers automatically + setMPICommHandleQuda(app()->mpi.comm); // pass the proper MPI communicator to QUDA #else initQuda(0); // scalar build: use device 0 #endif From 06aa7e7d8dd5f527ad0cb4f5fe4637b7d0e80c44 Mon Sep 17 00:00:00 2001 From: Roman Gruber Date: Wed, 25 Feb 2026 15:37:29 +0100 Subject: [PATCH 14/19] more MPI_COMM_WORLD which where forgotten --- src/lib/io/utils_kill_with_error.c | 2 +- src/lib/mpi_init.c | 2 +- src/lib/test/measure_rectangles.debug.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lib/io/utils_kill_with_error.c b/src/lib/io/utils_kill_with_error.c index 322536bd7..282e5bc4b 100644 --- a/src/lib/io/utils_kill_with_error.c +++ b/src/lib/io/utils_kill_with_error.c @@ -14,7 +14,7 @@ void kill_with_error(LIME_FILE *fh, int const rank, char const *error) { #endif /* TM_USE_LEMON */ #ifdef TM_USE_MPI - MPI_Abort(MPI_COMM_WORLD, 1); + MPI_Abort(app()->mpi.comm, 1); MPI_Finalize(); #endif exit(500); diff --git a/src/lib/mpi_init.c b/src/lib/mpi_init.c index 6a8c81e40..25c79809a 100644 --- a/src/lib/mpi_init.c +++ b/src/lib/mpi_init.c @@ -189,7 +189,7 @@ void tmlqcd_mpi_init(int argc, char *argv[]) { #ifdef TM_USE_MPI #ifdef TM_USE_SHMEM - /* we need that the PE number in MPI_COMM_WORL */ + /* we need that the PE number in MPI_COMM_WORLD */ /* exactly correspond to the one in g_cart_grid */ reorder = 0; #endif diff --git a/src/lib/test/measure_rectangles.debug.c b/src/lib/test/measure_rectangles.debug.c index 422f681b2..9c4f51215 100644 --- a/src/lib/test/measure_rectangles.debug.c +++ b/src/lib/test/measure_rectangles.debug.c @@ -131,7 +131,7 @@ double measure_rectangles() { fclose(debugfile); ga = (kc + ks) / 3.0; #ifdef TM_USE_MPI - MPI_Allreduce(&ga, &gas, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&ga, &gas, 1, MPI_DOUBLE, MPI_SUM, app()->mpi.comm); return gas; #else return ga; From 8e0208188b5366f83c7ab908f1065afd0a29d429 Mon Sep 17 00:00:00 2001 From: Roman Gruber Date: 
Thu, 26 Feb 2026 10:45:15 +0100 Subject: [PATCH 15/19] prevent c++ name mangling --- src/lib/include/mpi.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/lib/include/mpi.h b/src/lib/include/mpi.h index 0d5b02993..57c645566 100644 --- a/src/lib/include/mpi.h +++ b/src/lib/include/mpi.h @@ -31,6 +31,10 @@ // include *real* MPI header #include_next +#ifdef __cplusplus +extern "C" { +#endif + /** * @brief MPI context @@ -74,4 +78,8 @@ void app_context_init(const MPI_Comm comm); void app_context_finalize(void); +#ifdef __cplusplus +} +#endif + #endif From 48998f4957b85f952a6494bbbf14a04cde58d25e Mon Sep 17 00:00:00 2001 From: Roman Gruber Date: Mon, 2 Mar 2026 11:34:51 +0100 Subject: [PATCH 16/19] added safe allocator --- src/lib/CMakeLists.txt | 6 +++ src/lib/alloc/alloc.c | 81 +++++++++++++++++++++++++++++++++++++++++ src/lib/include/alloc.h | 49 +++++++++++++++++++++++++ 3 files changed, 136 insertions(+) create mode 100755 src/lib/alloc/alloc.c create mode 100755 src/lib/include/alloc.h diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index 367f33d5f..70fec08c3 100644 --- a/src/lib/CMakeLists.txt +++ b/src/lib/CMakeLists.txt @@ -73,6 +73,11 @@ list( APP_CONTEXT_SRC_C app_context/app_context.c) +list( + APPEND + ALLOC_SRC_C + alloc/alloc.c) + list( APPEND SOLVER_SRC_C @@ -396,6 +401,7 @@ list( ${IO_SRC_C} ${INIT_SRC_C} ${APP_CONTEXT_SRC_C} + ${ALLOC_SRC_C} ${SOLVER_SRC_C} ${TEST_SRC_C} ${MEAS_SRC_C} diff --git a/src/lib/alloc/alloc.c b/src/lib/alloc/alloc.c new file mode 100755 index 000000000..a853d58d4 --- /dev/null +++ b/src/lib/alloc/alloc.c @@ -0,0 +1,81 @@ +/*********************************************************************** + * + * Copyright (C) 2026 Roman Gruber + * + * This file is part of tmLQCD. 
+ * + * tmLQCD is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * tmLQCD is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with tmLQCD. If not, see . + * + * Allocation utils + * + * Author: Roman Gruber + * roman.gruber@unibe.ch + * + *******************************************************************************/ + + +#include +#include +#include +#include + + +/** + * @brief malloc wrapper that prints a diagnostic and aborts on a zero-size request or when malloc returns NULL. Do not call + * this function directly; use the macro safe_malloc() instead, which fills in file, line and func. + * + * @param[in] size The allocation size in bytes (must be non-zero) + * @param[in] file __FILE__ of the call site (used in the diagnostic only) + * @param[in] line __LINE__ of the call site (used in the diagnostic only) + * @param[in] func __func__ of the call site (used in the diagnostic only) + * + * @return Pointer returned by malloc(); never NULL because failure aborts + */ +void *safe_malloc_impl(size_t size, const char *file, int line, const char *func) +{ + if (size <= 0) { + fprintf(stderr, "safe_malloc: zero-size allocation at %s:%d (%s)\n", file, line, func); + abort(); + } + + void *p = malloc(size); + if (p == NULL) { + fprintf(stderr, "safe_malloc: failed to allocate %zu bytes at %s:%d (%s): %s\n", + size, file, line, func, strerror(errno)); + abort(); + } + return p; +} + + +/** + * @brief Identical to safe_malloc_impl above except that it calls calloc(size, 1) + * instead of malloc, so the returned memory is zero-initialized.
Do not call this function directly; use the macro + * safe_calloc() instead. + */ +void *safe_calloc_impl(size_t size, const char *file, int line, const char *func) +{ + if (size <= 0) { + fprintf(stderr, "safe_calloc: zero-size allocation at %s:%d (%s)\n", file, line, func); + abort(); + } + + void *p = calloc(size, 1); + if (p == NULL) { + fprintf(stderr, "safe_calloc: failed to allocate %zu bytes at %s:%d (%s): %s\n", + size, file, line, func, strerror(errno)); + abort(); + } + return p; +} \ No newline at end of file diff --git a/src/lib/include/alloc.h b/src/lib/include/alloc.h new file mode 100755 index 000000000..a4545b333 --- /dev/null +++ b/src/lib/include/alloc.h @@ -0,0 +1,49 @@ +/*********************************************************************** + * + * Copyright (C) 2026 Roman Gruber + * + * This file is part of tmLQCD. + * + * tmLQCD is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * tmLQCD is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with tmLQCD. If not, see .
+ * + * Allocation utils + * + * Author: Roman Gruber + * roman.gruber@unibe.ch + * + *******************************************************************************/ + +#ifndef ALLOC_H +#define ALLOC_H + + +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +void *safe_malloc_impl(size_t size, const char *file, int line, const char *func); +void *safe_calloc_impl(size_t size, const char *file, int line, const char *func); +#define safe_malloc(size) safe_malloc_impl((size), __FILE__, __LINE__, __func__) +#define safe_calloc(size) safe_calloc_impl((size), __FILE__, __LINE__, __func__) + + +#ifdef __cplusplus +} +#endif + +#endif From e8078d9725c42d6b4b55b5bf7c59d0d7954e572c Mon Sep 17 00:00:00 2001 From: Roman Gruber Date: Mon, 2 Mar 2026 11:35:40 +0100 Subject: [PATCH 17/19] added ptbc instance and defect structs --- src/lib/app_context/app_context.c | 195 ++++++++++++++++++++++++++++-- src/lib/include/app.h | 108 +++++++++++++++++ src/lib/include/mpi.h | 58 +-------- 3 files changed, 299 insertions(+), 62 deletions(-) create mode 100755 src/lib/include/app.h diff --git a/src/lib/app_context/app_context.c b/src/lib/app_context/app_context.c index 00d5d94b2..aa52e3c39 100644 --- a/src/lib/app_context/app_context.c +++ b/src/lib/app_context/app_context.c @@ -25,35 +25,214 @@ *******************************************************************************/ #include +#include +#include +#include +#include +#include +#include #include #include "fatal_error.h" +/** + * @brief The rank topology struct + */ +typedef struct { + int number_of_nodes; // total number of nodes in the job + int number_of_ranks; // total number of processes in the job, i.e.
size of the world communicator + int ranks_per_node; // number of ranks per node + int node_index; // index enumerating the node (unique per node) + int node_rank; // rank number inside the node +} RankTopology; + + +/** + * @brief Gracefully error with function name, file and line number along + * the error message + * + * @param test The test + * @param ... Format parameters + */ +#define err(test, ...) err_impl(test, __func__, __FILE__, __LINE__, __VA_ARGS__) +static void err_impl(const bool test, const char* func, const char* file, const int line, const char* format, ...) +{ + if (test) { + va_list args; + char message[1024]; + va_start(args, format); + vsnprintf(message, 1024, format, args); + va_end(args); + char location[1024]; + snprintf(location, 1024, "%s:%d %s", file, line, func); + fatal_error(message, location); + } +} + + +static void initialize(void); static AppContext app_instance = { .mpi = { - .comm = MPI_COMM_WORLD // default communicator + .comm = MPI_COMM_WORLD, // default communicator + .world_comm = MPI_COMM_WORLD, + }, + .ptbc = { + .instance_id = 0, + .n_instances = 1, + .n_defects = 0, + .active = false, + .initialize = initialize, + .instances = {{.active = false}}, + .defects = {{.active = false}} } }; +/** + * @brief Return the global *immutable* application context struct. To be + * used when reading parameters. + * + * @return Global application context struct + */ const AppContext* app(void) { return &app_instance; } -void app_context_init(const MPI_Comm comm) +/** + * @brief Return the global *mutable* application context struct. To be + * used when initializing/setting parameters. + * + * @return Global application context struct + */ +AppContext* appm(void) +{ + return &app_instance; +} + + +/** + * @brief Determine the node/rank topology of the world communicator (collective over mpi.world_comm). + * + * @return The topology; ranks_per_node assumes every node hosts the same number of ranks.
+ */ +static RankTopology get_topology(void) { - static bool initialized = false; + int world_rank; + RankTopology topo; + MPI_Comm node_comm, leader_comm; + + MPI_Comm_rank(app_instance.mpi.world_comm, &world_rank); + MPI_Comm_size(app_instance.mpi.world_comm, &topo.number_of_ranks); + MPI_Comm_split_type(app_instance.mpi.world_comm, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &node_comm); + MPI_Comm_rank(node_comm, &topo.node_rank); + bool is_leader = topo.node_rank == 0; + MPI_Comm_split(app_instance.mpi.world_comm, is_leader ? 0 : MPI_UNDEFINED, world_rank, &leader_comm); + if (is_leader) MPI_Comm_size(leader_comm, &topo.number_of_nodes); + if (is_leader) MPI_Comm_rank(leader_comm, &topo.node_index); + MPI_Bcast(&topo.number_of_nodes, 1, MPI_INT, 0, app_instance.mpi.world_comm); + MPI_Bcast(&topo.node_index, 1, MPI_INT, 0, node_comm); + // the helper communicators are only needed to derive the topology; non-leaders hold MPI_COMM_NULL in leader_comm, which must not be freed + MPI_Comm_free(&node_comm); + if (is_leader) MPI_Comm_free(&leader_comm); - if (initialized) fatal_error("Application context already initialized", __func__); + topo.ranks_per_node = topo.number_of_ranks/topo.number_of_nodes; - app_instance.mpi.comm = comm; - initialized = true; + return topo; } -void app_context_finalize(void) +/** + * @brief Initializes the different application instances.
+ */ +static void initialize(void) { - + printf("\033[0;31m[PTBC] Number of chains = %d\033[0m\n", app_instance.ptbc.n_instances); + + int flag; + MPI_Initialized(&flag); + err(!flag, "Initialize has to be called *after* MPI_Init()."); + + MPI_Comm_rank(app_instance.mpi.world_comm, &app_instance.mpi.world_rank); + + err(app_instance.ptbc.n_instances < 1, "Number of PTBC instances = %d must be at least 1", app_instance.ptbc.n_instances); + if (app_instance.ptbc.n_instances == 1) return; // do nothing in case of a single chain + + app_instance.ptbc.active = true; + RankTopology topo = get_topology(); + + int instance_size = topo.number_of_ranks / app_instance.ptbc.n_instances; + + err(topo.number_of_ranks % app_instance.ptbc.n_instances != 0, + "PTBC_NCHAINS = %d must divide total number of ranks = %d", + app_instance.ptbc.n_instances, topo.number_of_ranks); + err(instance_size % topo.ranks_per_node != 0 && topo.ranks_per_node % instance_size != 0, + "The number of processes per node = %d and instance_size = %d: one must be divisible by the other", + topo.ranks_per_node, instance_size); + + // We perform a topology-aware splitting of processes into instances using + // MPI_Comm_split. Processes within the same node should preferably be + // associated to the same instance. We have 3 cases: + // + // Case 1: If we have one instance per node, there is nothing special to + // consider. + // + // Case 2: If instances span multiple nodes, they should span over the + // minimal number of nodes possible. Instances only cover whole nodes. We + // have no notion of nodes being "close" to each other, although we group + // nodes together with adjacent node indices. Node indices are inherited + // from rank numbers. If ranks with adjacent indices are "close", then nodes + // with adjacent node indices are "close". + // + // Case 3: If we have multiple instances per node, ranks in the same + // instance should have adjacent world rank numbers, i.e. they should be + // "close" to each other cache-wise. No instance covers more than one node.
+ int color = 0; // always overwritten below: the divisibility check above guarantees that one of the three cases applies + int key = app_instance.mpi.world_rank; // order of the ranks is kept + if (instance_size == topo.ranks_per_node) { // case 1: one instance per node + color = topo.node_index; + } else if (instance_size % topo.ranks_per_node == 0) { // case 2: one instance spans multiple nodes + int nodes_per_instance = instance_size / topo.ranks_per_node; + int remainder = topo.node_index % nodes_per_instance; + color = (topo.node_index - remainder) / nodes_per_instance; // group nodes with adjacent node indices + } else if (topo.ranks_per_node % instance_size == 0) { // case 3: multiple instances per node + int instances_per_node = topo.ranks_per_node / instance_size; + int remainder = topo.node_rank % instance_size; + int per_node_instance_index = (topo.node_rank - remainder) / instance_size; + color = topo.node_index*instances_per_node + per_node_instance_index; + } + + MPI_Comm_split(app_instance.mpi.world_comm, color, key, &app_instance.mpi.comm); + + int n; + MPI_Comm_size(app_instance.mpi.comm, &n); + err(instance_size != n, "Rank topology is not uniform"); + + int instance_rank; + MPI_Comm_rank(app_instance.mpi.comm, &instance_rank); + app_instance.ptbc.instance_id = color; + + printf("\033[0;31m[PTBC] world rank = %d/%d in instance_id = %d/%d, as instance_rank = %d/%d\033[0m\n", + app_instance.mpi.world_rank, topo.number_of_ranks, + app_instance.ptbc.instance_id, app_instance.ptbc.n_instances, + instance_rank, instance_size); + + + //err(true, "bailing out"); + /*if (app_instance.ptbc.instance_id != 0) { + char logfile[1024]; + snprintf(logfile, 1024, "logfile_%.2d.log", app_instance.ptbc.instance_id); + freopen(logfile, "w", stdout); + }*/ + + // Every instance just changes into a subdirectory "instance_xx". Relative + // paths work, absolute paths not.
+ struct stat st = {0}; + char subdir[1024]; + snprintf(subdir, 1024, "instance_%.2d", app_instance.ptbc.instance_id); + if (stat(subdir, &st) == -1) + mkdir(subdir, 0700); // result deliberately ignored: another rank of this instance may create the directory first; the chdir check below catches real failures + + err(chdir(subdir) != 0, "Could not change into instance directory %s", subdir); } diff --git a/src/lib/include/app.h b/src/lib/include/app.h new file mode 100755 index 000000000..dfc3d9f28 --- /dev/null +++ b/src/lib/include/app.h @@ -0,0 +1,108 @@ +/*********************************************************************** + * + * Copyright (C) 2026 Roman Gruber + * + * This file is part of tmLQCD. + * + * tmLQCD is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * tmLQCD is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with tmLQCD. If not, see .
+ * + * Application context declarations (MPI and PTBC state) and its accessors + * + * Author: Roman Gruber + * roman.gruber@unibe.ch + * + *******************************************************************************/ + +#ifndef APP_H +#define APP_H + + +#include + + +#if defined(TM_USE_MPI) +#include +#endif + + +#ifndef MPI_VERSION +typedef int MPI_Comm; +#endif + + +#ifdef __cplusplus +extern "C" { +#endif + + +#define MAX_N_DEFECTS 10 +#define MAX_N_INSTANCES 10 + + +typedef enum direction_t { + DIRECTION_T = 0, + DIRECTION_X = 1, + DIRECTION_Y = 2, + DIRECTION_Z = 3 +} direction_t; + + +typedef struct { + MPI_Comm comm; // MPI instance communicator + MPI_Comm world_comm; // MPI world communicator + int world_rank; // MPI world rank (rank within world_comm) +} MPIContext; + + +typedef struct { + bool active; // Whether the defect is active or not + int Ld[3]; // Extents of the defect + direction_t along; // Along which dimension +} PTBCDefect; + + +typedef struct { + bool active; // Whether the instance is active or not + int n_coeffs; // Number of coefficients / defects this instance is associated with + PTBCDefect** defects; // List of defects this instance is associated with + double* coefficients; // List of coefficients for the defects +} PTBCInstance; + + +typedef struct { + bool active; // Whether PTBC mode is active or not + int instance_id; // Instance ID + int n_instances; // Number of instances + int n_defects; // Number of defects + PTBCInstance instances[MAX_N_INSTANCES]; // List of all instances + PTBCDefect defects[MAX_N_DEFECTS]; // List of all defects + void (*initialize)(void); // PTBC algorithm initializer +} PTBCContext; + + +typedef struct { + MPIContext mpi; // MPI context + PTBCContext ptbc; // PTBC context +} AppContext; + + +const AppContext* app(void); +AppContext* appm(void); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/lib/include/mpi.h b/src/lib/include/mpi.h index 57c645566..7d816632e 100644 --- a/src/lib/include/mpi.h +++ b/src/lib/include/mpi.h @@ -24,62 +24,12 @@ *
*******************************************************************************/ -#ifndef MY_MPI_WRAPPER_H -#define MY_MPI_WRAPPER_H +#ifndef MPI_WRAPPER_H +#define MPI_WRAPPER_H -// include *real* MPI header -#include_next +#include_next // include *real* MPI header +#include "app.h" -#ifdef __cplusplus -extern "C" { -#endif - - -/** - * @brief MPI context - * - * @var comm MPI communicator - */ -typedef struct { - MPI_Comm comm; -} MPIContext; - - -/** - * @brief The global application context struct - * - * @var mpi MPI context - */ -typedef struct { - MPIContext mpi; -} AppContext; - - -/** - * @brief Return the global application context struct - * - * @return Global application context struct - */ -const AppContext* app(void); - - -/** - * @brief Initialize application context - * - * @param[in] comm The MPI communicator to use throughout the application - */ -void app_context_init(const MPI_Comm comm); - - -/** - * @brief Finalize application context - */ -void app_context_finalize(void); - - -#ifdef __cplusplus -} -#endif #endif From d97bafd99a38d9771bc06c60bf0f40b346b57a0f Mon Sep 17 00:00:00 2001 From: Roman Gruber Date: Mon, 2 Mar 2026 11:36:14 +0100 Subject: [PATCH 18/19] added PTBC read in logic --- src/lib/init/init_parallel.c | 2 + src/lib/read_input.l | 227 ++++++++++++++++++++++++++++++++++- 2 files changed, 227 insertions(+), 2 deletions(-) diff --git a/src/lib/init/init_parallel.c b/src/lib/init/init_parallel.c index 9dfdbb0c5..7d830e8bb 100644 --- a/src/lib/init/init_parallel.c +++ b/src/lib/init/init_parallel.c @@ -85,6 +85,8 @@ void init_parallel_and_read_input(int argc, char *argv[], const char input_filen exit(-1); } + app()->ptbc.initialize(); + #ifdef TM_USE_OMP init_openmp(); #endif diff --git a/src/lib/read_input.l b/src/lib/read_input.l index 5eb542f87..3a917bd46 100644 --- a/src/lib/read_input.l +++ b/src/lib/read_input.l @@ -52,6 +52,7 @@ EQL {SPC}*={SPC}* #include #include #include +#include #define INIT_GLOBALS #include "global.h" 
#undef INIT_GLOBALS
@@ -61,12 +62,14 @@ EQL {SPC}*={SPC}*
 #include "solver/solver_types.h"
 #include "meas/measurements.h"
 #include "integrator.h"
+#include "include/app.h"
 #include "operator.h"
 #include "phmc.h"
 #include
 #include "qphix_types.h"
 #include "quda_types.h"
 #include "misc_types.h"
+#include "alloc.h"
 
 #include
 
@@ -87,6 +90,28 @@ static inline void rmQuotes(char *str){
   *strsave='\0';
 }
 
+
+static inline int strlist_count_tokens(char const * const input) /* number of comma-separated tokens: commas in input, plus one */
+{
+  int n = 0;
+  for (char const *p = input; *p != '\0'; p++)
+    if (*p == ',') n++;
+  return n+1;
+}
+
+static inline void fail(const bool test, const char* fmt, ...) /* if test holds, format the printf-style message and hand it to yy_fatal_error */
+{
+  if (test) {
+    va_list args;
+    char msg[1024];
+    va_start(args, fmt);
+    vsnprintf(msg, sizeof msg, fmt, args);
+    va_end(args);
+    yy_fatal_error(msg);
+  }
+}
+
+
 /* tokenize the comma-delimited list 'input' of the form
    'list = token1, token2, ...' and return the first token,
    which is the name of the list
@@ -472,7 +497,61 @@ static inline double fltlist_next_token(int * const list_end){
     }
     free(input_copy);
   }
-  
+
+  static inline void parse_int_par_array(char const * const input, int * par_array, const int max_size) { /* fill par_array from 'name = v1, v2, ...'; fatal error on more than max_size values */
+    char paramname[100];
+    char error_message[ERR_MSG_LEN];
+    int list_end = 0;
+    int element = 0;
+
+    char * input_copy = (char*)NULL;
+    strlist_tokenize(input, &input_copy, paramname, 100);
+    int parval = (int) fltlist_next_token(&list_end);
+    while( list_end != 1 ){
+      if( element >= max_size ){
+        snprintf(error_message, ERR_MSG_LEN, "Exceeded maximum number of elements (%d) parsing %s!\n", max_size, paramname);
+        yy_fatal_error(error_message);
+      }
+
+      par_array[element] = parval;
+      if(myverbose){
+        printf(" %s, element %d set to %d line %d\n", paramname,
+               element, par_array[element], line_of_file);
+      }
+
+      element++;
+      parval = (int) fltlist_next_token(&list_end);
+    }
+    free(input_copy);
+  }
+
+  static inline void parse_dbl_par_array(char const * const input, double * par_array, const int max_size) { /* double-valued twin of parse_int_par_array */
+    char paramname[100];
+    char error_message[ERR_MSG_LEN];
+    int list_end = 0;
+    int element = 0;
+
+    char * input_copy = (char*)NULL;
+    strlist_tokenize(input, &input_copy, paramname, 100);
+    double parval = fltlist_next_token(&list_end);
+    while( list_end != 1 ){
+      if( element >= max_size ){
+        snprintf(error_message, ERR_MSG_LEN, "Exceeded maximum number of elements (%d) parsing %s!\n", max_size, paramname);
+        yy_fatal_error(error_message);
+      }
+
+      par_array[element] = parval;
+      if(myverbose){
+        printf(" %s, element %d set to %e line %d\n", paramname,
+               element, par_array[element], line_of_file);
+      }
+
+      element++;
+      parval = fltlist_next_token(&list_end);
+    }
+    free(input_copy);
+  }
+
 %}
 
 %option never-interactive
@@ -556,6 +635,11 @@ static inline double fltlist_next_token(int * const list_end){
 %x INITINTEGRATOR
 %x INTEGRATOR
 
+%x INITPTBC
+%x PTBC
+%x PTBCDEFECT
+%x PTBCINSTANCE
+
 %x DEFLATION
 %x INITDEFLATION
 
@@ -712,6 +796,8 @@ static inline double fltlist_next_token(int * const list_end){
 ^BeginInt BEGIN(INITINTEGRATOR);
 ^BeginOperator{SPC}+ BEGIN(INITOPERATOR);
 
+^BeginPTBC BEGIN(INITPTBC);
+
 ^BeginExternalInverter{SPC}+ BEGIN(INITEXTERNALINVERTER);
 ^BeginTuneMGParams{SPC}+ BEGIN(TUNEMGPARAMS);
 
@@ -3071,6 +3157,143 @@ static inline double fltlist_next_token(int * const list_end){
   }
 }
 
+<INITPTBC>{SPC}* {
+  appm()->ptbc.active = false; /* NOTE(review): nothing in this file sets active back to true; confirm where PTBC gets enabled */
+  appm()->ptbc.n_instances = 0;
+  appm()->ptbc.n_defects = 0;
+  appm()->ptbc.instance_id = 0;
+  if(myverbose) printf("Initialising PTBC line %d\n", line_of_file);
+  BEGIN(PTBC);
+}
+<PTBC>{
+  {SPC}*BeginPTBCDefect{SPC}+{DIGIT}+ {
+    sscanf(yytext, " %[a-zA-Z] %d", name, &b);
+    fail(b<0 || b>=MAX_N_DEFECTS, "PTBC Defect id = %d out of bounds!", b); /* b indexes defects[MAX_N_DEFECTS] */
+    appm()->ptbc.defects[b].active = true;
+    appm()->ptbc.n_defects++;
+    fail(appm()->ptbc.n_defects>MAX_N_DEFECTS, "Too many PTBC defects! Limit is %d", MAX_N_DEFECTS); /* exactly MAX_N_DEFECTS defects still fit */
+    if(myverbose) printf(" Initialising PTBC defect (index = %d) in line %d\n", b, line_of_file);
+    BEGIN(PTBCDEFECT);
+  }
+  <PTBCDEFECT>{
+    {SPC}*Extents{EQL}{STRLIST} {
+
+      PTBCDefect* cd = appm()->ptbc.defects + b;
+
+      int n_parameters = strlist_count_tokens(yytext);
+      fail(n_parameters != 3, "Extents must provide exactly 3 numbers, not %d!\n", n_parameters);
+
+      fail(sscanf(yytext, " %[a-zA-Z] = %d, %d, %d", name, cd->Ld, cd->Ld+1, cd->Ld+2) != 4, "Extents must be three comma-separated integers!\n"); /* 4 = name + three extents */
+      if(myverbose) printf(" Ld = [%d, %d, %d] line %d\n", cd->Ld[0], cd->Ld[1], cd->Ld[2], line_of_file);
+      BEGIN(PTBCDEFECT);
+    }
+    {SPC}*Along{EQL}T {
+      appm()->ptbc.defects[b].along = DIRECTION_T;
+      if(myverbose) printf(" PTBC defect along T direction line %d\n", line_of_file);
+      BEGIN(PTBCDEFECT);
+    }
+    {SPC}*Along{EQL}X {
+      appm()->ptbc.defects[b].along = DIRECTION_X;
+      if(myverbose) printf(" PTBC defect along X direction line %d\n", line_of_file);
+      BEGIN(PTBCDEFECT);
+    }
+    {SPC}*Along{EQL}Y {
+      appm()->ptbc.defects[b].along = DIRECTION_Y;
+      if(myverbose) printf(" PTBC defect along Y direction line %d\n", line_of_file);
+      BEGIN(PTBCDEFECT);
+    }
+    {SPC}*Along{EQL}Z {
+      appm()->ptbc.defects[b].along = DIRECTION_Z;
+      if(myverbose) printf(" PTBC defect along Z direction line %d\n", line_of_file);
+      BEGIN(PTBCDEFECT);
+    }
+    {SPC}*EndPTBCDefect{SPC}* {
+      if(myverbose) printf(" PTBC defect parsed line %d\n", line_of_file);
+      BEGIN(PTBC);
+    }
+  }
+  {SPC}*BeginPTBCInstance{SPC}+{DIGIT}+ {
+    sscanf(yytext, " %[a-zA-Z] %d", name, &b);
+    fail(b<0 || b>=MAX_N_INSTANCES, "PTBC Instance id = %d out of bounds!", b); /* b indexes instances[MAX_N_INSTANCES] */
+    appm()->ptbc.instances[b].active = true;
+    appm()->ptbc.instances[b].n_coeffs = -1; /* -1: neither Coefficients nor Defects seen yet */
+    appm()->ptbc.n_instances++;
+    fail(appm()->ptbc.n_instances>MAX_N_INSTANCES, "Too many PTBC instances! Limit is %d", MAX_N_INSTANCES); /* exactly MAX_N_INSTANCES instances still fit */
+    if(myverbose) printf(" Initialising PTBC instance (index = %d) in line %d\n", b, line_of_file);
+    BEGIN(PTBCINSTANCE);
+  }
+  <PTBCINSTANCE>{
+    {SPC}*Coefficients{EQL}{STRLIST} {
+
+      PTBCInstance* ci = appm()->ptbc.instances + b;
+
+      int n_parameters = strlist_count_tokens(yytext);
+      if (ci->n_coeffs == -1) ci->n_coeffs = n_parameters;
+      fail(ci->n_coeffs != n_parameters, "Number of PTBC coefficients must be equal to number of defects!\n"); /* checked first so a reused buffer is never overrun */
+
+      if (ci->coefficients == NULL)
+        ci->coefficients = safe_malloc(n_parameters * sizeof *ci->coefficients);
+
+      parse_dbl_par_array(yytext, ci->coefficients, n_parameters);
+
+      for (int i = 0; i < n_parameters; ++i) {
+        fail(ci->coefficients[i]<0 || ci->coefficients[i] > 1,
+             "Coefficient %d with value %e is out of bounds", i, ci->coefficients[i]);
+      }
+
+      for (int i = 0; i < n_parameters; ++i) {
+        if(myverbose) printf(" Coefficients[%d] = %f line %d\n", i, ci->coefficients[i], line_of_file);
+      }
+
+      BEGIN(PTBCINSTANCE);
+    }
+    {SPC}*Defects{EQL}{STRLIST} {
+
+      PTBCInstance* ci = appm()->ptbc.instances + b;
+
+      int n_parameters = strlist_count_tokens(yytext);
+      if (ci->n_coeffs == -1) ci->n_coeffs = n_parameters;
+
+      fail(ci->n_coeffs != n_parameters,
+           "Number of PTBC defects must be equal to number of coefficients!\n"); /* checked first so a reused buffer is never overrun */
+
+      if (ci->defects == NULL) {
+        ci->defects = safe_malloc(n_parameters * sizeof *ci->defects);
+      }
+
+      int* ids = safe_malloc(n_parameters * sizeof *ids);
+      parse_int_par_array(yytext, ids, n_parameters);
+
+      for (int i = 0; i < n_parameters; ++i) {
+        if(myverbose) printf(" Defects = %d line %d\n", ids[i], line_of_file);
+        fail(ids[i]<0 || ids[i]>=MAX_N_DEFECTS, "PTBC Defect id = %d out of bounds!", ids[i]); /* ids[i] indexes defects[MAX_N_DEFECTS] */
+        ci->defects[i] = &(appm()->ptbc.defects[ids[i]]);
+      }
+
+      free(ids);
+      BEGIN(PTBCINSTANCE);
+    }
+    {SPC}*EndPTBCInstance{SPC}* {
+      if(myverbose) printf(" PTBC instance parsed line %d\n", line_of_file);
+      BEGIN(PTBC);
+    }
+  }
+  EndPTBC{SPC}* {
+    fail(app()->ptbc.n_instances == 1, "Number of PTBC chains must be larger than 1!\n");
+
+    for (int i = 0; i < app()->ptbc.n_instances; ++i) {
+      fail(app()->ptbc.instances[i].active == false, "PTBC instance %d is not active!\n", i);
+      fail(app()->ptbc.instances[i].defects == NULL || app()->ptbc.instances[i].coefficients == NULL, "PTBC instance %d needs both Coefficients and Defects!\n", i); /* guards the defects[j] dereference below */
+      for (int j = 0; j < app()->ptbc.instances[i].n_coeffs; ++j) {
+        fail(app()->ptbc.instances[i].defects[j]->active == false, "PTBC instance %d does not refer to a valid defect!\n", i);
+      }
+    }
+
+    if(myverbose) printf("PTBC parsed line %d\n\n", line_of_file);
+    BEGIN(0);
+  }
+}
+
 {
   Point {
     SourceInfo.type = SRC_TYPE_POINT;
@@ -3729,7 +3952,7 @@ static inline double fltlist_next_token(int * const list_end){
   BEGIN(comment_caller);
 }
 
-{SPC}*\n {
+{SPC}*\n {
   line_of_file++;
 }
 <*>{SPC}*\n {
From 90340d4d1f720f53eba8c01708755e7999fa7b29 Mon Sep 17 00:00:00 2001
From: Roman Gruber
Date: Mon, 2 Mar 2026 11:36:29 +0100
Subject: [PATCH 19/19] added PTBC input file example

---
 ...-ptbc-hmc-rgmixedcg-tmcloverdetratio.input | 117 ++++++++++++++++++
 1 file changed, 117 insertions(+)
 create mode 100755 doc/sample-input/sample-ptbc-hmc-rgmixedcg-tmcloverdetratio.input

diff --git a/doc/sample-input/sample-ptbc-hmc-rgmixedcg-tmcloverdetratio.input b/doc/sample-input/sample-ptbc-hmc-rgmixedcg-tmcloverdetratio.input
new file mode 100755
index 000000000..96e5e94ca
--- /dev/null
+++ b/doc/sample-input/sample-ptbc-hmc-rgmixedcg-tmcloverdetratio.input
@@ -0,0 +1,117 @@
+# this is a sample PTBC input file for cloverdet + cloverdetratio using
+# rgmixedcg as a solver
+
+L=8
+T=8
+Measurements = 20
+Startcondition = hot
+2KappaMu = 0.01
+CSW = 1.00
+kappa = 0.138
+NSave = 500000
+ThetaT = 1.0
+UseEvenOdd = yes
+ReversibilityCheck = yes
+ReversibilityCheckIntervall = 4
+InitialStoreCounter = 0
+DebugLevel = 1
+
+BeginPTBC
+
+  BeginPTBCDefect 0
+    Extents = 2, 2, 2
+    Along = Y
+  EndPTBCDefect
+
+  BeginPTBCInstance 0
+    Coefficients = 1.0
+    Defects = 0
+  EndPTBCInstance
+
+  BeginPTBCInstance 1
+    Coefficients = 0.0
+    Defects = 0
+  EndPTBCInstance
+
+  BeginPTBCInstance 2
+    Coefficients = 0.75
+    Defects = 0
+  EndPTBCInstance
+
+  BeginPTBCInstance 3
+    Coefficients = 0.5
+    Defects = 0
+  EndPTBCInstance
+
+EndPTBC
+
+
+
+# since this is a test file, we employ the reproducible random numbers mode
+ReproduceRandomNumbers = yes
+Seed = 127782
+
+BeginMeasurement CORRELATORS
+  Frequency = 2
+EndMeasurement
+
+BeginMonomial GAUGE
+  Type = Wilson
+  beta = 5.60
+  Timescale = 0
+EndMonomial
+
+BeginMonomial CLOVERDET
+  Timescale = 1
+  2KappaMu = 0.01
+  CSW = 1.00
+  # numerator shift
+  rho = 0.1
+  kappa = 0.138
+  AcceptancePrecision = 1.e-20
+  ForcePrecision = 1.e-14
+  Name = cloverdet
+  solver = rgmixedcg
+  usesloppyprecision = single
+EndMonomial
+
+BeginMonomial CLOVERDETRATIO
+  Timescale = 2
+  2KappaMu = 0.01
+  # numerator shift
+  rho = 0.0
+  # denominator shift, should match CLOVERDET shift
+  rho2 = 0.1
+  CSW = 1.00
+  kappa = 0.138
+  AcceptancePrecision = 1.e-20
+  ForcePrecision = 1.e-16
+  Name = cloverdetratio
+  solver = rgmixedcg
+  usesloppyprecision = single
+EndMonomial
+
+BeginIntegrator
+  Type0 = 2MNFG
+  Type1 = 2MNFG
+  Type2 = 2MNFG
+  IntegrationSteps0 = 1
+  IntegrationSteps1 = 1
+  IntegrationSteps2 = 4
+  tau = 1.00
+  Lambda0 = 0.16666667
+  Lambda1 = 0.16666667
+  Lambda2 = 0.16666667
+  NumberOfTimescales = 3
+EndIntegrator
+
+BeginOperator CLOVER
+  2KappaMu = 0.01
+  CSW = 1.00
+  kappa = 0.138
+  SolverPrecision = 1e-16
+  MaxSolverIterations = 1000
+  useevenodd = yes
+  solver = rgmixedcg
+  usesloppyprecision = single
+EndOperator