diff --git a/.devcontainer/build_cxx.sh b/.devcontainer/build_cxx.sh index 432d7d32db..109d2d7d21 100755 --- a/.devcontainer/build_cxx.sh +++ b/.devcontainer/build_cxx.sh @@ -5,7 +5,7 @@ NPROC=$(nproc --all) SCRIPT_PATH=$(dirname $(realpath -s $0)) export CMAKE_PREFIX_PATH=${SCRIPT_PATH}/../libtorch -TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') +TENSORFLOW_ROOT=$(python -c 'import importlib.util,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') mkdir -p ${SCRIPT_PATH}/../buildcxx/ cd ${SCRIPT_PATH}/../buildcxx/ diff --git a/.devcontainer/gdb_lmp b/.devcontainer/gdb_lmp index 33e883780b..fc1c8b90fe 100755 --- a/.devcontainer/gdb_lmp +++ b/.devcontainer/gdb_lmp @@ -2,7 +2,7 @@ SCRIPT_PATH=$(dirname $(realpath -s $0)) export CMAKE_PREFIX_PATH=${SCRIPT_PATH}/../libtorch -TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') +TENSORFLOW_ROOT=$(python -c 'import importlib.util,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') env LAMMPS_PLUGIN_PATH=${SCRIPT_PATH}/../dp/lib/deepmd_lmp \ LD_LIBRARY_PATH=${SCRIPT_PATH}/../dp/lib:${CMAKE_PREFIX_PATH}/lib:${TENSORFLOW_ROOT} \ diff --git a/.devcontainer/gdb_pytest_lmp b/.devcontainer/gdb_pytest_lmp index e27e40d4b0..d27587ec43 100755 --- a/.devcontainer/gdb_pytest_lmp +++ b/.devcontainer/gdb_pytest_lmp @@ -2,7 +2,7 @@ SCRIPT_PATH=$(dirname $(realpath -s $0))/../.. 
export CMAKE_PREFIX_PATH=${SCRIPT_PATH}/../libtorch -TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') +TENSORFLOW_ROOT=$(python -c 'import importlib.util,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') env LAMMPS_PLUGIN_PATH=${SCRIPT_PATH}/../dp/lib/deepmd_lmp \ LD_LIBRARY_PATH=${SCRIPT_PATH}/../dp/lib:${CMAKE_PREFIX_PATH}/lib:${TENSORFLOW_ROOT} \ diff --git a/.devcontainer/lmp b/.devcontainer/lmp index c8e781aa57..524f99b326 100755 --- a/.devcontainer/lmp +++ b/.devcontainer/lmp @@ -2,7 +2,7 @@ SCRIPT_PATH=$(dirname $(realpath -s $0)) export CMAKE_PREFIX_PATH=${SCRIPT_PATH}/../libtorch -TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') +TENSORFLOW_ROOT=$(python -c 'import importlib.util,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') env LAMMPS_PLUGIN_PATH=${SCRIPT_PATH}/../dp/lib/deepmd_lmp \ LD_LIBRARY_PATH=${SCRIPT_PATH}/../dp/lib:${CMAKE_PREFIX_PATH}/lib:${TENSORFLOW_ROOT} \ diff --git a/.devcontainer/pytest_lmp b/.devcontainer/pytest_lmp index 9371ba72d5..bb88da883f 100755 --- a/.devcontainer/pytest_lmp +++ b/.devcontainer/pytest_lmp @@ -2,7 +2,7 @@ SCRIPT_PATH=$(dirname $(realpath -s $0))/../.. 
export CMAKE_PREFIX_PATH=${SCRIPT_PATH}/../libtorch -TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') +TENSORFLOW_ROOT=$(python -c 'import importlib.util,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') env LAMMPS_PLUGIN_PATH=${SCRIPT_PATH}/../dp/lib/deepmd_lmp \ LD_LIBRARY_PATH=${SCRIPT_PATH}/../dp/lib:${CMAKE_PREFIX_PATH}/lib:${TENSORFLOW_ROOT} \ diff --git a/.github/workflows/test_cc.yml b/.github/workflows/test_cc.yml index 7d76e60ba1..e2c446a64d 100644 --- a/.github/workflows/test_cc.yml +++ b/.github/workflows/test_cc.yml @@ -26,7 +26,7 @@ jobs: - name: Install Python dependencies run: | source/install/uv_with_retry.sh pip install --system tensorflow-cpu~=2.18.0 jax==0.5.0 - export TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') + export TENSORFLOW_ROOT=$(python -c 'import importlib.util,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') source/install/uv_with_retry.sh pip install --system -e .[cpu,test,lmp,jax] mpi4py mpich source/install/uv_with_retry.sh pip install --system 'torch==2.7' --index-url https://download.pytorch.org/whl/cpu - name: Convert models diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml index 14c051123c..b527a5481f 100644 --- a/.github/workflows/test_cuda.yml +++ b/.github/workflows/test_cuda.yml @@ -46,7 +46,7 @@ jobs: - run: source/install/uv_with_retry.sh pip install --system "tensorflow~=2.18.0rc2" "torch~=2.7.0" "jax[cuda12]==0.5.0" - run: | export PYTORCH_ROOT=$(python -c 'import torch;print(torch.__path__[0])') - export TENSORFLOW_ROOT=$(python -c 'import importlib,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') + export TENSORFLOW_ROOT=$(python -c 'import 
importlib.util,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') pip install "paddlepaddle-gpu==3.0.0" -i https://www.paddlepaddle.org.cn/packages/stable/cu126/ source/install/uv_with_retry.sh pip install --system -v -e .[gpu,test,lmp,cu12,torch,jax] mpi4py --reinstall-package deepmd-kit env: diff --git a/.github/workflows/test_python.yml b/.github/workflows/test_python.yml index 8274337e65..ec21fbc669 100644 --- a/.github/workflows/test_python.yml +++ b/.github/workflows/test_python.yml @@ -27,7 +27,7 @@ jobs: - run: | source/install/uv_with_retry.sh pip install --system openmpi tensorflow-cpu~=2.18.0 source/install/uv_with_retry.sh pip install --system torch -i https://download.pytorch.org/whl/cpu - export TENSORFLOW_ROOT=$(python -c 'import tensorflow;print(tensorflow.__path__[0])') + export TENSORFLOW_ROOT=$(python -c 'import importlib.util,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)') export PYTORCH_ROOT=$(python -c 'import torch;print(torch.__path__[0])') source/install/uv_with_retry.sh pip install --system -e .[test,jax] mpi4py "jax==0.5.0;python_version>='3.10'" source/install/uv_with_retry.sh pip install --system -U setuptools diff --git a/backend/find_tensorflow.py b/backend/find_tensorflow.py index 8b055f9f4f..a0a1e65aca 100644 --- a/backend/find_tensorflow.py +++ b/backend/find_tensorflow.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import os +import re import site from functools import ( lru_cache, @@ -56,6 +57,10 @@ def find_tensorflow() -> tuple[Optional[str], list[str]]: ) is not None: site_packages = Path(os.environ.get("TENSORFLOW_ROOT")).parent.absolute() tf_spec = FileFinder(str(site_packages)).find_spec("tensorflow") + if tf_spec is None: + raise RuntimeError( + f"cannot find TensorFlow under TENSORFLOW_ROOT {os.environ.get('TENSORFLOW_ROOT')}" + ) # get tensorflow spec # note: isolated build will not work for backend @@ -153,7 +158,8 @@ def 
get_tf_requirement(tf_version: str = "") -> dict: "tensorflow-cpu; platform_machine!='aarch64' and (platform_machine!='arm64' or platform_system != 'Darwin')", "tensorflow; platform_machine=='aarch64' or (platform_machine=='arm64' and platform_system == 'Darwin')", # https://github.com/tensorflow/tensorflow/issues/61830 - "tensorflow-cpu!=2.15.*; platform_system=='Windows'", + # Since TF 2.20, not all symbols are exported to the public API. + "tensorflow-cpu!=2.15.*,<2.20; platform_system=='Windows'", # https://github.com/h5py/h5py/issues/2408 "h5py>=3.6.0,!=3.11.0; platform_system=='Linux' and platform_machine=='aarch64'", *extra_requires, @@ -228,6 +234,22 @@ def get_tf_version(tf_path: Optional[Union[str, Path]]) -> str: patch = line.split()[-1] elif line.startswith("#define TF_VERSION_SUFFIX"): suffix = line.split()[-1].strip('"') + if None in (major, minor, patch): + # since TF 2.20.0, version information is no longer contained in version.h + # try to read version from tools/pip_package/setup.py + # _VERSION = '2.20.0' + setup_file = Path(tf_path) / "tools" / "pip_package" / "setup.py" + if setup_file.exists(): + with open(setup_file) as f: + for line in f: + # parse with regex + match = re.search( + r"_VERSION[ \t]*=[ \t]*'(\d+)\.(\d+)\.(\d+)([a-zA-Z0-9]*)?'", + line, + ) + if match: + major, minor, patch, suffix = match.groups() + break if None in (major, minor, patch): raise RuntimeError("Failed to read TF version") return ".".join((major, minor, patch)) + suffix diff --git a/source/cmake/Findtensorflow.cmake b/source/cmake/Findtensorflow.cmake index d579af7679..b5b8c92f3d 100644 --- a/source/cmake/Findtensorflow.cmake +++ b/source/cmake/Findtensorflow.cmake @@ -291,8 +291,10 @@ if(NOT DEFINED TENSORFLOW_VERSION) TENSORFLOW_VERSION_RUN_RESULT_VAR TENSORFLOW_VERSION_COMPILE_RESULT_VAR ${CMAKE_CURRENT_BINARY_DIR}/tf_version "${CMAKE_CURRENT_LIST_DIR}/tf_version.cpp" - CMAKE_FLAGS "-DINCLUDE_DIRECTORIES:STRING=${TensorFlow_INCLUDE_DIRS}" - RUN_OUTPUT_VARIABLE 
TENSORFLOW_VERSION + CMAKE_FLAGS + "-DINCLUDE_DIRECTORIES:STRING=${TensorFlow_INCLUDE_DIRS}" LINK_LIBRARIES + ${TensorFlowFramework_LIBRARY} ${TensorFlow_LIBRARY} + RUN_OUTPUT_STDOUT_VARIABLE TENSORFLOW_VERSION COMPILE_OUTPUT_VARIABLE TENSORFLOW_VERSION_COMPILE_OUTPUT_VAR) if(NOT ${TENSORFLOW_VERSION_COMPILE_RESULT_VAR}) message( @@ -304,6 +306,23 @@ if(NOT DEFINED TENSORFLOW_VERSION) endif() endif() +if(TENSORFLOW_VERSION VERSION_GREATER_EQUAL 2.20) + # since TF 2.20, macros like TF_MAJOR_VERSION, TF_MINOR_VERSION, and + # TF_PATCH_VERSION are not defined. We manually define them in our CMake files + # first, split TENSORFLOW_VERSION (e.g. 2.20.0rc0) to 2 20 0 rc0 + string(REGEX MATCH "^([0-9]+)\\.([0-9]+)\\.([0-9]+)(.*)$" _match + ${TENSORFLOW_VERSION}) + if(_match) + set(TF_MAJOR_VERSION ${CMAKE_MATCH_1}) + set(TF_MINOR_VERSION ${CMAKE_MATCH_2}) + set(TF_PATCH_VERSION ${CMAKE_MATCH_3}) + # add defines + add_definitions(-DTF_MAJOR_VERSION=${TF_MAJOR_VERSION}) + add_definitions(-DTF_MINOR_VERSION=${TF_MINOR_VERSION}) + add_definitions(-DTF_PATCH_VERSION=${TF_PATCH_VERSION}) + endif() +endif() + # print message if(NOT TensorFlow_FIND_QUIETLY) message( diff --git a/source/cmake/tf_version.cpp b/source/cmake/tf_version.cpp index 6d09e33493..2ad2125291 100644 --- a/source/cmake/tf_version.cpp +++ b/source/cmake/tf_version.cpp @@ -1,12 +1,14 @@ // SPDX-License-Identifier: LGPL-3.0-or-later #include <iostream> -#include "tensorflow/core/public/version.h" +#include "tensorflow/c/c_api.h" int main(int argc, char* argv[]) { // See // https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/version.h // TF_VERSION_STRING has been available since TensorFlow v0.6 - std::cout << TF_VERSION_STRING; + // Aug 2025: since TF 2.20, TF_VERSION_STRING is no longer available; + // try to use the C API TF_Version + std::cout << TF_Version(); return 0; }