diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d4c8be643e1..b8c4c5fd0a7 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -184,70 +184,8 @@ jobs: tests: needs: [calc_ver, build] - runs-on: ubuntu-latest - env: - JOSHUA_DB_VER: "6.3.15" - N_OF_TESTS: 500 # to fit in 360 minutes job run limit - JOSHUA_AGENT_URL: "docker.io/1inker" - JOSHUA_AGENT_TAG: "rhel8-20220816" - - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - path: ${{github.workspace}}/src - - - name: Install dependencies - shell: bash - run: | - sudo apt-get update - sudo apt-get install -y sudo wget crudini git python3 python3-pip - sudo pip3 install wheel setuptools python-dateutil lxml boto3 - - - name: Install FoundationDb - shell: bash - run: | - mkdir deb - pushd deb - MY_ARCH=`dpkg-architecture -q DEB_BUILD_ARCH` - wget https://github.com/apple/foundationdb/releases/download/${{ env.JOSHUA_DB_VER }}/foundationdb-clients_${{ env.JOSHUA_DB_VER }}-1_${MY_ARCH}.deb https://github.com/apple/foundationdb/releases/download/${{ env.JOSHUA_DB_VER }}/foundationdb-server_${{ env.JOSHUA_DB_VER }}-1_${MY_ARCH}.deb - sudo apt-get install -y ./foundationdb-clients_${{ env.JOSHUA_DB_VER }}-1_${MY_ARCH}.deb ./foundationdb-server_${{ env.JOSHUA_DB_VER }}-1_${MY_ARCH}.deb - popd - sudo systemctl stop foundationdb - MY_IP=`hostname -I | awk '{print $1}'` - sudo sed -i s/127.0.0.1/$MY_IP/ /etc/foundationdb/fdb.cluster - sudo crudini --set /etc/foundationdb/foundationdb.conf fdbserver memory 4GiB - sudo systemctl start foundationdb - pip3 install 'foundationdb==${{ env.JOSHUA_DB_VER }}' - - - name: Download the correctness package - uses: actions/download-artifact@v4 - id: download_correctness - with: - name: correctness-${{needs.calc_ver.outputs.full_ver}}.tar.gz - - - name: 'Echo download path' - run: echo ${{steps.download_correctness.outputs.download-path}} - - - name: Display structure of downloaded files - run: ls -R - working-directory: 
${{github.workspace}} - - - name: Download joshua - shell: bash - run: | - git clone https://github.com/FoundationDB/fdb-joshua.git - - - name: run joshua-agent - shell: bash - run: | - podman pull ${{ env.JOSHUA_AGENT_URL }}/joshua-agent:${{ env.JOSHUA_AGENT_TAG }} - podman create --name joshua-agent -v /etc/foundationdb:/etc/foundationdb -it joshua-agent:${{ env.JOSHUA_AGENT_TAG }} - podman start joshua-agent - - - name: run tests - shell: bash - working-directory: ${{github.workspace}}/fdb-joshua - run: | - podman ps - ${{github.workspace}}/src/build-scripts/for-linux/test-joshua.bash ${{github.workspace}}/correctness-${{needs.calc_ver.outputs.full_ver}}.tar.gz ${{env.N_OF_TESTS}} \ No newline at end of file + uses: ./.github/workflows/run-tests.yml + with: + full_ver: ${{needs.calc_ver.outputs.full_ver}} + build_run_id: ${{ github.run_id }} + secrets: inherit diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml new file mode 100644 index 00000000000..e0cdac8e81d --- /dev/null +++ b/.github/workflows/run-tests.yml @@ -0,0 +1,95 @@ +name: Run Tests (reusable) + +on: + workflow_call: + inputs: + full_ver: + description: 'Version of the build to test (e.g. 7.4.0-3.1.ow)' + required: true + type: string + build_run_id: + description: 'Run ID to download the correctness package from' + required: true + type: string + +jobs: + tests: + runs-on: ubuntu-latest + env: + JOSHUA_DB_VER: "7.1.57" + N_OF_TESTS: 500 # to fit in 360 minutes job run limit + JOSHUA_AGENT_TAG: "rockylinux9.6-20260309" + # parameter that controls the maximum lifetime of the Joshua agent (in seconds). 
+ AGENT_TIMEOUT: 18000 + + steps: + - name: Set agent URL + run: | + echo "JOSHUA_AGENT_URL=ghcr.io/${GITHUB_REPOSITORY_OWNER,,}" >> $GITHUB_ENV + echo "Agent URL: ghcr.io/${GITHUB_REPOSITORY_OWNER,,}" + + - name: Checkout + uses: actions/checkout@v4 + with: + path: ${{github.workspace}}/src + + - name: Install dependencies + shell: bash + run: | + sudo apt-get update + sudo apt-get install -y sudo wget crudini git python3 python3-pip + sudo pip3 install wheel setuptools python-dateutil lxml boto3 + + - name: Install FoundationDb + shell: bash + run: | + mkdir deb + pushd deb + MY_ARCH=`dpkg-architecture -q DEB_BUILD_ARCH` + wget https://github.com/apple/foundationdb/releases/download/${{ env.JOSHUA_DB_VER }}/foundationdb-clients_${{ env.JOSHUA_DB_VER }}-1_${MY_ARCH}.deb https://github.com/apple/foundationdb/releases/download/${{ env.JOSHUA_DB_VER }}/foundationdb-server_${{ env.JOSHUA_DB_VER }}-1_${MY_ARCH}.deb + sudo apt-get install -y ./foundationdb-clients_${{ env.JOSHUA_DB_VER }}-1_${MY_ARCH}.deb ./foundationdb-server_${{ env.JOSHUA_DB_VER }}-1_${MY_ARCH}.deb + popd + sudo systemctl stop foundationdb + MY_IP=`hostname -I | awk '{print $1}'` + sudo sed -i s/127.0.0.1/$MY_IP/ /etc/foundationdb/fdb.cluster + sudo crudini --set /etc/foundationdb/foundationdb.conf fdbserver memory 4GiB + sudo systemctl start foundationdb + pip3 install 'foundationdb==${{ env.JOSHUA_DB_VER }}' + + - name: Download the correctness package + uses: actions/download-artifact@v4 + id: download_correctness + with: + name: correctness-${{ inputs.full_ver }}.tar.gz + run-id: ${{ inputs.build_run_id }} + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Echo download path + run: echo ${{steps.download_correctness.outputs.download-path}} + + - name: Display structure of downloaded files + run: ls -R + working-directory: ${{github.workspace}} + + - name: Download joshua + shell: bash + run: | + git clone https://github.com/FoundationDB/fdb-joshua.git + + - name: run joshua-agent + shell: 
bash + run: | + podman pull ${{ env.JOSHUA_AGENT_URL }}/joshua-agent:${{ env.JOSHUA_AGENT_TAG }} + for i in 1 2 3 4; do + podman run -d \ + -v /etc/foundationdb:/etc/foundationdb \ + -e AGENT_TIMEOUT=${{ env.AGENT_TIMEOUT }} \ + joshua-agent:${{ env.JOSHUA_AGENT_TAG }} + done + + - name: run tests + shell: bash + working-directory: ${{github.workspace}}/fdb-joshua + run: | + podman ps + ${{github.workspace}}/src/build-scripts/for-linux/test-joshua.bash ${{github.workspace}}/correctness-${{ inputs.full_ver }}.tar.gz ${{env.N_OF_TESTS}} \ No newline at end of file diff --git a/.github/workflows/tests-only.yml b/.github/workflows/tests-only.yml new file mode 100644 index 00000000000..7feb20dec8a --- /dev/null +++ b/.github/workflows/tests-only.yml @@ -0,0 +1,19 @@ +name: Tests only + +on: + workflow_dispatch: + inputs: + full_ver: + description: 'Version of the build to test (e.g. 7.4.0-3.1.ow)' + required: true + build_run_id: + description: 'Run ID of the build workflow (find it in the URL of the build run)' + required: true + +jobs: + tests: + uses: ./.github/workflows/run-tests.yml + with: + full_ver: ${{ github.event.inputs.full_ver }} + build_run_id: ${{ github.event.inputs.build_run_id }} + secrets: inherit \ No newline at end of file diff --git a/.github/workflows/windows-boost-test.yml b/.github/workflows/windows-boost-test.yml new file mode 100644 index 00000000000..08a74fcf099 --- /dev/null +++ b/.github/workflows/windows-boost-test.yml @@ -0,0 +1,41 @@ +name: Windows Boost CONFIG Test + +on: + pull_request: + push: + branches: + - 'test-*' + - 'boost-*' + workflow_dispatch: + +jobs: + test-windows-boost: + name: Test Boost CONFIG Mode on Windows + runs-on: windows-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Setup CMake + uses: lukka/get-cmake@latest + + - name: Install Boost via vcpkg + run: | + vcpkg install boost-filesystem:x64-windows boost-iostreams:x64-windows boost-serialization:x64-windows 
boost-system:x64-windows boost-program-options:x64-windows boost-url:x64-windows boost-context:x64-windows + shell: cmd + + - name: Configure and Build FoundationDB (without Swift) + run: | + mkdir build + cd build + cmake .. -DCMAKE_TOOLCHAIN_FILE=%VCPKG_INSTALLATION_ROOT%\scripts\buildsystems\vcpkg.cmake -DCMAKE_BUILD_TYPE=Release -DWITH_SWIFT=OFF -DBUILD_TESTING=OFF + shell: cmd + + - name: Verify Build Success + run: | + echo "Build completed successfully - Boost CONFIG mode works on Windows!" + shell: cmd + + - name: Report Success + run: echo "✓ Boost CONFIG mode working on Windows!" diff --git a/.mailmap b/.mailmap new file mode 100644 index 00000000000..031fe37ec2a --- /dev/null +++ b/.mailmap @@ -0,0 +1,12 @@ +# This file maps author names and email addresses to canonical identities +# Format: Preferred Name [Commit Name ] +# See git-shortlog(1) for more information +# +# Single entries establish the canonical identity for a contributor + +A.J. Beamon +Alex Miller +Evan Tschannen +Jingyu Zhou +Steve Atherton +Trevor Clinkenbeard diff --git a/CMakeLists.txt b/CMakeLists.txt index b9a8d1f1c33..fa69c0862da 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -186,24 +186,68 @@ endif() include(utils) -# Flow and other tools are written in C# - so we need that dependency -include(EnableCsharp) +# First thing we need is the actor compiler +option( + WITH_CSHARP + "Prefer C# build tools (actor compiler, coverage tool, vexillographer) when a toolchain is available" + ON) + +set(FDB_USE_CSHARP_TOOLS_EXPLICIT FALSE) +if(DEFINED FDB_USE_CSHARP_TOOLS) + set(FDB_USE_CSHARP_TOOLS_EXPLICIT TRUE) +endif() -# First thing we need is the actor compiler - and to compile and run the actor -# compiler, we need mono -include(CompileActorCompiler) +if(NOT DEFINED FDB_USE_CSHARP_TOOLS) + set(FDB_USE_CSHARP_TOOLS ${WITH_CSHARP}) +else() + set(WITH_CSHARP ${FDB_USE_CSHARP_TOOLS}) +endif() -include(CompileCoverageTool) +set(CSHARP_TOOLCHAIN_FOUND FALSE) +set(COVERAGETOOL_AVAILABLE 
FALSE) +if(FDB_USE_CSHARP_TOOLS) + if(WIN32) + find_program(dotnet_EXECUTABLE NAMES dotnet dotnet.exe) + if(dotnet_EXECUTABLE) + set(CSHARP_TOOLCHAIN_FOUND TRUE) + endif() + else() + find_package(dotnet) + if(dotnet_FOUND) + set(CSHARP_TOOLCHAIN_FOUND TRUE) + endif() + endif() -# with the actor compiler, we can now make the flow commands available -include(FlowCommands) + if(NOT CSHARP_TOOLCHAIN_FOUND) + find_package(mono) + if(mono_FOUND) + set(CSHARP_TOOLCHAIN_FOUND TRUE) + set(CSHARP_USE_MONO TRUE) + endif() + endif() +endif() -############################################################################### -# Vexilographer -############################################################################### +if(FDB_USE_CSHARP_TOOLS_EXPLICIT AND FDB_USE_CSHARP_TOOLS + AND NOT CSHARP_TOOLCHAIN_FOUND) + message( + FATAL_ERROR + "FDB_USE_CSHARP_TOOLS is enabled, but CSHARP_TOOLCHAIN_FOUND is FALSE. Install .NET (dotnet) or Mono, or set FDB_USE_CSHARP_TOOLS=OFF.") +endif() + +include(CompileActorCompiler) +if(FDB_USE_CSHARP_TOOLS AND CSHARP_TOOLCHAIN_FOUND) + include(CompileCoverageTool) + set(COVERAGETOOL_AVAILABLE TRUE) +else() + message(STATUS "C# tooling disabled or not found; skipping coverage tool build") +endif() +# Vexillographer generation is configured inside fdbclient include(CompileVexillographer) +# with the actor compiler, we can now make the flow commands available +include(FlowCommands) + ############################################################################### # Generate config file ############################################################################### @@ -261,13 +305,12 @@ add_subdirectory(fdbctl) add_subdirectory(fdbclient) add_subdirectory(fdbserver) add_subdirectory(fdbcli) +add_subdirectory(contrib/replay) if(NOT WIN32) if(NOT FOUNDATIONDB_CROSS_COMPILING) # FIXME(swift): make this work when # x-compiling. 
add_subdirectory(fdbmonitor) endif() -else() - add_subdirectory(fdbservice) endif() add_subdirectory(fdbbackup) add_subdirectory(tests) @@ -283,7 +326,7 @@ if(WITH_PYTHON AND WITH_C_BINDING) endif() if(WITH_DOCUMENTATION) add_subdirectory(documentation) - if(BUILD_JAVA_BINDING) + if(BUILD_JAVA_BINDING AND TARGET CopyJavadoc) add_dependencies(html CopyJavadoc) endif() endif() diff --git a/bindings/c/CMakeLists.txt b/bindings/c/CMakeLists.txt index 1ff61cceb6f..a6405a1ccf9 100644 --- a/bindings/c/CMakeLists.txt +++ b/bindings/c/CMakeLists.txt @@ -60,6 +60,14 @@ add_dependencies(fdb_c fdb_c_generated fdb_c_options) add_dependencies(fdbclient fdb_c_options) add_dependencies(fdbclient_sampling fdb_c_options) target_link_libraries(fdb_c PRIVATE $) +# libfdb_c.so is a C++ shared library exposing a C API. When built with +# -static-libstdc++ (PORTABLE_BINARY), the static libstdc++ may lack symbols +# generated by newer GCC versions (e.g. std::ios_base_library_init in GCC 15). +# Allow consumers to defer resolution of the .so's C++ symbols to runtime. +if(NOT APPLE) + target_link_options(fdb_c INTERFACE + $<$:-Wl,--allow-shlib-undefined>) +endif() if(USE_UBSAN) # The intent of this hack is to force c targets that depend on fdb_c to use # c++ as their linker language. 
Otherwise you see undefined references to c++ @@ -321,7 +329,7 @@ if(NOT WIN32) endforeach() add_python_venv_test(NAME fdb_c_upgrade_to_future_version - COMMAND python ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py + COMMAND python -m fdb_test_runner.upgrade_test --build-dir ${CMAKE_BINARY_DIR} --test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml --upgrade-path "${FDB_CURRENT_VERSION}" "${FDB_FUTURE_VERSION}" "${FDB_CURRENT_VERSION}" @@ -336,7 +344,7 @@ if(NOT WIN32) ) add_python_venv_test(NAME fdb_c_upgrade_from_prev2_gradual - COMMAND python ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py + COMMAND python -m fdb_test_runner.upgrade_test --build-dir ${CMAKE_BINARY_DIR} --test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml --upgrade-path "${FDB_PREV2_RELEASE_VERSION}" "${FDB_PREV_RELEASE_VERSION}" "${FDB_CURRENT_VERSION}" "${FDB_PREV_RELEASE_VERSION}" @@ -344,7 +352,7 @@ if(NOT WIN32) ) add_python_venv_test(NAME fdb_c_upgrade_from_prev2_direct - COMMAND python ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py + COMMAND python -m fdb_test_runner.upgrade_test --build-dir ${CMAKE_BINARY_DIR} --test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml --upgrade-path "${FDB_PREV2_RELEASE_VERSION}" "${FDB_CURRENT_VERSION}" "${FDB_PREV_RELEASE_VERSION}" @@ -352,7 +360,7 @@ if(NOT WIN32) ) add_python_venv_test(NAME fdb_c_upgrade_from_prev - COMMAND python ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py + COMMAND python -m fdb_test_runner.upgrade_test --build-dir ${CMAKE_BINARY_DIR} --test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml --upgrade-path "${FDB_PREV_RELEASE_VERSION}" "${FDB_CURRENT_VERSION}" "${FDB_PREV_RELEASE_VERSION}" @@ -360,7 +368,7 @@ if(NOT WIN32) ) add_python_venv_test(NAME fdb_c_wiggle_only - COMMAND python 
${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py + COMMAND python -m fdb_test_runner.upgrade_test --build-dir ${CMAKE_BINARY_DIR} --test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml --upgrade-path "${FDB_CURRENT_VERSION}" "wiggle" @@ -370,7 +378,7 @@ if(NOT WIN32) ) add_python_venv_test(NAME fdb_c_wiggle_and_upgrade - COMMAND python ${CMAKE_SOURCE_DIR}/tests/TestRunner/upgrade_test.py + COMMAND python -m fdb_test_runner.upgrade_test --build-dir ${CMAKE_BINARY_DIR} --test-file ${CMAKE_SOURCE_DIR}/bindings/c/test/apitester/tests/upgrade/MixedApiWorkloadMultiThr.toml --upgrade-path "${FDB_PREV_RELEASE_VERSION}" "wiggle" "${FDB_CURRENT_VERSION}" diff --git a/bindings/c/fdb_c.cpp b/bindings/c/fdb_c.cpp index bc0a84a4795..4159d139ded 100644 --- a/bindings/c/fdb_c.cpp +++ b/bindings/c/fdb_c.cpp @@ -589,7 +589,7 @@ FDBFuture* validate_and_update_parameters(int& limit, /* _ITERATOR mode maps to one of the known streaming modes depending on iteration */ - const int mode_bytes_array[] = { GetRangeLimits::BYTE_LIMIT_UNLIMITED, 256, 1000, 4096, 80000 }; + const int mode_bytes_array[] = { GetRangeLimits::BYTE_LIMIT_UNLIMITED, 256, 1000, 4096, 120000 }; /* The progression used for FDB_STREAMING_MODE_ITERATOR. Goes 1.5 * previous. 
*/ diff --git a/bindings/c/test/apitester/run_c_api_tests.py b/bindings/c/test/apitester/run_c_api_tests.py index aa222b7683a..e0a0321f2bf 100755 --- a/bindings/c/test/apitester/run_c_api_tests.py +++ b/bindings/c/test/apitester/run_c_api_tests.py @@ -32,16 +32,10 @@ import toml # fmt: off -from tmp_cluster import TempCluster -from local_cluster import TLSConfig +from fdb_test_runner.tmp_cluster import TempCluster +from fdb_test_runner.local_cluster import TLSConfig # fmt: on -sys.path[:0] = [ - os.path.join( - os.path.dirname(__file__), "..", "..", "..", "..", "tests", "TestRunner" - ) -] - TESTER_STATS_INTERVAL_SEC = 5 @@ -156,9 +150,6 @@ class TestConfig: def __init__(self, test_file): config = toml.load(test_file) server_config = config.get("server", [{}])[0] - self.enable_encryption_at_rest = server_config.get( - "enable_encryption_at_rest", False - ) self.tls_enabled = server_config.get("tls_enabled", False) self.client_chain_len = server_config.get("tls_client_chain_len", 2) self.server_chain_len = server_config.get("tls_server_chain_len", 3) @@ -182,7 +173,6 @@ def run_test(args, test_file): with TempCluster( args.build_dir, config.num_processes, - enable_encryption_at_rest=config.enable_encryption_at_rest, tls_config=tls_config, ) as cluster: ret_code = run_tester(args, cluster, test_file) diff --git a/bindings/c/test/fdb_c_client_config_tests.py b/bindings/c/test/fdb_c_client_config_tests.py index 6cdc172782b..70949936fed 100644 --- a/bindings/c/test/fdb_c_client_config_tests.py +++ b/bindings/c/test/fdb_c_client_config_tests.py @@ -12,10 +12,10 @@ from threading import Thread import time -from fdb_version import CURRENT_VERSION, PREV_RELEASE_VERSION, PREV2_RELEASE_VERSION -from binary_download import FdbBinaryDownloader -from local_cluster import LocalCluster, PortProvider, TLSConfig -from test_util import random_alphanum_string +from fdb_test_runner.fdb_version import CURRENT_VERSION, PREV_RELEASE_VERSION, PREV2_RELEASE_VERSION +from 
fdb_test_runner.binary_download import FdbBinaryDownloader +from fdb_test_runner.local_cluster import LocalCluster, PortProvider, TLSConfig +from fdb_test_runner.test_util import random_alphanum_string args = None downloader = None @@ -434,7 +434,7 @@ def test_multiple_external_clients(self): def test_no_external_client_support_api_version_ignore(self): # Multiple external clients, API version supported by none of them - # Note: Ignored because API function wont be missing in last 2 releases. + # Note: Ignored because API function won't be missing in last 2 releases. test = ClientConfigTest(self) test.create_external_lib_dir([PREV2_RELEASE_VERSION, PREV_RELEASE_VERSION]) test.disable_local_client = True @@ -454,7 +454,7 @@ def test_no_external_client_support_api_version_ignore(self): def test_one_external_client_wrong_api_version_ignore(self): # Multiple external clients, API version unsupported by one of them. - # Note: Ignored because API function wont be missing in last 2 releases. + # Note: Ignored because API function won't be missing in last 2 releases. 
test = ClientConfigTest(self) test.create_external_lib_dir( [CURRENT_VERSION, PREV_RELEASE_VERSION, PREV2_RELEASE_VERSION] diff --git a/bindings/c/test/fdb_c_shim_tests.py b/bindings/c/test/fdb_c_shim_tests.py index 11b44bcd3b2..0f9cbad2cd1 100644 --- a/bindings/c/test/fdb_c_shim_tests.py +++ b/bindings/c/test/fdb_c_shim_tests.py @@ -6,10 +6,10 @@ import sys import os -from binary_download import FdbBinaryDownloader -from local_cluster import LocalCluster -from test_util import random_alphanum_string -from fdb_version import CURRENT_VERSION, PREV_RELEASE_VERSION +from fdb_test_runner.binary_download import FdbBinaryDownloader +from fdb_test_runner.local_cluster import LocalCluster +from fdb_test_runner.test_util import random_alphanum_string +from fdb_test_runner.fdb_version import CURRENT_VERSION, PREV_RELEASE_VERSION TESTER_STATS_INTERVAL_SEC = 5 DEFAULT_TEST_FILE = "CApiCorrectnessMultiThr.toml" diff --git a/bindings/go/src/fdb/generated.go b/bindings/go/src/fdb/generated.go index d32c0676952..8f047e69846 100644 --- a/bindings/go/src/fdb/generated.go +++ b/bindings/go/src/fdb/generated.go @@ -320,6 +320,13 @@ func (o NetworkOptions) SetClientBuggifySectionFiredProbability(param int64) err return o.setOpt(83, int64ToBytes(param)) } +// Sets the IP address to use for tracing. If not provided, the IP address will be automatically determined when connecting to a cluster. This option allows you to specify the IP address explicitly and avoid the automatic determination process. +// +// Parameter: IP address in IPv4 or IPv6 format +func (o NetworkOptions) SetTraceIp(param string) error { + return o.setOpt(84, []byte(param)) +} + // Set a tracer to run on the client. Should be set to the same value as the tracer set on the server. // // Parameter: Distributed tracer type. Choose from none, log_file, or network_lossy @@ -437,11 +444,6 @@ func (o DatabaseOptions) SetTransactionReportConflictingKeys() error { return o.setOpt(702, nil) } -// Use configuration database. 
-func (o DatabaseOptions) SetUseConfigDatabase() error { - return o.setOpt(800, nil) -} - // Enables verification of causal read risky by checking whether clients are able to read stale data when they detect a recovery, and logging an error if so. // // Parameter: integer between 0 and 100 expressing the probability a client will verify it can't read stale data diff --git a/bindings/python/CMakeLists.txt b/bindings/python/CMakeLists.txt index d42a6c01294..ca8b70ec341 100644 --- a/bindings/python/CMakeLists.txt +++ b/bindings/python/CMakeLists.txt @@ -78,15 +78,21 @@ endif() set(setup_file_name foundationdb-${FDB_VERSION}.tar.gz) +set(wheel_file_name foundationdb-${FDB_VERSION}-py3-none-any.whl) set(package_file ${CMAKE_BINARY_DIR}/packages/foundationdb-${FDB_VERSION}${not_fdb_release_string}.tar.gz) -add_custom_command(OUTPUT ${package_file} +set(wheel_package_file ${CMAKE_BINARY_DIR}/packages/foundationdb-${FDB_VERSION}${not_fdb_release_string}-py3-none-any.whl) + +# Build both source distribution and wheel +# Note: foundationdb is a pure Python package (uses ctypes), so wheel is py3-none-any +add_custom_command(OUTPUT ${package_file} ${wheel_package_file} COMMAND $ -m ensurepip COMMAND $ -m pip install --upgrade build - COMMAND $ -m build --sdist && - ${CMAKE_COMMAND} -E copy dist/${setup_file_name} ${package_file} + COMMAND $ -m build + COMMAND ${CMAKE_COMMAND} -E copy_if_different dist/${setup_file_name} ${package_file} + COMMAND ${CMAKE_COMMAND} -E copy_if_different dist/${wheel_file_name} ${wheel_package_file} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - COMMENT "Create Python package") -add_custom_target(python_package DEPENDS ${package_file}) + COMMENT "Create Python source distribution and wheel") +add_custom_target(python_package DEPENDS ${package_file} ${wheel_package_file}) add_dependencies(python_package python_binding) add_dependencies(packages python_package) diff --git a/build-scripts/for-linux/test-joshua.bash 
b/build-scripts/for-linux/test-joshua.bash index dc28ebf4c47..f7f6ff249d5 100755 --- a/build-scripts/for-linux/test-joshua.bash +++ b/build-scripts/for-linux/test-joshua.bash @@ -3,11 +3,40 @@ # $1 - the path to the correctness archive file, ex. correctness-7.3.49-2.ow.tar.gz # $2 - number of tests -res=$(python3 -m joshua.joshua start --tarball $1 --max-runs $2 && python3 -m joshua.joshua tail --errors) - -substr="TestUID" -echo $res -if [[ $res == *$substr* ]]; then - echo >&2 "Test failed." - exit 1 -fi \ No newline at end of file +# Do not tail a run that failed to start (the previous script chained start && tail). +python3 -m joshua.joshua start --tarball "$1" --max-runs "$2" || exit 1 + +python3 -m joshua.joshua tail | python3 -c " +import sys, re +sys.stdout = open(sys.stdout.fileno(), 'w', buffering=1) +from datetime import datetime +failed = False +count = 1 +for line in sys.stdin: + m = re.search(r'TestFile=\"([^\"]+)\".*?Ok=\"(\d+)\"', line) + if m: + timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S') + print(f'[#{count} {timestamp}] TestFile=\"{m.group(1)}\" Ok=\"{m.group(2)}\"') + if m.group(2) == '0': + failed = True + else: + sys.stdout.write(line) + count += 1 +sys.exit(1 if failed else 0) +" & + +TAIL_PID=$! + +while kill -0 "$TAIL_PID" 2>/dev/null; do + sleep 30 + if ! podman ps -q 2>/dev/null | grep -q .; then + echo "ERROR: all joshua-agent containers have stopped unexpectedly" >&2 + kill "$TAIL_PID" 2>/dev/null + echo "Stopping joshua due to container failure..." + python3 -m joshua.joshua stop + exit 1 + fi +done + +wait "$TAIL_PID" +exit $? 
diff --git a/cmake/AddFdbTest.cmake b/cmake/AddFdbTest.cmake index ca2b880adf9..82616b3e5c9 100644 --- a/cmake/AddFdbTest.cmake +++ b/cmake/AddFdbTest.cmake @@ -225,29 +225,33 @@ function(stage_correctness_package) endforeach() endforeach() - list(APPEND package_files ${STAGE_OUT_DIR}/bin/fdbserver - ${STAGE_OUT_DIR}/bin/coverage.fdbserver.xml - ${STAGE_OUT_DIR}/bin/coverage.fdbclient.xml - ${STAGE_OUT_DIR}/bin/coverage.fdbrpc.xml - ${STAGE_OUT_DIR}/bin/coverage.flow.xml - ${STAGE_OUT_DIR}/CMakeCache.txt - ) + set(package_files ${STAGE_OUT_DIR}/bin/fdbserver + ${STAGE_OUT_DIR}/CMakeCache.txt) + + set(package_dependencies ${CMAKE_BINARY_DIR}/CMakeCache.txt + ${CMAKE_BINARY_DIR}/packages/bin/fdbserver) + + set(copy_sources ${CMAKE_BINARY_DIR}/packages/bin/fdbserver) + if(COVERAGETOOL_AVAILABLE) + list(APPEND package_files ${STAGE_OUT_DIR}/bin/coverage.fdbserver.xml + ${STAGE_OUT_DIR}/bin/coverage.fdbclient.xml + ${STAGE_OUT_DIR}/bin/coverage.fdbrpc.xml + ${STAGE_OUT_DIR}/bin/coverage.flow.xml) + list(APPEND package_dependencies ${CMAKE_BINARY_DIR}/bin/coverage.fdbserver.xml + ${CMAKE_BINARY_DIR}/lib/coverage.fdbclient.xml + ${CMAKE_BINARY_DIR}/lib/coverage.fdbrpc.xml + ${CMAKE_BINARY_DIR}/lib/coverage.flow.xml) + list(APPEND copy_sources ${CMAKE_BINARY_DIR}/bin/coverage.fdbserver.xml + ${CMAKE_BINARY_DIR}/lib/coverage.fdbclient.xml + ${CMAKE_BINARY_DIR}/lib/coverage.fdbrpc.xml + ${CMAKE_BINARY_DIR}/lib/coverage.flow.xml) + endif() add_custom_command( OUTPUT ${package_files} - DEPENDS ${CMAKE_BINARY_DIR}/CMakeCache.txt - ${CMAKE_BINARY_DIR}/packages/bin/fdbserver - ${CMAKE_BINARY_DIR}/bin/coverage.fdbserver.xml - ${CMAKE_BINARY_DIR}/lib/coverage.fdbclient.xml - ${CMAKE_BINARY_DIR}/lib/coverage.fdbrpc.xml - ${CMAKE_BINARY_DIR}/lib/coverage.flow.xml + DEPENDS ${package_dependencies} COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_BINARY_DIR}/CMakeCache.txt ${STAGE_OUT_DIR} - COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_BINARY_DIR}/packages/bin/fdbserver - 
${CMAKE_BINARY_DIR}/bin/coverage.fdbserver.xml - ${CMAKE_BINARY_DIR}/lib/coverage.fdbclient.xml - ${CMAKE_BINARY_DIR}/lib/coverage.fdbrpc.xml - ${CMAKE_BINARY_DIR}/lib/coverage.flow.xml - ${STAGE_OUT_DIR}/bin + COMMAND ${CMAKE_COMMAND} -E copy ${copy_sources} ${STAGE_OUT_DIR}/bin COMMENT "Copying files for ${STAGE_CONTEXT} package" ) @@ -283,6 +287,15 @@ function(create_correctness_package) set(out_dir "${CMAKE_BINARY_DIR}/correctness") stage_correctness_package(OUT_DIR ${out_dir} CONTEXT "correctness" OUT_FILES package_files TEST_LIST "${TEST_NAMES}") set(tar_file ${CMAKE_BINARY_DIR}/packages/correctness-${FDB_VERSION}.tar.gz) + + # Check if test_args.txt exists and prepare optional file list + set(optional_test_args_files "") + set(optional_copy_commands "") + if(EXISTS "${CMAKE_SOURCE_DIR}/test_args.txt") + list(APPEND optional_test_args_files "${out_dir}/test_args.txt") + set(optional_copy_commands COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/test_args.txt ${out_dir}/test_args.txt) + endif() + add_custom_command( OUTPUT ${tar_file} DEPENDS ${package_files} @@ -292,9 +305,11 @@ function(create_correctness_package) ${out_dir}/joshua_test COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_SOURCE_DIR}/contrib/Joshua/scripts/correctnessTimeout.sh ${out_dir}/joshua_timeout + ${optional_copy_commands} COMMAND ${CMAKE_COMMAND} -E tar cfz ${tar_file} ${package_files} ${out_dir}/joshua_test ${out_dir}/joshua_timeout + ${optional_test_args_files} WORKING_DIRECTORY ${out_dir} COMMENT "Package correctness archive" ) @@ -387,6 +402,10 @@ function(prepare_binding_test_files build_directory target_name target_dependenc add_dependencies(${target_name} python_binding) set(generated_binding_files python/fdb/fdboptions.py python/fdb/apiversion.py) + # Ensure Python binding files are generated before we try to copy them + if(WITH_PYTHON_BINDING) + add_dependencies(${target_name} fdb_python_options) + endif() if(WITH_JAVA_BINDING) set(not_fdb_release_string "") add_custom_command( @@ 
-593,6 +612,7 @@ string(APPEND test_venv_cmd "${Python3_EXECUTABLE} -m venv ${test_venv_dir} ") string(APPEND test_venv_cmd "&& ${test_venv_activate} ") string(APPEND test_venv_cmd "&& pip install --upgrade pip ") string(APPEND test_venv_cmd "&& pip install -r ${CMAKE_SOURCE_DIR}/tests/TestRunner/requirements.txt") +string(APPEND test_venv_cmd "&& pip install -e ${CMAKE_SOURCE_DIR}/tests/TestRunner ") # NOTE: At this stage we are in the virtual environment and Python3_EXECUTABLE is not available anymore string(APPEND test_venv_cmd "&& (cd ${CMAKE_BINARY_DIR}/bindings/python && python3 -m pip install .) ") add_test( @@ -633,13 +653,12 @@ function(add_python_venv_test) WORKING_DIRECTORY ${T_WORKING_DIRECTORY} COMMAND ${shell_cmd} ${shell_opt} "${test_venv_activate} && ${T_COMMAND}") set_tests_properties(${T_NAME} PROPERTIES FIXTURES_REQUIRED test_virtual_env_setup TIMEOUT ${T_TEST_TIMEOUT}) - set(test_env_vars "PYTHONPATH=${CMAKE_SOURCE_DIR}/tests/TestRunner:${CMAKE_BINARY_DIR}/tests/TestRunner") if(APPLE) set(ld_env_name "DYLD_LIBRARY_PATH") else() set(ld_env_name "LD_LIBRARY_PATH") endif() - set(test_env_vars PROPERTIES ENVIRONMENT "${test_env_vars};${ld_env_name}=${CMAKE_BINARY_DIR}/lib:$ENV{${ld_env_name}}") + set(test_env_vars "${ld_env_name}=${CMAKE_BINARY_DIR}/lib:$ENV{${ld_env_name}}") if(USE_SANITIZER) set(test_env_vars "${test_env_vars};${SANITIZER_OPTIONS}") endif() @@ -667,7 +686,7 @@ function(add_fdbclient_test) if(NOT T_COMMAND) message(FATAL_ERROR "COMMAND is a required argument for add_fdbclient_test") endif() - set(TMP_CLUSTER_CMD python ${CMAKE_SOURCE_DIR}/tests/TestRunner/tmp_cluster.py --build-dir ${CMAKE_BINARY_DIR}) + set(TMP_CLUSTER_CMD python -m fdb_test_runner.tmp_cluster --build-dir ${CMAKE_BINARY_DIR}) if(T_PROCESS_NUMBER) list(APPEND TMP_CLUSTER_CMD --process-number ${T_PROCESS_NUMBER}) endif() @@ -720,7 +739,7 @@ function(add_unavailable_fdbclient_test) message(STATUS "Adding unavailable client test ${T_NAME}") add_python_venv_test( NAME 
${T_NAME} - COMMAND python ${CMAKE_SOURCE_DIR}/tests/TestRunner/fake_cluster.py + COMMAND python -m fdb_test_runner.fake_cluster --output-dir ${CMAKE_BINARY_DIR} -- ${T_COMMAND} TEST_TIMEOUT ${T_TEST_TIMEOUT}) endfunction() @@ -748,7 +767,7 @@ function(add_multi_fdbclient_test) message(STATUS "Adding Client test ${T_NAME}") add_python_venv_test( NAME ${T_NAME} - COMMAND python ${CMAKE_SOURCE_DIR}/tests/TestRunner/tmp_multi_cluster.py + COMMAND python -m fdb_test_runner.tmp_multi_cluster --build-dir ${CMAKE_BINARY_DIR} --clusters 3 -- ${T_COMMAND} TEST_TIMEOUT 60) diff --git a/cmake/CPackConfig.cmake b/cmake/CPackConfig.cmake index 664892b9979..62f89306d40 100644 --- a/cmake/CPackConfig.cmake +++ b/cmake/CPackConfig.cmake @@ -1,8 +1,6 @@ if(CPACK_GENERATOR MATCHES "RPM") set(CPACK_PACKAGING_INSTALL_PREFIX "/") set(CPACK_COMPONENTS_ALL clients-el9 server-el9 clients-versioned server-versioned) - set(CPACK_RESOURCE_FILE_README ${CMAKE_SOURCE_DIR}/README.md) - set(CPACK_RESOURCE_FILE_LICENSE ${CMAKE_SOURCE_DIR}/LICENSE) elseif(CPACK_GENERATOR MATCHES "DEB") set(CPACK_PACKAGING_INSTALL_PREFIX "/") set(CPACK_COMPONENTS_ALL clients-deb server-deb clients-versioned server-versioned) diff --git a/cmake/CompileActorCompiler.cmake b/cmake/CompileActorCompiler.cmake index 1e3c12d19f6..534208d223b 100644 --- a/cmake/CompileActorCompiler.cmake +++ b/cmake/CompileActorCompiler.cmake @@ -1,7 +1,8 @@ find_package(Python3 REQUIRED COMPONENTS Interpreter) -find_program(MCS_EXECUTABLE mcs) -find_program(MONO_EXECUTABLE mono) +if(NOT DEFINED FDB_USE_CSHARP_TOOLS) + set(FDB_USE_CSHARP_TOOLS TRUE) +endif() set(ACTORCOMPILER_PY_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/flow/actorcompiler_py/__main__.py @@ -12,7 +13,7 @@ set(ACTORCOMPILER_PY_SRCS set(ACTORCOMPILER_CSPROJ ${CMAKE_CURRENT_SOURCE_DIR}/flow/actorcompiler/actorcompiler.csproj) -set(ACTORCOMPILER_SRCS +set(ACTORCOMPILER_LEGACY_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/flow/actorcompiler/ActorCompiler.cs 
${CMAKE_CURRENT_SOURCE_DIR}/flow/actorcompiler/ActorParser.cs ${CMAKE_CURRENT_SOURCE_DIR}/flow/actorcompiler/ParseTree.cs @@ -25,48 +26,63 @@ set(ACTOR_COMPILER_REFERENCES add_custom_target(actorcompiler_py DEPENDS ${ACTORCOMPILER_PY_SRCS}) -if(WIN32) - add_executable(actorcompiler_csharp ${ACTORCOMPILER_SRCS}) - target_compile_options(actorcompiler_csharp PRIVATE "/langversion:6") - set_property( - TARGET actorcompiler_csharp - PROPERTY VS_DOTNET_REFERENCES - "System" - "System.Core" - "System.Xml.Linq" - "System.Data.DataSetExtensions" - "Microsoft.CSharp" - "System.Data" - "System.Xml") - set(ACTORCOMPILER_CSHARP_COMMAND $ - CACHE INTERNAL "Command to run the C# actor compiler") - add_custom_target(actorcompiler) - add_dependencies(actorcompiler actorcompiler_csharp actorcompiler_py) -elseif(CSHARP_USE_MONO) - add_custom_command( - OUTPUT actorcompiler.exe - COMMAND ${CSHARP_COMPILER_EXECUTABLE} ARGS ${ACTOR_COMPILER_REFERENCES} - ${ACTORCOMPILER_SRCS} "-target:exe" "-out:actorcompiler.exe" - DEPENDS ${ACTORCOMPILER_SRCS} - COMMENT "Compile actor compiler" - VERBATIM) - add_custom_target(actorcompiler_csharp - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/actorcompiler.exe) - set(actor_exe "${CMAKE_CURRENT_BINARY_DIR}/actorcompiler.exe") - set(ACTORCOMPILER_CSHARP_COMMAND ${MONO_EXECUTABLE} ${actor_exe} - CACHE INTERNAL "Command to run the C# actor compiler") +set(ACTORCOMPILER_PY_COMMAND + ${Python3_EXECUTABLE} -m flow.actorcompiler_py + CACHE INTERNAL "Command to run the Python actor compiler") +set(ACTORCOMPILER_CSHARP_COMMAND "" + CACHE INTERNAL "Command to run the C# actor compiler") + +set(ACTORCOMPILER_COMMAND ${ACTORCOMPILER_PY_COMMAND} + CACHE INTERNAL "Command to run the actor compiler") + +set(actorcompiler_dependencies actorcompiler_py) + +if(FDB_USE_CSHARP_TOOLS AND CSHARP_TOOLCHAIN_FOUND) + if(WIN32) + add_executable(actorcompiler_csharp ${ACTORCOMPILER_LEGACY_SRCS}) + target_compile_options(actorcompiler_csharp PRIVATE "/langversion:6") + set_property( + 
TARGET actorcompiler_csharp + PROPERTY VS_DOTNET_REFERENCES + "System" + "System.Core" + "System.Xml.Linq" + "System.Data.DataSetExtensions" + "Microsoft.CSharp" + "System.Data" + "System.Xml") + set(ACTORCOMPILER_CSHARP_COMMAND $ + CACHE INTERNAL "Command to run the C# actor compiler") + list(APPEND actorcompiler_dependencies actorcompiler_csharp) + elseif(CSHARP_USE_MONO) + add_custom_command( + OUTPUT actorcompiler.exe + COMMAND ${CSHARP_COMPILER_EXECUTABLE} ARGS ${ACTOR_COMPILER_REFERENCES} + ${ACTORCOMPILER_LEGACY_SRCS} "-target:exe" "-out:actorcompiler.exe" + DEPENDS ${ACTORCOMPILER_LEGACY_SRCS} + COMMENT "Compile actor compiler" + VERBATIM) + add_custom_target(actorcompiler_csharp + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/actorcompiler.exe) + set(actor_exe "${CMAKE_CURRENT_BINARY_DIR}/actorcompiler.exe") + set(ACTORCOMPILER_CSHARP_COMMAND ${MONO_EXECUTABLE} ${actor_exe} + CACHE INTERNAL "Command to run the C# actor compiler") + list(APPEND actorcompiler_dependencies actorcompiler_csharp) + else() + dotnet_build(${ACTORCOMPILER_CSPROJ} SOURCE ${ACTORCOMPILER_LEGACY_SRCS}) + set(actor_exe "${actorcompiler_EXECUTABLE_PATH}") + message(STATUS "Actor compiler path: ${actor_exe}") + set(ACTORCOMPILER_CSHARP_COMMAND ${dotnet_EXECUTABLE} ${actor_exe} + CACHE INTERNAL "Command to run the C# actor compiler") + endif() +endif() + +if(NOT TARGET actorcompiler) add_custom_target(actorcompiler) - add_dependencies(actorcompiler actorcompiler_csharp actorcompiler_py) -else() - dotnet_build(${ACTORCOMPILER_CSPROJ} SOURCE ${ACTORCOMPILER_SRCS}) - set(actor_exe "${actorcompiler_EXECUTABLE_PATH}") - message(STATUS "Actor compiler path: ${actor_exe}") - # dotnet_build already creates a target named 'actorcompiler', so we just add Python dependency - add_dependencies(actorcompiler actorcompiler_py) - set(ACTORCOMPILER_CSHARP_COMMAND ${actor_exe} - CACHE INTERNAL "Command to run the C# actor compiler") endif() +add_dependencies(actorcompiler ${actorcompiler_dependencies}) 
-set(ACTORCOMPILER_COMMAND - ${Python3_EXECUTABLE} -m flow.actorcompiler_py - CACHE INTERNAL "Command to run the actor compiler") +if(ACTORCOMPILER_CSHARP_COMMAND) + set(ACTORCOMPILER_COMMAND ${ACTORCOMPILER_CSHARP_COMMAND} + CACHE INTERNAL "Command to run the actor compiler" FORCE) +endif() diff --git a/cmake/CompileBoost.cmake b/cmake/CompileBoost.cmake index 4829e1a6526..ea9fbf46201 100644 --- a/cmake/CompileBoost.cmake +++ b/cmake/CompileBoost.cmake @@ -86,6 +86,8 @@ function(compile_boost) --with-libraries=${BOOTSTRAP_LIBRARIES} --with-toolset=${BOOST_TOOLSET} BUILD_COMMAND ${B2_COMMAND} + # Added -d0 flag to B2_COMMAND to suppress informational Boost build output + -d0 link=static ${B2_ADDTTIONAL_BUILD_ARGS} ${COMPILE_BOOST_BUILD_ARGS} --prefix=${BOOST_INSTALL_DIR} @@ -174,11 +176,9 @@ if(BOOST_ROOT) endif() if(WIN32) - # this should be done with the line below -- but apparently the CI is not set up - # properly for config mode. So we use the old way on Windows - # find_package(Boost 1.72.0 EXACT QUIET REQUIRED CONFIG PATHS ${BOOST_HINT_PATHS}) - # I think depending on the cmake version this will cause weird warnings - find_package(Boost 1.86 COMPONENTS filesystem iostreams serialization system program_options url) + # Use CONFIG mode to prefer Boost's BoostConfig.cmake over deprecated FindBoost module + # This is required for CMake 3.30+ compatibility (policy CMP0167) + find_package(Boost 1.86.0 EXACT QUIET COMPONENTS filesystem iostreams serialization system program_options url CONFIG PATHS ${BOOST_HINT_PATHS}) add_library(boost_target INTERFACE) target_link_libraries(boost_target INTERFACE Boost::boost Boost::filesystem Boost::iostreams Boost::serialization Boost::system Boost::url) diff --git a/cmake/CompileCoverageTool.cmake b/cmake/CompileCoverageTool.cmake index 668a0139e14..282f747a079 100644 --- a/cmake/CompileCoverageTool.cmake +++ b/cmake/CompileCoverageTool.cmake @@ -4,6 +4,8 @@ set(COVERAGETOOL_SRCS 
${CMAKE_CURRENT_SOURCE_DIR}/flow/coveragetool/Program.cs ${CMAKE_CURRENT_SOURCE_DIR}/flow/coveragetool/Properties/AssemblyInfo.cs) +set(coveragetool_command "") + if(WIN32) add_executable(coveragetool ${COVERAGETOOL_SRCS}) target_compile_options(coveragetool PRIVATE "/langversion:6") @@ -32,7 +34,16 @@ elseif(CSHARP_USE_MONO) add_custom_target(coveragetool DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/coveragetool.exe) set(coveragetool_exe "${CMAKE_CURRENT_BINARY_DIR}/coveragetool.exe") + set(coveragetool_command ${MONO_EXECUTABLE} ${coveragetool_exe}) else() dotnet_build(${COVERAGETOOL_CSPROJ} SOURCE ${COVERAGETOOL_SRCS}) set(coveragetool_exe "${coveragetool_EXECUTABLE_PATH}") + set(coveragetool_command ${dotnet_EXECUTABLE} ${coveragetool_exe}) +endif() + +if(NOT coveragetool_command) + set(coveragetool_command ${coveragetool_exe}) endif() +set(coveragetool_command + ${coveragetool_command} + CACHE INTERNAL "Command to run coveragetool") diff --git a/cmake/CompileRocksDB.cmake b/cmake/CompileRocksDB.cmake index cfa15dd099c..99ae2581a3c 100644 --- a/cmake/CompileRocksDB.cmake +++ b/cmake/CompileRocksDB.cmake @@ -1,6 +1,82 @@ # FindRocksDB -find_package(RocksDB 9.7.3) +# Load RocksDB version configuration from source-controlled file +include(${CMAKE_CURRENT_LIST_DIR}/RocksDBVersion.cmake) + +# Validate configuration - must use exactly one of the two options +if(ROCKSDB_GIT_HASH AND ROCKSDB_VERSION) + message(FATAL_ERROR + "Both ROCKSDB_GIT_HASH and ROCKSDB_VERSION are set in cmake/RocksDBVersion.cmake. " + "Please choose only ONE option: comment out the one you don't want to use.") +endif() + +if(NOT ROCKSDB_GIT_HASH AND NOT ROCKSDB_VERSION) + message(FATAL_ERROR + "Neither ROCKSDB_GIT_HASH nor ROCKSDB_VERSION is set in cmake/RocksDBVersion.cmake. 
" + "Please configure at least one option.") +endif() + +# Determine version string for header generation +if(ROCKSDB_GIT_HASH) + # Auto-fetch version from GitHub + message(STATUS "Fetching RocksDB version for commit ${ROCKSDB_GIT_HASH}...") + set(ROCKSDB_VERSION_URL "https://raw.githubusercontent.com/facebook/rocksdb/${ROCKSDB_GIT_HASH}/include/rocksdb/version.h") + file(DOWNLOAD + ${ROCKSDB_VERSION_URL} + "${CMAKE_BINARY_DIR}/rocksdb_version_check.h" + STATUS ROCKSDB_VERSION_DOWNLOAD_STATUS + TIMEOUT 30) + list(GET ROCKSDB_VERSION_DOWNLOAD_STATUS 0 ROCKSDB_VERSION_DOWNLOAD_ERROR) + if(NOT ROCKSDB_VERSION_DOWNLOAD_ERROR EQUAL 0) + list(GET ROCKSDB_VERSION_DOWNLOAD_STATUS 1 ROCKSDB_VERSION_DOWNLOAD_ERROR_MSG) + message(FATAL_ERROR + "Failed to fetch RocksDB version.h from ${ROCKSDB_VERSION_URL}: ${ROCKSDB_VERSION_DOWNLOAD_ERROR_MSG}\n" + "Network access is required when using ROCKSDB_GIT_HASH.") + endif() + + # Parse version from downloaded file + file(READ "${CMAKE_BINARY_DIR}/rocksdb_version_check.h" ROCKSDB_VERSION_H_CONTENT) + string(REGEX MATCH "#define ROCKSDB_MAJOR ([0-9]+)" _ ${ROCKSDB_VERSION_H_CONTENT}) + set(FDB_ROCKSDB_MAJOR ${CMAKE_MATCH_1}) + string(REGEX MATCH "#define ROCKSDB_MINOR ([0-9]+)" _ ${ROCKSDB_VERSION_H_CONTENT}) + set(FDB_ROCKSDB_MINOR ${CMAKE_MATCH_1}) + string(REGEX MATCH "#define ROCKSDB_PATCH ([0-9]+)" _ ${ROCKSDB_VERSION_H_CONTENT}) + set(FDB_ROCKSDB_PATCH ${CMAKE_MATCH_1}) + + if(NOT FDB_ROCKSDB_MAJOR OR NOT FDB_ROCKSDB_MINOR OR NOT FDB_ROCKSDB_PATCH) + message(FATAL_ERROR "Failed to parse version from RocksDB version.h") + endif() + + message(STATUS "Detected RocksDB version: ${FDB_ROCKSDB_MAJOR}.${FDB_ROCKSDB_MINOR}.${FDB_ROCKSDB_PATCH}") + set(FDB_ROCKSDB_GIT_HASH ${ROCKSDB_GIT_HASH}) +else() + set(ROCKSDB_VERSION_FOR_HEADER ${ROCKSDB_VERSION}) + # Parse version string into MAJOR.MINOR.PATCH + string(REPLACE "." 
";" ROCKSDB_VERSION_LIST ${ROCKSDB_VERSION_FOR_HEADER}) + list(LENGTH ROCKSDB_VERSION_LIST ROCKSDB_VERSION_LIST_LENGTH) + if(NOT ROCKSDB_VERSION_LIST_LENGTH EQUAL 3) + message(FATAL_ERROR + "Invalid RocksDB version format: ${ROCKSDB_VERSION_FOR_HEADER}. Expected MAJOR.MINOR.PATCH (e.g., 9.7.3)") + endif() + list(GET ROCKSDB_VERSION_LIST 0 FDB_ROCKSDB_MAJOR) + list(GET ROCKSDB_VERSION_LIST 1 FDB_ROCKSDB_MINOR) + list(GET ROCKSDB_VERSION_LIST 2 FDB_ROCKSDB_PATCH) + set(FDB_ROCKSDB_GIT_HASH "") +endif() + +# Generate FDBRocksDBVersion.h from template into build directory +# This file is NOT source-controlled - it's regenerated at cmake configure time +file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/fdbserver/include/fdbserver) +configure_file( + ${CMAKE_CURRENT_LIST_DIR}/FDBRocksDBVersion.h.in + ${CMAKE_BINARY_DIR}/fdbserver/include/fdbserver/FDBRocksDBVersion.h + @ONLY) +message(STATUS "Generated FDBRocksDBVersion.h with version ${FDB_ROCKSDB_MAJOR}.${FDB_ROCKSDB_MINOR}.${FDB_ROCKSDB_PATCH}") + +# Only try to find system RocksDB if not using a specific commit hash +if(NOT ROCKSDB_GIT_HASH) + find_package(RocksDB ${ROCKSDB_VERSION}) +endif() include(ExternalProject) @@ -42,11 +118,29 @@ if(ROCKSDB_FOUND) set(ROCKSDB_LIBRARIES ${ROCKSDB_LIBRARY}) add_custom_target(rocksdb DEPENDS ${ROCKSDB_LIBRARIES} ${ROCKSDB_INCLUDE_DIR}) else() + # Determine download URL and hash based on whether using commit hash or version + if(ROCKSDB_GIT_HASH) + if(NOT ROCKSDB_GIT_HASH_SHA256) + message(FATAL_ERROR "ROCKSDB_GIT_HASH_SHA256 is required when using ROCKSDB_GIT_HASH for security. 
" + "Get the SHA256 with: curl -sL https://github.com/facebook/rocksdb/archive/${ROCKSDB_GIT_HASH}.tar.gz | sha256sum") + endif() + set(ROCKSDB_DOWNLOAD_URL "https://github.com/facebook/rocksdb/archive/${ROCKSDB_GIT_HASH}.tar.gz") + set(ROCKSDB_URL_HASH_ARG URL_HASH SHA256=${ROCKSDB_GIT_HASH_SHA256}) + message(STATUS "Building RocksDB from commit: ${ROCKSDB_GIT_HASH}") + else() + if(NOT ROCKSDB_VERSION_SHA256) + message(FATAL_ERROR "ROCKSDB_VERSION_SHA256 is required when using ROCKSDB_VERSION for security. " + "Get the SHA256 with: curl -sL https://github.com/facebook/rocksdb/archive/refs/tags/v${ROCKSDB_VERSION}.tar.gz | sha256sum") + endif() + set(ROCKSDB_DOWNLOAD_URL "https://github.com/facebook/rocksdb/archive/refs/tags/v${ROCKSDB_VERSION}.tar.gz") + set(ROCKSDB_URL_HASH_ARG URL_HASH SHA256=${ROCKSDB_VERSION_SHA256}) + message(STATUS "Building RocksDB version: ${ROCKSDB_VERSION}") + endif() + ExternalProject_Add( rocksdb - URL https://github.com/facebook/rocksdb/archive/refs/tags/v9.7.3.tar.gz - URL_HASH - SHA256=acfabb989cbfb5b5c4d23214819b059638193ec33dad2d88373c46448d16d38b + URL ${ROCKSDB_DOWNLOAD_URL} + ${ROCKSDB_URL_HASH_ARG} CMAKE_ARGS ${RocksDB_CMAKE_ARGS} BUILD_BYPRODUCTS /librocksdb.a INSTALL_COMMAND "") diff --git a/cmake/CompileVexillographer.cmake b/cmake/CompileVexillographer.cmake index 03216548d2c..2d696cd8057 100644 --- a/cmake/CompileVexillographer.cmake +++ b/cmake/CompileVexillographer.cmake @@ -7,28 +7,45 @@ set(VEXILLOGRAPHER_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/fdbclient/vexillographer/python.cs ${CMAKE_CURRENT_SOURCE_DIR}/fdbclient/vexillographer/ruby.cs ${CMAKE_CURRENT_SOURCE_DIR}/fdbclient/vexillographer/vexillographer.cs) +set(VEXILLOGRAPHER_PY ${CMAKE_CURRENT_SOURCE_DIR}/fdbclient/vexillographer/vexillographer.py) -if(WIN32) +if(NOT DEFINED FDB_USE_CSHARP_TOOLS) + set(FDB_USE_CSHARP_TOOLS TRUE) +endif() +set(VEXILLOGRAPHER_COMMAND "") + +if(WIN32 AND FDB_USE_CSHARP_TOOLS) add_executable(vexillographer ${VEXILLOGRAPHER_SRCS}) 
target_compile_options(vexillographer PRIVATE "/langversion:6") set_property( TARGET vexillographer PROPERTY VS_DOTNET_REFERENCES "System" "System.Core" "System.Data" "System.Xml" "System.Xml.Linq") -elseif(CSHARP_USE_MONO) - set(VEXILLOGRAPHER_REFERENCES - "-r:System,System.Core,System.Data,System.Xml,System.Xml.Linq") - set(VEXILLOGRAPHER_EXE "${CMAKE_CURRENT_BINARY_DIR}/vexillographer.exe") - add_custom_command( - OUTPUT ${VEXILLOGRAPHER_EXE} - COMMAND ${CSHARP_COMPILER_EXECUTABLE} ARGS ${VEXILLOGRAPHER_REFERENCES} - ${VEXILLOGRAPHER_SRCS} -target:exe -out:${VEXILLOGRAPHER_EXE} - DEPENDS ${VEXILLOGRAPHER_SRCS} - COMMENT "Compile Vexillographer") - add_custom_target(vexillographer DEPENDS ${VEXILLOGRAPHER_EXE}) + set(VEXILLOGRAPHER_DEPENDS vexillographer) +elseif(FDB_USE_CSHARP_TOOLS AND CSHARP_TOOLCHAIN_FOUND) + if(CSHARP_USE_MONO) + set(VEXILLOGRAPHER_REFERENCES + "-r:System,System.Core,System.Data,System.Xml,System.Xml.Linq") + set(VEXILLOGRAPHER_EXE "${CMAKE_CURRENT_BINARY_DIR}/vexillographer.exe") + add_custom_command( + OUTPUT ${VEXILLOGRAPHER_EXE} + COMMAND ${CSHARP_COMPILER_EXECUTABLE} ARGS ${VEXILLOGRAPHER_REFERENCES} + ${VEXILLOGRAPHER_SRCS} -target:exe -out:${VEXILLOGRAPHER_EXE} + DEPENDS ${VEXILLOGRAPHER_SRCS} + COMMENT "Compile Vexillographer") + add_custom_target(vexillographer DEPENDS ${VEXILLOGRAPHER_EXE}) + set(VEXILLOGRAPHER_DEPENDS vexillographer) + set(VEXILLOGRAPHER_COMMAND ${MONO_EXECUTABLE} ${VEXILLOGRAPHER_EXE}) + else() + dotnet_build(${VEXILLOGRAPHER_CSPROJ} SOURCE ${VEXILLOGRAPHER_SRCS}) + message(STATUS "Generated executable: ${vexillographer_EXECUTABLE_PATH}") + set(VEXILLOGRAPHER_EXE ${vexillographer_EXECUTABLE_PATH}) + set(VEXILLOGRAPHER_COMMAND ${dotnet_EXECUTABLE} ${vexillographer_EXECUTABLE_PATH}) + set(VEXILLOGRAPHER_DEPENDS ${vexillographer_EXECUTABLE_PATH}) + endif() else() - dotnet_build(${VEXILLOGRAPHER_CSPROJ} SOURCE ${VEXILLOGRAPHER_SRCS}) - message(STATUS "Generated executable: ${vexillographer_EXECUTABLE_PATH}") - 
set(VEXILLOGRAPHER_EXE ${vexillographer_EXECUTABLE_PATH}) + find_package(Python3 COMPONENTS Interpreter REQUIRED) + set(VEXILLOGRAPHER_COMMAND ${Python3_EXECUTABLE} ${VEXILLOGRAPHER_PY}) + set(VEXILLOGRAPHER_DEPENDS ${VEXILLOGRAPHER_PY}) endif() function(vexillographer_compile) @@ -41,21 +58,19 @@ function(vexillographer_compile) set(VX_OUTPUT ${VX_OUT}) endif() - if(WIN32) + if(WIN32 AND FDB_USE_CSHARP_TOOLS) add_custom_command( OUTPUT ${VX_OUTPUT} - COMMAND - $ + COMMAND $ ${CMAKE_SOURCE_DIR}/fdbclient/vexillographer/fdb.options ${VX_LANG} ${VX_OUT} DEPENDS ${CMAKE_SOURCE_DIR}/fdbclient/vexillographer/fdb.options vexillographer COMMENT "Generate FDBOptions ${VX_LANG} files") - elseif(CSHARP_USE_MONO) + elseif(FDB_USE_CSHARP_TOOLS AND CSHARP_TOOLCHAIN_FOUND AND CSHARP_USE_MONO) add_custom_command( OUTPUT ${VX_OUTPUT} - COMMAND - ${MONO_EXECUTABLE} ${VEXILLOGRAPHER_EXE} + COMMAND ${VEXILLOGRAPHER_COMMAND} ${CMAKE_SOURCE_DIR}/fdbclient/vexillographer/fdb.options ${VX_LANG} ${VX_OUT} DEPENDS ${CMAKE_SOURCE_DIR}/fdbclient/vexillographer/fdb.options @@ -64,12 +79,11 @@ function(vexillographer_compile) else() add_custom_command( OUTPUT ${VX_OUTPUT} - COMMAND - ${VEXILLOGRAPHER_EXE} + COMMAND ${VEXILLOGRAPHER_COMMAND} ${CMAKE_SOURCE_DIR}/fdbclient/vexillographer/fdb.options ${VX_LANG} ${VX_OUT} DEPENDS ${CMAKE_SOURCE_DIR}/fdbclient/vexillographer/fdb.options - vexillographer + ${VEXILLOGRAPHER_DEPENDS} COMMENT "Generate FDBOptions ${VX_LANG} files") endif() diff --git a/cmake/FDBComponents.cmake b/cmake/FDBComponents.cmake index 50013f10ecc..59023fc03f4 100644 --- a/cmake/FDBComponents.cmake +++ b/cmake/FDBComponents.cmake @@ -256,6 +256,7 @@ set(WITH_ROCKSDB ON CACHE BOOL "Build with experimental RocksDB support") set(PORTABLE_ROCKSDB 1 CACHE STRING "Minimum CPU arch to support (i.e. 
skylake, haswell, etc., or 0 = current CPU, 1 = baseline CPU)") set(ROCKSDB_TOOLS OFF CACHE BOOL "Compile RocksDB tools") set(WITH_LIBURING OFF CACHE BOOL "Build with liburing enabled") # Set this to ON to include liburing +# RocksDB version/commit configuration is in cmake/RocksDBVersion.cmake ################################################################################ # TOML11 diff --git a/fdbserver/include/fdbserver/FDBRocksDBVersion.h b/cmake/FDBRocksDBVersion.h.in similarity index 67% rename from fdbserver/include/fdbserver/FDBRocksDBVersion.h rename to cmake/FDBRocksDBVersion.h.in index bd221af5c89..f304210c43e 100644 --- a/fdbserver/include/fdbserver/FDBRocksDBVersion.h +++ b/cmake/FDBRocksDBVersion.h.in @@ -18,11 +18,16 @@ * limitations under the License. */ +// AUTO-GENERATED FILE - DO NOT EDIT OR COMMIT +// Generated by CMake into the build directory from cmake/FDBRocksDBVersion.h.in +// To change RocksDB version, edit cmake/RocksDBVersion.cmake + #ifndef FDBSERVER_FDBROCKSDBVERSION_H #define FDBSERVER_FDBROCKSDBVERSION_H -#define FDB_ROCKSDB_MAJOR 9 -#define FDB_ROCKSDB_MINOR 7 -#define FDB_ROCKSDB_PATCH 3 +#define FDB_ROCKSDB_MAJOR @FDB_ROCKSDB_MAJOR@ +#define FDB_ROCKSDB_MINOR @FDB_ROCKSDB_MINOR@ +#define FDB_ROCKSDB_PATCH @FDB_ROCKSDB_PATCH@ +#define FDB_ROCKSDB_GIT_HASH "@FDB_ROCKSDB_GIT_HASH@" -#endif \ No newline at end of file +#endif diff --git a/cmake/Findbenchmark.cmake b/cmake/Findbenchmark.cmake new file mode 100644 index 00000000000..26176d8833a --- /dev/null +++ b/cmake/Findbenchmark.cmake @@ -0,0 +1,82 @@ +# Distributed under the OSI-approved Apache 2.0. See the LICENSE file in +# FoundationDB source code + +#[=======================================================================[.rst: +Findbenchmark +------------- + +Find Google Benchmark + +The benchmark_ROOT variable can be used as a hint to locate a different version of Google Benchmark.
+ +Result variables +^^^^^^^^^^^^^^^^ + +This module will set the following variables in your project: + +``benchmark_FOUND`` + If false, do not try to use Google Benchmark. +``benchmark_INCLUDE_DIR`` + path to benchmark include directory +``benchmark_LIBRARY`` + path to benchmark library +``benchmark_main_LIBRARY`` + path to benchmark main library + +This module will also define the following imported target: + +``benchmark::benchmark`` + An imported target for Google Benchmark +#]=======================================================================] + +include(FindPackageHandleStandardArgs) +include(FindPackageMessage) + +macro(_finalize_find_package_benchmark) + find_package_handle_standard_args( + benchmark + FOUND_VAR benchmark_FOUND + REQUIRED_VARS benchmark_INCLUDE_DIR benchmark_LIBRARY benchmark_main_LIBRARY) + + if(benchmark_FOUND) + add_library(benchmark::benchmark UNKNOWN IMPORTED) + set_target_properties( + benchmark::benchmark PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${benchmark_INCLUDE_DIR}" + IMPORTED_LOCATION "${benchmark_LIBRARY}") + add_library(benchmark::benchmark_main UNKNOWN IMPORTED) + set_target_properties( + benchmark::benchmark_main PROPERTIES + IMPORTED_LOCATION "${benchmark_main_LIBRARY}") + endif() + + mark_as_advanced(benchmark_FOUND benchmark_INCLUDE_DIR benchmark_LIBRARY benchmark_main_LIBRARY) +endmacro() + +if(NOT benchmark_ROOT) + set(benchmark_ROOT $ENV{benchmark_ROOT}) +endif() + +find_path(benchmark_INCLUDE_DIR + NAMES benchmark/benchmark.h + HINTS ${benchmark_ROOT} + DOC "Google Benchmark include directory") +if(NOT benchmark_INCLUDE_DIR) + _finalize_find_package_benchmark() + return() +endif() + +find_library(benchmark_LIBRARY + NAMES libbenchmark.a + HINTS ${benchmark_ROOT} + DOC "Google Benchmark library") +find_library(benchmark_main_LIBRARY + NAMES libbenchmark_main.a + HINTS ${benchmark_ROOT} + DOC "Google Benchmark main library") +if(NOT benchmark_LIBRARY OR NOT benchmark_main_LIBRARY) + 
_finalize_find_package_benchmark() + return() +endif() + +_finalize_find_package_benchmark() \ No newline at end of file diff --git a/cmake/Finddotnet.cmake b/cmake/Finddotnet.cmake index a5c3e8bc7eb..005c0fbd536 100644 --- a/cmake/Finddotnet.cmake +++ b/cmake/Finddotnet.cmake @@ -71,6 +71,8 @@ function(dotnet_build project_file_path) cmake_path(APPEND project_root_directory "bin" OUTPUT_VARIABLE project_binary_directory) cmake_path(APPEND project_binary_directory "${project}" OUTPUT_VARIABLE + project_binary_directory) + cmake_path(APPEND project_binary_directory "${project}.dll" OUTPUT_VARIABLE project_binary_path) message( STATUS "Building project ${project} using dotnet, in ${configuration} mode") @@ -80,6 +82,7 @@ function(dotnet_build project_file_path) COMMAND ${dotnet_EXECUTABLE} ARGS build ${project_file_path} --configuration "${configuration}" --output "${project_binary_directory}" --self-contained + "false" -p:UseAppHost=false -p:PublishSingleFile=false DEPENDS ${ARG_SOURCE} WORKING_DIRECTORY "${project_root_directory}" COMMENT "Build ${project} using .NET framework") diff --git a/cmake/FlowCommands.cmake b/cmake/FlowCommands.cmake index cee4727871c..acc81ea1cd2 100644 --- a/cmake/FlowCommands.cmake +++ b/cmake/FlowCommands.cmake @@ -29,6 +29,9 @@ set(PASS_COMPILATION_UNIT ) function(generate_coverage_xml) + if(NOT COVERAGETOOL_AVAILABLE) + return() + endif() if(NOT (${ARGC} EQUAL "1")) message(FATAL_ERROR "generate_coverage_xml expects one argument") endif() @@ -85,7 +88,7 @@ function(generate_coverage_xml) else() add_custom_command( OUTPUT ${target_file} - COMMAND ${coveragetool_exe} ${target_file} ${in_files} + COMMAND ${coveragetool_command} ${target_file} ${in_files} DEPENDS ${in_files} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMENT "Generate coverage xml") @@ -268,9 +271,11 @@ function(add_flow_target) set(cs_out_file "${out_file}.cs_gen") add_custom_command(OUTPUT "${out_file}" COMMAND ${CMAKE_COMMAND} -E env 
"PYTHONPATH=${CMAKE_SOURCE_DIR}" - ${ACTORCOMPILER_COMMAND} "${in_file}" "${py_out_file}" ${actor_compiler_flags} + ${ACTORCOMPILER_PY_COMMAND} "${in_file}" "${py_out_file}" ${actor_compiler_flags} COMMAND ${ACTORCOMPILER_CSHARP_COMMAND} "${in_file}" "${cs_out_file}" ${actor_compiler_flags} - COMMAND ${Python3_EXECUTABLE} ${CMAKE_SOURCE_DIR}/flow/actorcompiler_py/compare_actor_output.py "${cs_out_file}" "${py_out_file}" + COMMAND ${Python3_EXECUTABLE} + ${CMAKE_SOURCE_DIR}/flow/actorcompiler_py/compare_actor_output.py + "${cs_out_file}" "${py_out_file}" COMMAND ${CMAKE_COMMAND} -E copy "${py_out_file}" "${out_file}" DEPENDS "${in_file}" actorcompiler COMMENT "Compile and compare actor: ${src}") @@ -327,6 +332,17 @@ function(add_flow_target) set_property(TARGET ${AFT_NAME} PROPERTY COVERAGE_FILTERS ${AFT_SRCS}) add_custom_target(${AFT_NAME}_actors DEPENDS ${generated_files}) + if(TARGET fdboptions AND NOT "${AFT_NAME}" STREQUAL "fdboptions") + if(DEFINED FDB_OPTIONS_H) + set_source_files_properties(${sources} ${AFT_ADDL_SRCS} + APPEND PROPERTY OBJECT_DEPENDS ${FDB_OPTIONS_H}) + endif() + add_dependencies(${AFT_NAME}_actors fdboptions) + add_dependencies(${AFT_NAME} fdboptions) + if(TARGET fdboptions_vex) + add_dependencies(${AFT_NAME}_actors fdboptions_vex) + endif() + endif() add_dependencies(${AFT_NAME} ${AFT_NAME}_actors) generate_coverage_xml(${AFT_NAME}) if(strip_target) diff --git a/cmake/RocksDBVersion.cmake b/cmake/RocksDBVersion.cmake new file mode 100644 index 00000000000..3b0c349640f --- /dev/null +++ b/cmake/RocksDBVersion.cmake @@ -0,0 +1,30 @@ +# RocksDB Version Configuration +# ============================== +# Edit this file to change the RocksDB version used by FDB. +# +# Choose ONE of the two options below by uncommenting the appropriate section. +# Do NOT set both - CMake will error if both are configured. +# +# CMake will automatically generate fdbserver/include/fdbserver/FDBRocksDBVersion.h +# based on the version specified here. 
Do NOT edit that file manually. +# +# To get SHA256: +# curl -sL https://github.com/facebook/rocksdb/archive/REF.tar.gz | sha256sum +# (where REF is either a version tag like "v9.7.3" or a commit hash) + +############################################################################### +# OPTION 1: RocksDB Release Number +# If you use this option, make sure Option 2 below is commented out. +############################################################################### +set(ROCKSDB_VERSION "9.7.3") +set(ROCKSDB_VERSION_SHA256 "acfabb989cbfb5b5c4d23214819b059638193ec33dad2d88373c46448d16d38b") + +############################################################################### +# OPTION 2: RocksDB Git Commit Hash +# If you use this option, make sure Option 1 above is commented out. +# +# Note: CMake will auto-fetch the version from GitHub at configure time. +# This requires network access during cmake configure. +############################################################################### +# set(ROCKSDB_GIT_HASH "2732f118497ab75cd2e44bc327746be180b42dcf") +# set(ROCKSDB_GIT_HASH_SHA256 "5f0dd06680c0bf302abb9bc70b4698fdcd0d5623c7264c8b3af7a1fe4f8b3078") diff --git a/contrib/Implib.so/implib-gen.py b/contrib/Implib.so/implib-gen.py index 9d96324afb5..288e188e829 100755 --- a/contrib/Implib.so/implib-gen.py +++ b/contrib/Implib.so/implib-gen.py @@ -100,7 +100,7 @@ def collect_syms(f): if name in syms_set: continue syms_set.add(name) - sym["Size"] = int(sym["Size"], 0) # Readelf is inconistent on Size format + sym["Size"] = int(sym["Size"], 0) # Readelf is inconsistent on Size format if "@" in name: sym["Default"] = "@@" in name name, ver = re.split(r"@+", name) diff --git a/contrib/Joshua/scripts/correctnessTest.sh b/contrib/Joshua/scripts/correctnessTest.sh index ba84facc6b9..32f5cd8b3c1 100755 --- a/contrib/Joshua/scripts/correctnessTest.sh +++ b/contrib/Joshua/scripts/correctnessTest.sh @@ -244,6 +244,11 @@ if [ "${TH_ARCHIVE_LOGS_ON_FAILURE}" = "true" ]; then
PYTHON_CMD_ARGS+=("--archive-logs-on-failure") fi +if [ -f "test_args.txt" ]; then + echo "Found test_args.txt - running with specific test arguments" >&2 + PYTHON_CMD_ARGS+=("--test-args-file" "test_args.txt") +fi + # Setup joshua output capture PYTHON_APP_STDOUT_FILE="${APP_RUN_TEMP_DIR}/python_app_stdout.log" PYTHON_APP_STDERR_FILE="${APP_RUN_TEMP_DIR}/python_app_stderr.log" diff --git a/contrib/TestHarness2/README.md b/contrib/TestHarness2/README.md index 9e0c788d5c0..f9a180558e7 100644 --- a/contrib/TestHarness2/README.md +++ b/contrib/TestHarness2/README.md @@ -1,6 +1,6 @@ # FoundationDB TestHarness2 -This directory contains TestHarness2, a Python-based test harness for FoundationDB (that supercedes [`../TestHarness`](../TestHarness)). It can be used standalone or invoked by the Joshua testing framework via scripts like [`../Joshua/scripts/correctnessTest.sh`](../Joshua/scripts/correctnessTest.sh). +This directory contains TestHarness2, a Python-based test harness for FoundationDB (that supersedes [`../TestHarness`](../TestHarness)). It can be used standalone or invoked by the Joshua testing framework via scripts like [`../Joshua/scripts/correctnessTest.sh`](../Joshua/scripts/correctnessTest.sh). ## Quick Start @@ -167,7 +167,7 @@ Set environment variables when you start joshua, and [`correctnessTest.sh`](../J TestHarness2 integrates with [`../joshua_logtool.py`](../joshua_logtool.py) to automatically upload trace logs to a FoundationDB cluster for long-term storage and analysis when test failures occur. 
-TODO: Integerate joshua_logtool.py into TestHarness2 +TODO: Integrate joshua_logtool.py into TestHarness2 #### How joshua_logtool.py Works diff --git a/contrib/TestHarness2/example_test_args.txt b/contrib/TestHarness2/example_test_args.txt new file mode 100644 index 00000000000..33a102e46ef --- /dev/null +++ b/contrib/TestHarness2/example_test_args.txt @@ -0,0 +1 @@ +-f fast/CycleTest.toml -s 315315 -b off --reseed-time 100 diff --git a/contrib/TestHarness2/test_harness/config.py b/contrib/TestHarness2/test_harness/config.py index 3677c55333e..54c80c0cbb7 100644 --- a/contrib/TestHarness2/test_harness/config.py +++ b/contrib/TestHarness2/test_harness/config.py @@ -279,6 +279,12 @@ def __init__(self): "help": "If set, archive FDB logs and test harness outputs to a .tar.gz file in the joshua_output_dir on test failure.", "env_name": "TH_ARCHIVE_LOGS_ON_FAILURE", } + self.test_args_file: Path | None = None + self.test_args_file_args = { + "type": Path, + "required": False, + "help": "File containing test arguments (e.g., '-f tests/fast/CycleTest.toml -s 315315 -b off --reseed-time 100')", + } self._env_names: Dict[str, str] = {} self._config_map = self._build_map() diff --git a/contrib/TestHarness2/test_harness/run.py b/contrib/TestHarness2/test_harness/run.py index 404c6ab643f..d0133e588ab 100644 --- a/contrib/TestHarness2/test_harness/run.py +++ b/contrib/TestHarness2/test_harness/run.py @@ -23,6 +23,68 @@ from test_harness.summarize import Summary, SummaryTree +def parse_test_args_file(args_file: Path) -> tuple[Path, int, bool, List[str]]: + """ + Parse a test args file containing fdbserver arguments. 
+ Expected format: '-f tests/fast/CycleTest.toml -s 315315 -b off --reseed-time 100' + + Returns: + tuple: (test_file, random_seed, buggify_enabled, extra_args) + extra_args contains any arguments not parsed (like --reseed-time 100) + """ + import shlex + + with open(args_file, 'r') as f: + content = f.read().strip() + + # Parse the command line arguments + args = shlex.split(content) + + test_file: Path | None = None + random_seed: int | None = None + buggify_enabled: bool = False + extra_args: List[str] = [] + + i = 0 + while i < len(args): + arg = args[i] + if arg == '-f' or arg == '--testfile': + if i + 1 < len(args): + test_file = Path(args[i + 1]) + i += 2 + else: + raise ValueError(f"Missing value for {arg}") + elif arg == '-s' or arg == '--seed': + if i + 1 < len(args): + random_seed = int(args[i + 1]) + i += 2 + else: + raise ValueError(f"Missing value for {arg}") + elif arg == '-b' or arg == '--buggify': + if i + 1 < len(args): + buggify_val = args[i + 1].lower() + buggify_enabled = buggify_val in ['on', '1', 'true'] + i += 2 + else: + raise ValueError(f"Missing value for {arg}") + else: + # This is an extra argument we don't parse (like --reseed-time) + extra_args.append(arg) + # If this arg takes a value, include it too + if i + 1 < len(args) and not args[i + 1].startswith('-'): + extra_args.append(args[i + 1]) + i += 2 + else: + i += 1 + + if test_file is None: + raise ValueError("Test file not specified in args file") + if random_seed is None: + raise ValueError("Random seed not specified in args file") + + return test_file, random_seed, buggify_enabled, extra_args + + @total_ordering class TestDescription: def __init__(self, path: Path, name: str, priority: float): @@ -367,6 +429,7 @@ def __init__( stats: str | None = None, expected_unseed: int | None = None, will_restart: bool = False, + extra_args: List[str] | None = None, ): self.binary = binary self.test_file = test_file @@ -380,6 +443,7 @@ def __init__( self.old_binary_path: Path = 
config.old_binaries_path self.buggify_enabled: bool = buggify_enabled self.fault_injection_enabled: bool = True + self.extra_args: List[str] = extra_args if extra_args is not None else [] self.trace_format: str | None = config.trace_format if Version.of_binary(self.binary) < "6.1.0": self.trace_format = None @@ -394,6 +458,7 @@ def __init__( expected_unseed=self.expected_unseed, will_restart=will_restart, long_running=config.long_running, + is_old_binary=False, # will be set after the run ) self.run_time: int = 0 self.success = self.run() @@ -509,6 +574,10 @@ def run(self): # disable traceTooManyLines Error MAX_TRACE_LINES command += ["--knob-max-trace-lines=1000000000"] + # Add any extra arguments from test_args_file + if self.extra_args: + command += self.extra_args + self.temp_path.mkdir(parents=True, exist_ok=True) # self.log_test_plan(out) @@ -539,6 +608,8 @@ def run(self): err_out: str = "" try: out_bytes, err_bytes = process.communicate(timeout=timeout) + # Check if we're running with an old binary (restarting test with non-current binary) + self.is_old_binary = (self.binary != config.binary) # Try normal UTF-8 decode first try: out = out_bytes.decode('utf-8') if out_bytes else "" @@ -548,9 +619,7 @@ def run(self): decode_error_occurred = True out = out_bytes.decode('utf-8', errors='replace') if out_bytes else "" err_out = err_bytes.decode('utf-8', errors='replace') if err_bytes else "" - # Check if we're running with an old binary (restarting test with non-current binary) - is_old_binary = self.binary != config.binary - if is_old_binary: + if self.is_old_binary: print(f"WARNING: UnicodeDecodeError at position {decode_ex.start} - invalid byte {hex(out_bytes[decode_ex.start])}. Output decoded with replacement. 
(Old binary - not failing test)", file=sys.stderr) # Don't fail tests for old binaries - we can't fix them else: @@ -575,16 +644,16 @@ def run(self): self.summary.was_killed = did_kill self.summary.valgrind_out_file = valgrind_file self.summary.error_out = err_out - + self.summary.is_old_binary = self.is_old_binary + # Add diagnostic info if decode error occurred (BEFORE summarize calculates Ok attribute) if decode_error_occurred: - is_old_binary = self.binary != config.binary decode_error_node = SummaryTree("UnicodeDecodeError") # Use Severity 30 (warning) for old binaries, 40 (error) for current binary - decode_error_node.attributes["Severity"] = "30" if is_old_binary else "40" + decode_error_node.attributes["Severity"] = "30" if self.is_old_binary else "40" decode_error_node.attributes["Message"] = "fdbserver output contained invalid UTF-8 bytes. Output decoded with replacement characters." decode_error_node.attributes["Position"] = "Check fdbserver.stdout for � characters" - if is_old_binary: + if self.is_old_binary: decode_error_node.attributes["Note"] = "Old binary - binary output tolerated" self.summary.out.append(decode_error_node) @@ -898,18 +967,23 @@ def _run_joshua_logtool_for_test(self, test_run): print(f"DEBUG: Conditions NOT met for joshua_logtool execution", file=sys.stderr) def run_tests( - self, test_files: List[Path], seed: int, test_picker: TestPicker + self, test_files: List[Path], seed: int, test_picker: TestPicker, extra_args: List[str] | None = None, override_buggify: bool | None = None, track_stats: bool = True ) -> bool: result: bool = True for count, file in enumerate(test_files): will_restart = count + 1 < len(test_files) binary = self.binary_chooser.choose_binary(file) + # Skip determinism check when running from test_args_file (track_stats=False) unseed_check = ( - not is_no_sim(file) + track_stats + and not is_no_sim(file) and config.random.random() < config.unseed_check_ratio ) buggify_enabled: bool = False - if config.buggify.value 
== BuggifyOptionValue.ON: + if override_buggify is not None: + # Use the buggify setting from the args file + buggify_enabled = override_buggify + elif config.buggify.value == BuggifyOptionValue.ON: buggify_enabled = True elif config.buggify.value == BuggifyOptionValue.RANDOM: buggify_enabled = config.random.random() < config.buggify_on_ratio @@ -924,9 +998,11 @@ def run_tests( stats=test_picker.dump_stats(), will_restart=will_restart, buggify_enabled=buggify_enabled, + extra_args=extra_args, ) result = result and run.success - test_picker.add_time(test_files[0], run.run_time, run.summary.out) + if track_stats: + test_picker.add_time(test_files[0], run.run_time, run.summary.out) decorate_summary(run.summary.out, file, seed + count, run.buggify_enabled, run.temp_path) if ( unseed_check @@ -961,8 +1037,10 @@ def run_tests( expected_unseed=run.summary.unseed, will_restart=will_restart, buggify_enabled=buggify_enabled, + extra_args=extra_args, ) - test_picker.add_time(file, run2.run_time, run.summary.out) + if track_stats: + test_picker.add_time(file, run2.run_time, run.summary.out) decorate_summary( run2.summary.out, file, seed + count, run.buggify_enabled, run2.temp_path ) @@ -983,13 +1061,23 @@ def run_tests( return result def run(self) -> bool: - seed = ( - config.random_seed - if config.random_seed is not None - else config.random.randint(0, 2**32 - 1) - ) - test_files = self.test_picker.choose_test() - success = self.run_tests(test_files, seed, self.test_picker) + if config.test_args_file is not None: + # Parse the args file to get test parameters + test_file, seed, buggify_enabled, extra_args = parse_test_args_file(config.test_args_file) + # Convert relative test file path to absolute + if not test_file.is_absolute(): + test_file = config.test_source_dir / test_file + test_files = [test_file] + success = self.run_tests(test_files, seed, self.test_picker, extra_args=extra_args, override_buggify=buggify_enabled, track_stats=False) + else: + # Normal test execution 
flow + seed = ( + config.random_seed + if config.random_seed is not None + else config.random.randint(0, 2**32 - 1) + ) + test_files = self.test_picker.choose_test() + success = self.run_tests(test_files, seed, self.test_picker) # Check if we should preserve logs on failure archive_logs_on_failure = os.getenv("TH_ARCHIVE_LOGS_ON_FAILURE", "false").lower() in ("true", "1", "yes") diff --git a/contrib/TestHarness2/test_harness/summarize.py b/contrib/TestHarness2/test_harness/summarize.py index 031720b7f2d..94526018637 100644 --- a/contrib/TestHarness2/test_harness/summarize.py +++ b/contrib/TestHarness2/test_harness/summarize.py @@ -326,6 +326,7 @@ def __init__( error_out: str = None, will_restart: bool = False, long_running: bool = False, + is_old_binary: bool = False, ): self.binary = binary self.runtime: float = runtime @@ -354,6 +355,7 @@ def __init__( self.negative_test_success = False self.max_trace_time = -1 self.max_trace_time_type = "None" + self.is_old_binary: bool = is_old_binary if uid is not None: self.out.attributes["TestUID"] = str(uid) @@ -387,7 +389,8 @@ def summarize(self, trace_dir: Path, command: str): self.out.append(child) return self.summarize_files(trace_files[0]) - if config.joshua_dir is not None: + # Skip write_coverage for old binaries in restarting tests + if config.joshua_dir is not None and not self.is_old_binary: import test_harness.fdb test_harness.fdb.write_coverage( @@ -748,3 +751,13 @@ def stderr_severity(attrs: Dict[str, str]): self.stderr_severity = attrs["NewSeverity"] self.handler.add_handler(("Type", "StderrSeverity"), stderr_severity) + + def resetting_random_seed(attrs: Dict[str, str]): + child = SummaryTree("ResettingRandomSeed") + if "NewSeed" in attrs: + child.attributes["NewSeed"] = attrs["NewSeed"] + if "Time" in attrs: + child.attributes["Time"] = attrs["Time"] + self.out.append(child) + + self.handler.add_handler(("Type", "ResettingRandomSeed"), resetting_random_seed) diff --git a/contrib/benchmark_comparison.py 
b/contrib/benchmark_comparison.py new file mode 100644 index 00000000000..ada345f691c --- /dev/null +++ b/contrib/benchmark_comparison.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 +""" +Generate comprehensive actor vs coroutine benchmark comparison report. + +USAGE: + cd build_output # or your build directory with bin/flowbench + python3 ../contrib/benchmark_comparison.py + +REQUIREMENTS: + - Working bin/flowbench executable + - Both actor and coroutine benchmarks available: + * bench_net2, coroutine_net2 + * bench_delay.*DELAY.*, coroutine_delay_bench + * bench_delay.*YIELD.*, coroutine_yield_bench + * bench_callback, coroutine_callback + +OUTPUT: + Complete comparison report with: + - DELAY benchmarks (DELAY + YIELD variants, all scales) + - NET2 benchmarks (allocation-heavy patterns, all scales) + - CALLBACK benchmarks (various template sizes and scales) + - OVERALL_GEOMEAN calculations for each section + +This tool matches the historical format used in coroutine optimization reports +and provides statistical analysis of performance differences across all +benchmark categories. 
import json
import math
import re
import subprocess
import sys

# Scales exercised by the DELAY and YIELD benchmark families.
_DELAY_SCALES = (0, 1, 8, 64, 512, 4096, 32768, 65536)


def geomean(values):
    """Return the geometric mean of a list of relative changes.

    Each value is a relative change ``(new - old) / old``, so ``1 + value`` is
    the ``new / old`` ratio. The ratios are averaged geometrically and the
    result is converted back to a relative change. The sign is preserved: a
    set of improvements (negative changes) yields a negative result.

    Args:
        values: iterable of relative changes (floats).

    Returns:
        The aggregate relative change, or 0 when there is nothing to average.
        Changes <= -1 (a non-positive ratio) have no logarithm and are skipped.
    """
    ratios = [1 + v for v in values if 1 + v > 0]
    if not ratios:
        return 0
    return math.exp(sum(math.log(r) for r in ratios) / len(ratios)) - 1


def get_benchmark_data(filter_name, build_dir=None):
    """Run flowbench for one benchmark filter and return its parsed results.

    Args:
        filter_name: regular expression passed to ``--benchmark_filter``.
        build_dir: directory containing ``bin/flowbench``. ``None`` (default)
            uses the current working directory, matching the documented usage
            of running this script from inside the build directory.

    Returns:
        The list under the ``benchmarks`` key of flowbench's JSON output, or an
        empty list when flowbench could not be run or produced no usable output
        (a warning is written to stderr in that case).
    """
    command = ['./bin/flowbench', f'--benchmark_filter={filter_name}', '--benchmark_format=json']
    try:
        result = subprocess.run(command, capture_output=True, text=True, cwd=build_dir)
    except OSError as exc:
        print(f"warning: could not run flowbench for '{filter_name}': {exc}", file=sys.stderr)
        return []

    if result.returncode != 0 or not result.stdout.strip():
        print(f"warning: flowbench produced no results for '{filter_name}' "
              f"(exit code {result.returncode})", file=sys.stderr)
        return []

    try:
        return json.loads(result.stdout).get('benchmarks', [])
    except json.JSONDecodeError as exc:
        print(f"warning: could not parse flowbench output for '{filter_name}': {exc}", file=sys.stderr)
        return []


def _find_by_suffix(benchmarks, suffix):
    """Return the first benchmark whose name ends with ``suffix``, or None."""
    return next((b for b in benchmarks if b['name'].endswith(suffix)), None)


def _report_pair(label, old, new, time_changes, cpu_changes):
    """Print one comparison row and record its relative changes.

    Pairs whose baseline time is zero are skipped, since a relative change
    against zero is undefined.
    """
    time_old, time_new = old['real_time'], new['real_time']
    cpu_old, cpu_new = old['cpu_time'], new['cpu_time']
    if not time_old or not cpu_old:
        return
    time_change = (time_new - time_old) / time_old
    cpu_change = (cpu_new - cpu_old) / cpu_old
    time_changes.append(time_change)
    cpu_changes.append(cpu_change)
    print(f"{label} {time_change:+7.4f} {cpu_change:+7.4f} {time_old:>8} {time_new:>8} {cpu_old:>8} {cpu_new:>8}")


def _report_geomean(time_changes, cpu_changes):
    """Print the OVERALL_GEOMEAN row, with separate Time and CPU aggregates."""
    if time_changes:
        print(f"OVERALL_GEOMEAN {geomean(time_changes):+7.4f} {geomean(cpu_changes):+7.4f} 0 0 0 0")


def main():
    """Run every benchmark family and print the comparison report to stdout."""
    print("Optimize coroutine final_suspend() - NET2 benchmark performance")
    print("")
    print("Benchmark Time CPU Time Old Time New CPU Old CPU New")
    print("---------------------------------------------------------------------------------------------------------------------------------------------")

    # Section 1: DELAY Comparison (DELAY + YIELD share one geomean)
    delay_time_changes, delay_cpu_changes = [], []
    delay_families = (
        ('bench_delay.*DELAY.*', 'coroutine_delay_bench', '[bench_delay vs. coroutine_delay]'),
        ('bench_delay.*YIELD.*', 'coroutine_yield_bench', '[bench_delay YIELD vs. coroutine_yield]'),
    )
    for actor_filter, coro_filter, label in delay_families:
        actors = get_benchmark_data(actor_filter)
        coros = get_benchmark_data(coro_filter)
        for scale in _DELAY_SCALES:
            suffix = f'/{scale}'
            actor_bench = _find_by_suffix(actors, suffix)
            coro_bench = _find_by_suffix(coros, suffix)
            if actor_bench and coro_bench:
                _report_pair(f"{label}/{scale}", actor_bench, coro_bench,
                             delay_time_changes, delay_cpu_changes)
    _report_geomean(delay_time_changes, delay_cpu_changes)

    print("")
    print("Comparing bench_net2 to coroutine_net2 (from ./build_output/bin/flowbench)")
    print("Benchmark Time CPU Time Old Time New CPU Old CPU New")
    print("----------------------------------------------------------------------------------------------------------------------------")

    # Section 2: NET2 Comparison
    net2_actors = get_benchmark_data('bench_net2')
    net2_coros = get_benchmark_data('coroutine_net2')
    net2_time_changes, net2_cpu_changes = [], []
    for actor in net2_actors:
        scale = actor['name'].split('/')[-1]
        coro = _find_by_suffix(net2_coros, '/' + scale)
        if coro:
            _report_pair(f"[bench_net2 vs. coroutine_net2]/{scale}", actor, coro,
                         net2_time_changes, net2_cpu_changes)
    _report_geomean(net2_time_changes, net2_cpu_changes)

    print("")
    print("Comparing bench_callback to coroutine_callback (from ./build_output/bin/flowbench)")
    print("Benchmark Time CPU Time Old Time New CPU Old CPU New")
    print("---------------------------------------------------------------------------------------------------------------------------------------------")

    # Section 3: CALLBACK Comparison
    callback_actors = get_benchmark_data('bench_callback')
    callback_coros = get_benchmark_data('coroutine_callback')
    callback_time_changes, callback_cpu_changes = [], []
    for actor in callback_actors:
        name = actor['name']
        # Parse bench_callback<1>/64 format
        size_match = re.search(r'<(\d+)>', name)
        scale_match = re.search(r'/(\d+)$', name)
        if not (size_match and scale_match):
            continue
        size, scale = size_match.group(1), scale_match.group(1)
        coro = next(
            (c for c in callback_coros
             if f'<{size}>' in c['name'] and c['name'].endswith(f'/{scale}')),
            None,
        )
        if coro:
            _report_pair(f"[bench_callback vs. coroutine_callback]<{size}>/{scale}", actor, coro,
                         callback_time_changes, callback_cpu_changes)
    _report_geomean(callback_time_changes, callback_cpu_changes)


if __name__ == "__main__":
    main()
Union[LinterIssue, None]: f"No {SCOPING_BEGIN_PUNCTUATION} matching {SCOPING_END_PUNCTUATION}, found {start_token.spelling}", ) self._debug_token( - start_token, f"Exitting scope starting from line {start_token.line}" + start_token, f"Exiting scope starting from line {start_token.line}" ) self._scope_stack.pop() @@ -190,7 +190,7 @@ def accept(self, token: Token) -> Union[LinterIssue, None]: self._scope_stack.append(token) return None elif self._is_scope_end(token): - self._debug_token(token, "Exitting scope") + self._debug_token(token, "Exiting scope") return self._exit_scope(token) elif self._is_scoping_keyword(token): self._debug_token(token, "Keyword found") diff --git a/contrib/mtlsbenchmark/client.sh b/contrib/mtlsbenchmark/client.sh index 003a9e41886..dafa2369a44 100644 --- a/contrib/mtlsbenchmark/client.sh +++ b/contrib/mtlsbenchmark/client.sh @@ -6,7 +6,7 @@ # test_remoteAddresses is set to connect to the server # test_targetDuration is set to 10 seconds for the client duration # test_connectionsOut is set to 10 for the number of outgoing connections -# knob_disable_mainthread_tls_handshake is enabled to use backgound threads for TLS handshakes only +# knob_disable_mainthread_tls_handshake is enabled to use background threads for TLS handshakes only # knob_tls_handshake_flowlock_priority is set to 8900 for enabling TLS flowlock priority as high as the handshake priority. Default is 7000. 
# replay - Go-based TUI tool for FDB trace files
cmake_minimum_required(VERSION 3.13)

# Oldest Go release that can build replay (see contrib/replay/README.md).
set(REPLAY_MIN_GO_VERSION "1.21")

# Find Go (required for building replay)
find_program(GO_EXECUTABLE go)
if(NOT GO_EXECUTABLE)
  message(WARNING "Go not found. The 'replay' target will not be available. Install Go 1.21+ to build replay.")
  return()
endif()

# Verify Go version
execute_process(
  COMMAND ${GO_EXECUTABLE} version
  OUTPUT_VARIABLE GO_VERSION_OUTPUT
  RESULT_VARIABLE GO_VERSION_RESULT
  OUTPUT_STRIP_TRAILING_WHITESPACE
)
if(NOT GO_VERSION_RESULT EQUAL 0)
  message(WARNING "'${GO_EXECUTABLE} version' failed. The 'replay' target will not be available.")
  return()
endif()
message(STATUS "Found Go: ${GO_VERSION_OUTPUT}")

# "go version" prints e.g. "go version go1.21.5 linux/amd64"; extract the
# numeric part and skip the target when the toolchain is too old. If the
# output has an unexpected shape the check is skipped rather than failing.
if(GO_VERSION_OUTPUT MATCHES "go([0-9]+\\.[0-9]+(\\.[0-9]+)?)")
  if(CMAKE_MATCH_1 VERSION_LESS REPLAY_MIN_GO_VERSION)
    message(WARNING "Go ${CMAKE_MATCH_1} is older than ${REPLAY_MIN_GO_VERSION}. The 'replay' target will not be available.")
    return()
  endif()
endif()

# Set output directory for the binary
set(REPLAY_OUTPUT_DIR "${CMAKE_BINARY_DIR}/bin")
set(REPLAY_BINARY "${REPLAY_OUTPUT_DIR}/replay")

# Ensure output directory exists
file(MAKE_DIRECTORY ${REPLAY_OUTPUT_DIR})

# Get all Go source files for dependency tracking.
# CONFIGURE_DEPENDS re-runs the glob at build time so added or removed .go
# files are noticed without a manual re-configure.
file(GLOB REPLAY_GO_SOURCES CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/*.go")

# Custom target to build replay
# This handles all Go dependencies automatically - users just need Go installed
# ALL means it builds by default (but only if Go is available - see check above)
# NOTE(review): "go mod tidy" may rewrite go.mod/go.sum in the source tree on
# every build; kept because the build may rely on it to produce go.sum - confirm.
add_custom_target(replay ALL
  COMMAND ${CMAKE_COMMAND} -E echo "Downloading Go dependencies..."
  COMMAND ${GO_EXECUTABLE} mod download
  COMMAND ${CMAKE_COMMAND} -E echo "Tidying Go modules..."
  COMMAND ${GO_EXECUTABLE} mod tidy
  COMMAND ${CMAKE_COMMAND} -E echo "Building replay..."
  COMMAND ${GO_EXECUTABLE} build -o ${REPLAY_BINARY} .
  WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
  COMMENT "Building FDB trace replay tool (all dependencies handled automatically)"
  BYPRODUCTS ${REPLAY_BINARY}
  SOURCES ${REPLAY_GO_SOURCES}
  VERBATIM
)

# No chmod step: "go build" already writes an executable file, and a raw
# "chmod" command is not available on every platform CMake supports.

message(STATUS "replay will be built by default (Go found)")
message(STATUS "replay binary will be at: ${REPLAY_BINARY}")
If Go is not installed, CMake will emit a warning during configuration and skip the `replay` target. The rest of the FDB build will proceed normally. + +### Using CMake + +The `replay` binary is built by default when you build FDB: + +```bash +cmake --build . # replay is included in the default build +cmake --build . --target replay # or build replay specifically +``` + +CMake handles everything automatically: +- Downloads all Go dependencies +- Builds the binary +- Places it in `bin/replay` + +### Manual Build + +If you prefer to build directly with Go: + +```bash +cd contrib/replay +go build -o replay . +``` + +### Usage + +```bash +replay [trace-file.xml] # Load specific trace file +replay # Auto-load latest trace*.xml in current directory +replay -h | --help # Show help +``` + +**Tip:** Create an alias `r` for quick access: +```bash +alias r=replay +``` + +## Why + +The main motivation is to be **fast in debugging simulation issues**, as well as **understanding how FDB works**. + +FDB simulation produces XML trace files that can be gigabytes in size with millions of events. Debugging a test failure typically involves: +- Grepping for specific event types +- Manually correlating events across time +- Tracking down which machines had which roles at a given moment +- Understanding recovery sequences and epoch boundaries +- Finding the needle in the haystack of millions of trace events + +This tool aims to make all of that faster and more intuitive. + +## Organic Development + +This tool is meant to be **organic** - it will grow and evolve over time. There will always be features to add, improvements to make, and new patterns to codify. 
+ +**Feature Wishlist (always growing):** +- More navigation shortcuts for common patterns +- Better visualization of specific subsystems +- Performance optimizations for very large traces +- Additional filter types and search capabilities + +**Opportunity for Richer Logging:** + +Building this tool has revealed opportunities to improve FDB's trace logging itself. For example, I realized some trace events don't include the role ID, which is very useful since you can then see which role (e.g., ClusterController) the trace event is coming from, or even better, which specific StorageServer (out of dozens in simulation) the trace is coming from. + +The tool and the logging can evolve together - as we add features to the tool, we may discover places where richer trace events would help debugging. + +## Core Idea / Principles + +### File/Text Based +- Takes **only XML trace files** as input (the files produced by FDB simulation) +- No external dependencies, no database, no network - just the trace file itself +- Single low-dependency binary that runs anywhere + +### Time-Based Navigation +The core idea is to **navigate the trace file** and then, wherever you are, **"as of" that time/line, the tool reflects the state of the cluster**. + +Everything is based on where you are in the timeline: +- Cluster topology (which machines exist, what roles they have) +- DB configuration +- Recovery state +- Epoch/version information + +You can go back and forth in time, and the tool **reacts to that** - showing you the cluster state at that exact moment. + +From experience debugging FDB issues, you have to do multiple rounds of "back and forth" like this to narrow down the root cause of a bug. + +### Terminal UI (TUI) over GUI +- **No dependencies** apart from the trace file itself +- Runs on Linux environments **right inside the shell** where FDB devs run sim tests and inspect trace files +- Works over SSH, in containers, on any terminal + +### Why Go? 
+- **Excellent TUI library** (Bubbletea/Charmbracelet ecosystem) +- **Static typing** catches bugs early +- **Static binary** - no runtime dependencies +- **Potential for concurrency** if performance optimization is needed in the future + +## Features + +### Navigation + +| Key | Action | Description | +|-----|--------|-------------| +| `Ctrl+N` | Next event | Jump to the next visible event (respects filters) | +| `Ctrl+P` | Previous event | Jump to the previous visible event | +| `Ctrl+V` | Page forward | Jump ~1 second forward in time | +| `Alt+V` | Page backward | Jump ~1 second backward in time | +| `g` | Go to start | Jump to the first visible event | +| `G` / `Shift+G` | Go to end | Jump to the last visible event | +| `t` | Time jump | Open popup to enter a specific time in seconds | + +### Search + +| Key | Action | Description | +|-----|--------|-------------| +| `/` | Search forward | Enter search pattern (supports `*` wildcard) | +| `?` | Search backward | Enter search pattern (supports `*` wildcard) | +| `n` | Next match | Go to next match in original search direction | +| `N` / `Shift+N` | Previous match | Go to match in opposite direction | +| `Esc` | Clear search | Clear search highlighting | + +Search patterns support wildcards: `*Recovery*`, `Type=Master*`, `Machine=2.0.1.*` + +### Recovery Navigation + +| Key | Action | Description | +|-----|--------|-------------| +| `r` | Next recovery start | Jump to next `MasterRecoveryState` with `StatusCode=0` | +| `R` / `Shift+R` | Previous recovery start | Jump to previous recovery start | +| `e` | Next recovery event | Jump to any next `MasterRecoveryState` event | +| `E` / `Shift+E` | Previous recovery event | Jump to any previous recovery event | + +Recovery states are color-coded: +- **Red**: StatusCode < 11 (early recovery) +- **Blue**: StatusCode 11-13 (mid recovery) +- **Green**: StatusCode = 14 (fully recovered) + +### Severity Navigation + +| Key | Action | Description | 
+|-----|--------|-------------| +| `3` | Next warning | Jump to next `Severity=30` event | +| `#` / `Shift+3` | Previous warning | Jump to previous warning | +| `4` | Next error | Jump to next `Severity=40` event | +| `$` / `Shift+4` | Previous error | Jump to previous error | + +### Filtering + +Press `f` to open the filter configuration popup. Filters allow you to focus on specific events. + +**Filter Categories:** + +1. **Raw Filters** (pattern matching) + - Wildcard patterns like `Type=MasterRecoveryState`, `Role*TLog`, `Severity=40` + - Multiple patterns use OR logic (any match shows the event) + - Press `a` to add, `e` to edit, `r` to remove, `d` to disable/enable + - Press `t` to search and add by Type name (fuzzy search) + - Press `c` to toggle "common" event types (pre-defined important events) + +2. **Machine Filters** + - Select specific machines or entire data centers + - Press `Enter` to open machine selection popup + - Supports fuzzy search to find machines quickly + +3. **Time Range Filter** + - Show only events within a time window + - Press `d` to toggle, `Enter` to configure start/end times + +4. **Message Filter** + - Show only `NetworkMessageSent` events + - Press `d` to toggle + +**Filter Logic:** +- Categories combine with **AND** logic (event must match all active categories) +- Within each category, patterns use **OR** logic (event can match any pattern) +- Toggle "All" with `Space` to disable/enable all filtering + +### Views / Popups + +| Key | Action | Description | +|-----|--------|-------------| +| `c` | Config view | Show full DB configuration JSON (scrollable) | +| `x` | Health view | Show network latencies, degraded peers, connections | +| `h` | Help | Show all keyboard shortcuts | +| `f` | Filter | Configure event filters | + +**Config View (`c`):** +- Shows full JSON configuration at current time +- Scrollable with `Ctrl+N`/`Ctrl+P` +- Displays redundancy mode, log count, storage engine, etc. 
+ +**Health View (`x`):** +- **Network Latencies**: PingLatency events showing min/max/median/P90 latencies +- **Degraded Peers**: HealthMonitorDetectDegradedPeer events +- **Connections**: Sim2Connection and SimulatedDisconnection events +- Shows top 5 entries per category, sorted by latency + +### Topology Display + +The left pane shows the **cluster topology** at the current time: +- Machines grouped by Data Center (DC) +- Testers shown separately +- Each machine shows its roles (StorageServer, TLog, Coordinator, etc.) +- Roles show ID and epoch where applicable: `TLog [abc123] (e=5)` +- Current event's machine is highlighted with `->` arrow +- Current event's role is highlighted if ID matches + +**NetworkMessageSent Visualization:** +- Source machine highlighted with yellow background and `-->` arrow +- Destination machine highlighted with `<--` arrow +- RPC name shown at bottom of topology pane + +### Status Bar + +The bottom of the screen shows: +- **DB Config**: Current database configuration summary +- **Recovery State**: Current recovery status with color coding +- **Epoch Info**: Current epoch, KCV, RV, recoveryTxnVersion +- **Time**: Current position in the trace timeline + +### General + +| Key | Action | +|-----|--------| +| `q` / `Q` / `Ctrl+C` | Quit | + +## Architecture + +``` + +------------------+ + | trace*.xml | + | (FDB sim trace) | + +--------+---------+ + | + v ++-----------------------------------------------------------------------------------+ +| main.go | +| - CLI argument parsing | +| - Auto-find latest trace*.xml if not specified | +| - Load and parse trace file | +| - Launch TUI | ++-----------------------------------------------------------------------------------+ + | + v ++-----------------------------------------------------------------------------------+ +| trace.go | +| - XML parsing (streaming decoder) | +| - TraceEvent struct (Time, Type, Machine, ID, Severity, Attrs) | +| - DBConfig parsing from MasterRecoveryState events | +| 
- RecoveryState tracking | +| - EpochVersionInfo tracking (from GetDurableResult, UpdateRegistration) | +| - Binary search for time-based lookups | ++-----------------------------------------------------------------------------------+ + | + v ++-----------------------------------------------------------------------------------+ +| cluster.go | +| - Worker/RoleInfo structs | +| - BuildClusterState() - reconstructs topology from Role events | +| - Address parsing (DC extraction, main vs tester) | +| - Epoch tracking from TLogStart, LogRouterStart, BackupWorkerStart | ++-----------------------------------------------------------------------------------+ + | + v ++-----------------------------------------------------------------------------------+ +| ui.go | +| - Bubbletea TUI framework (Model-Update-View pattern) | +| - Split-pane layout: Topology (left) | Events (right) | +| - Popup overlays: Help, Config, Health, Filter, Time Jump | +| - Navigation handlers (Ctrl+N/P, g/G, r/R, e/E, etc.) 
| +| - Search with wildcard support | +| - Filter system (Raw, Machine, Time, Message) | +| - NetworkMessageSent visualization | ++-----------------------------------------------------------------------------------+ + | + v + +------------------+ + | Terminal | + | (user sees) | + +------------------+ +``` + +### Component Responsibilities + +**main.go** (~120 lines) +- Entry point and CLI parsing +- Auto-finds latest `trace*.xml` if no argument provided +- Coordinates parsing and TUI launch + +**trace.go** (~515 lines) +- **TraceEvent**: Core data structure for each trace line +- **TraceData**: Container for all parsed data with time-based accessors +- **DBConfig**: Parsed database configuration from MasterRecoveryState +- **RecoveryState**: Recovery milestones with event indices +- **EpochVersionInfo**: Epoch/version tracking from multiple event types +- XML streaming parser with progress reporting +- Binary search for all time-based lookups + +**cluster.go** (~265 lines) +- **Worker**: Machine with roles and DC membership +- **RoleInfo**: Role name, ID, and epoch +- **ClusterState**: Map of all workers +- **BuildClusterState()**: Replays Role events to reconstruct topology +- Address parsing for DC identification (handles both IPv4 and IPv6 formats) + +**ui.go** (~4600 lines) +- **model**: All TUI state (current position, filters, popup modes) +- **Update()**: Handles all keyboard input +- **View()**: Renders the full screen +- Multi-column topology layout +- Event list with wrapping and scrolling +- All popup rendering (help, config, health, filter, time jump) +- Search and filter matching logic +- NetworkMessageSent visualization + +### Data Flow + +1. **Parse**: XML trace file -> TraceEvent array (sorted by time) +2. **Index**: Build DBConfig, RecoveryState, EpochVersionInfo arrays +3. **Navigate**: User moves through events via keyboard +4. **Reconstruct**: On each navigation, rebuild ClusterState from events[0:current] +5. 
**Render**: Display topology + events + status at current position + +### Performance Optimizations + +- **Streaming XML parser**: Doesn't load entire file into memory +- **Pre-allocation**: Event array sized based on file size estimate +- **Binary search**: All time-based lookups use binary search +- **Pre-compiled regex**: Filter patterns compiled once, cached +- **Set lookups**: Machine filters use O(1) map lookups +- **DC caching**: Machine-to-DC mapping cached per session +- **Progress reporting**: Every 100K events during loading + +## Dependencies + +Go dependencies are managed automatically by the build system. For reference: + +- Go 1.21+ +- [Bubbletea](https://github.com/charmbracelet/bubbletea) v0.25.0 - TUI framework +- [Lipgloss](https://github.com/charmbracelet/lipgloss) v0.9.1 - Styling +- [Bubbles](https://github.com/charmbracelet/bubbles) v0.18.0 - Text input widgets diff --git a/contrib/replay/TODOS.md b/contrib/replay/TODOS.md new file mode 100644 index 00000000000..217b791d6b4 --- /dev/null +++ b/contrib/replay/TODOS.md @@ -0,0 +1,7 @@ +- have history tab e.g. past recovery history, db config history, epoch history, etc. generally my principle has been "as of now" state, and no memory... but I guess I can extend this to "history as of now" and deem it as acceptable given its potential productivity gains +- have epoch associated with txn interfaces. useful to understand what generation they belong to, especially useful during recovery. + --- log routers, tlog, backup workers DONE. left: master, cp, grv proxy, resolver, anything else in txn system which is associated with an epoch? +- topology view nesting: role interface id child of a certain worker interface id, tlog id can be a child of a shared tlog id. visually show these. +- control plane metadata (srcs: system key space, commit proxy txn state store, role specific code, etc.) 
// addressPatterns lists the address layouts understood by parseAddress, in
// the order they are tried. In both layouts the first captured number is the
// machine type and the second is the DC ID. The patterns are compiled once at
// package initialisation instead of on every parseAddress call.
var addressPatterns = []*regexp.Regexp{
	regexp.MustCompile(`\[abcd::(\d+):(\d+):`), // Format 1: [abcd::X:Y:Z:W]:Port
	regexp.MustCompile(`^(\d+)\.(\d+)\.`),      // Format 2: X.Y.Z.W:Port
}

// parseAddress extracts the machine type and DC ID from a machine address.
//
// Format 1: [abcd::X:Y:Z:W]:Port where X=type (2=main, 3=tester), Y=DC ID
// Format 2: X.Y.Z.W:Port where X=type (2=main, 3=tester), Y=DC ID
//
// Returns:
//   - machineType: "main", "tester", or "unknown" when the type number is
//     neither 2 nor 3 or the address matches no known format.
//   - dcID: the DC number as it appears in the address, or "unknown" when the
//     address matches no known format.
func parseAddress(address string) (machineType string, dcID string) {
	for _, pattern := range addressPatterns {
		matches := pattern.FindStringSubmatch(address)
		if len(matches) < 3 {
			continue // not this layout, try the next one
		}

		switch matches[1] {
		case "2":
			machineType = "main"
		case "3":
			machineType = "tester"
		default:
			// The DC ID is still reported even when the type is unrecognised.
			machineType = "unknown"
		}
		return machineType, matches[2]
	}

	return "unknown", "unknown"
}
event.Attrs["Transition"] + roleName := event.Attrs["As"] + roleID := event.ID + + // Skip if no role name + if roleName == "" { + continue + } + + // Get or create worker + worker, exists := state.Workers[event.Machine] + if !exists { + machineType, dcID := parseAddress(event.Machine) + worker = &Worker{ + Machine: event.Machine, + Roles: []RoleInfo{}, + MachineType: machineType, + DCID: dcID, + } + state.Workers[event.Machine] = worker + } + + // Handle role transitions (including "Worker" role) + if transition == "Begin" { + // Add role if not already present + hasRole := false + for _, r := range worker.Roles { + if r.Name == roleName && r.ID == roleID { + hasRole = true + break + } + } + if !hasRole { + worker.Roles = append(worker.Roles, RoleInfo{ + Name: roleName, + ID: roleID, + Epoch: epochByID[roleID], // May be empty if metrics not seen yet + }) + } + } else if transition == "End" { + // Remove role with matching name and ID + newRoles := []RoleInfo{} + for _, r := range worker.Roles { + if !(r.Name == roleName && r.ID == roleID) { + newRoles = append(newRoles, r) + } + } + worker.Roles = newRoles + } + // "Refresh" transitions don't change state, just skip them + } + } + + // Second pass: Update roles with epoch info that may have arrived after the Role event + for _, worker := range state.Workers { + for i := range worker.Roles { + if worker.Roles[i].Epoch == "" { + if epoch, ok := epochByID[worker.Roles[i].ID]; ok { + worker.Roles[i].Epoch = epoch + } + } + } + } + + return state +} + +// GetWorkersByDC returns workers grouped by DC ID (main machines only) +func (cs *ClusterState) GetWorkersByDC() map[string][]*Worker { + dcMap := make(map[string][]*Worker) + + for _, w := range cs.Workers { + if w.MachineType == "main" { + dcMap[w.DCID] = append(dcMap[w.DCID], w) + } + } + + // Sort workers within each DC by machine address for consistent ordering + for _, workers := range dcMap { + for i := 0; i < len(workers); i++ { + for j := i + 1; j < 
len(workers); j++ { + if workers[i].Machine > workers[j].Machine { + workers[i], workers[j] = workers[j], workers[i] + } + } + } + } + + return dcMap +} + +// GetTesters returns all tester workers +func (cs *ClusterState) GetTesters() []*Worker { + testers := []*Worker{} + + for _, w := range cs.Workers { + if w.MachineType == "tester" { + testers = append(testers, w) + } + } + + // Sort testers by machine address for consistent ordering + for i := 0; i < len(testers); i++ { + for j := i + 1; j < len(testers); j++ { + if testers[i].Machine > testers[j].Machine { + testers[i], testers[j] = testers[j], testers[i] + } + } + } + + return testers +} + +// HasRoles returns true if the worker has any roles assigned +func (w *Worker) HasRoles() bool { + return len(w.Roles) > 0 +} + +// HasNonWorkerRoles returns true if the worker has any roles other than "Worker" +func (w *Worker) HasNonWorkerRoles() bool { + for _, role := range w.Roles { + if role.Name != "Worker" { + return true + } + } + return false +} + +// RolesString returns a comma-separated string of roles +func (w *Worker) RolesString() string { + if len(w.Roles) == 0 { + return "" + } + roleNames := make([]string, len(w.Roles)) + for i, r := range w.Roles { + roleNames[i] = r.Name + } + return strings.Join(roleNames, ", ") +} diff --git a/contrib/replay/go.mod b/contrib/replay/go.mod new file mode 100644 index 00000000000..a4716b12afc --- /dev/null +++ b/contrib/replay/go.mod @@ -0,0 +1,28 @@ +module github.com/apple/foundationdb/contrib/replay + +go 1.21 + +require ( + github.com/charmbracelet/bubbles v0.18.0 + github.com/charmbracelet/bubbletea v0.25.0 + github.com/charmbracelet/lipgloss v0.9.1 +) + +require ( + github.com/atotto/clipboard v0.1.4 // indirect + github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect + github.com/containerd/console v1.0.4-0.20230313162750-1ae8d489ac81 // indirect + github.com/lucasb-eyer/go-colorful v1.2.0 // indirect + github.com/mattn/go-isatty v0.0.18 // indirect + 
github.com/mattn/go-localereader v0.0.1 // indirect + github.com/mattn/go-runewidth v0.0.15 // indirect + github.com/muesli/ansi v0.0.0-20211018074035-2e021307bc4b // indirect + github.com/muesli/cancelreader v0.2.2 // indirect + github.com/muesli/reflow v0.3.0 // indirect + github.com/muesli/termenv v0.15.2 // indirect + github.com/rivo/uniseg v0.4.6 // indirect + golang.org/x/sync v0.1.0 // indirect + golang.org/x/sys v0.12.0 // indirect + golang.org/x/term v0.6.0 // indirect + golang.org/x/text v0.3.8 // indirect +) diff --git a/contrib/replay/go.sum b/contrib/replay/go.sum new file mode 100644 index 00000000000..3adf3f98d2e --- /dev/null +++ b/contrib/replay/go.sum @@ -0,0 +1,43 @@ +github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4= +github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI= +github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k= +github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8= +github.com/charmbracelet/bubbles v0.18.0 h1:PYv1A036luoBGroX6VWjQIE9Syf2Wby2oOl/39KLfy0= +github.com/charmbracelet/bubbles v0.18.0/go.mod h1:08qhZhtIwzgrtBjAcJnij1t1H0ZRjwHyGsy6AL11PSw= +github.com/charmbracelet/bubbletea v0.25.0 h1:bAfwk7jRz7FKFl9RzlIULPkStffg5k6pNt5dywy4TcM= +github.com/charmbracelet/bubbletea v0.25.0/go.mod h1:EN3QDR1T5ZdWmdfDzYcqOCAps45+QIJbLOBxmVNWNNg= +github.com/charmbracelet/lipgloss v0.9.1 h1:PNyd3jvaJbg4jRHKWXnCj1akQm4rh8dbEzN1p/u1KWg= +github.com/charmbracelet/lipgloss v0.9.1/go.mod h1:1mPmG4cxScwUQALAAnacHaigiiHB9Pmr+v1VEawJl6I= +github.com/containerd/console v1.0.4-0.20230313162750-1ae8d489ac81 h1:q2hJAaP1k2wIvVRd/hEHD7lacgqrCPS+k8g1MndzfWY= +github.com/containerd/console v1.0.4-0.20230313162750-1ae8d489ac81/go.mod h1:YynlIjWYF8myEu6sdkwKIvGQq+cOckRm6So2avqoYAk= +github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY= +github.com/lucasb-eyer/go-colorful 
v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= +github.com/mattn/go-isatty v0.0.18 h1:DOKFKCQ7FNG2L1rbrmstDN4QVRdS89Nkh85u68Uwp98= +github.com/mattn/go-isatty v0.0.18/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4= +github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88= +github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk= +github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= +github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/muesli/ansi v0.0.0-20211018074035-2e021307bc4b h1:1XF24mVaiu7u+CFywTdcDo2ie1pzzhwjt6RHqzpMU34= +github.com/muesli/ansi v0.0.0-20211018074035-2e021307bc4b/go.mod h1:fQuZ0gauxyBcmsdE3ZT4NasjaRdxmbCS0jRHsrWu3Ho= +github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA= +github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo= +github.com/muesli/reflow v0.3.0 h1:IFsN6K9NfGtjeggFP+68I4chLZV2yIKsXJFNZ+eWh6s= +github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8= +github.com/muesli/termenv v0.15.2 h1:GohcuySI0QmI3wN8Ok9PtKGkgkFIk7y6Vpb5PvrY+Wo= +github.com/muesli/termenv v0.15.2/go.mod h1:Epx+iuz8sNs7mNKhxzH4fWXGNpZwUaJKRS1noLXviQ8= +github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= +github.com/rivo/uniseg v0.4.6 h1:Sovz9sDSwbOz9tgUy8JpT+KgCkPYJEN/oYzlJiYTNLg= +github.com/rivo/uniseg v0.4.6/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= +golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.1.0/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o= +golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.6.0 h1:clScbb1cHjoCkyRbWwBEUZ5H/tIFu5TAXIqaZD0Gcjw= +golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= +golang.org/x/text v0.3.8 h1:nAL+RVCQ9uMn3vJZbV+MRnydTJFPf8qqY42YiA6MrqY= +golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= diff --git a/contrib/replay/main.go b/contrib/replay/main.go new file mode 100644 index 00000000000..7cc7df1853f --- /dev/null +++ b/contrib/replay/main.go @@ -0,0 +1,119 @@ +package main + +import ( + "fmt" + "os" + "path/filepath" + "time" +) + +func main() { + if err := run(); err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } +} + +func run() error { + var traceFile string + + // Check for help flag + if len(os.Args) == 2 && (os.Args[1] == "-h" || os.Args[1] == "--help") { + printHelp() + return nil + } + + // Check command-line arguments + if len(os.Args) == 2 { + // Explicit file provided + traceFile = os.Args[1] + fmt.Fprintf(os.Stderr, "Loading trace file: %s\n", traceFile) + } else if len(os.Args) == 1 { + // No argument - find latest trace*.xml in current directory + latestFile, err := findLatestTraceFile() + if err != nil { + return fmt.Errorf("no trace file specified and failed to find latest trace*.xml: %w", err) + } + traceFile = latestFile + fmt.Fprintf(os.Stderr, "Loading latest trace file: %s\n", traceFile) + } else { + return fmt.Errorf("usage: %s [trace-file.xml]", os.Args[0]) + } + + // Parse the trace file + traceData, err := parseTraceFile(traceFile) + if err != nil { + return fmt.Errorf("failed to parse trace file: %w", err) + } + + fmt.Fprintf(os.Stderr, "Loaded %d events (%.2fs - %.2fs)\n", + len(traceData.Events), traceData.MinTime, 
traceData.MaxTime) + fmt.Fprintf(os.Stderr, "Found %d DB configurations\n", len(traceData.Configs)) + fmt.Fprintf(os.Stderr, "Found %d recovery states\n", len(traceData.RecoveryStates)) + + // Start the TUI + return runUI(traceData) +} + +// printHelp prints usage and help information +func printHelp() { + fmt.Fprintf(os.Stderr, `replay - Interactive TUI for replaying FDB simulation trace files + +Usage: + replay [trace-file.xml] + replay -h | --help + +Arguments: + trace-file.xml Path to FDB trace XML file (optional) + If not provided, uses the most recently modified trace*.xml + file in the current directory + +Options: + -h, --help Show this help message + +Examples: + replay trace.xml # Load specific trace file + replay # Auto-load latest trace*.xml in current directory + +Tip: + Create an alias for quick access: alias r=replay + +Interactive Commands: + Press 'h' within the application to see all available navigation commands. +`) +} + +// findLatestTraceFile finds the most recently modified trace*.xml file in the current directory +func findLatestTraceFile() (string, error) { + // Get all trace*.xml files in current directory + matches, err := filepath.Glob("trace*.xml") + if err != nil { + return "", err + } + + if len(matches) == 0 { + return "", fmt.Errorf("no trace*.xml files found in current directory") + } + + // Find the most recently modified file + var latestFile string + var latestTime time.Time + + for _, file := range matches { + info, err := os.Stat(file) + if err != nil { + continue // Skip files we can't stat + } + + if latestFile == "" || info.ModTime().After(latestTime) { + latestFile = file + latestTime = info.ModTime() + } + } + + if latestFile == "" { + return "", fmt.Errorf("no accessible trace*.xml files found") + } + + return latestFile, nil +} diff --git a/contrib/replay/trace.go b/contrib/replay/trace.go new file mode 100644 index 00000000000..3c042855658 --- /dev/null +++ b/contrib/replay/trace.go @@ -0,0 +1,514 @@ +package main + 
// DBConfig represents the database configuration.
//
// Values are built by parseDBConfig from the JSON carried in the "Conf"
// attribute of a MasterRecoveryState event. parseDBConfig copies each field by
// hand from the decoded JSON map; the json tags below name the JSON key each
// field corresponds to. A key that is absent or has an unexpected JSON type
// leaves the field at its zero value.
type DBConfig struct {
	Time                 float64 // Time value passed to parseDBConfig (the source event's TimeValue)
	RedundancyMode       string  `json:"redundancy_mode"`
	UsableRegions        int     `json:"usable_regions"`
	Logs                 int     `json:"logs"`
	LogRouters           int     `json:"log_routers"`
	RemoteLogs           int     `json:"remote_logs"`
	Proxies              int     `json:"proxies"`
	GrvProxies           int     `json:"grv_proxies"`
	BackupWorkerEnabled  int     `json:"backup_worker_enabled"` // Kept as the numeric JSON value, not converted to bool
	StorageEngine        string  `json:"storage_engine"`
	RemoteRedundancyMode string  `json:"remote_redundancy_mode"`
	TenantMode           string  `json:"tenant_mode"`
	// Add other fields as needed
	RawJSON map[string]interface{} // Full JSON for reference
}
from UpdateRegistration (should match RV) + EventIndex int // Index of the event that completed this info + HasKCV bool // Whether KCV/RV has been populated + HasRecoveryTxnVersion bool // Whether epoch/recoveryTxnVersion has been populated +} + +// TraceData holds the parsed trace file and provides time-based access +type TraceData struct { + Events []TraceEvent + Configs []DBConfig // Database configurations over time + RecoveryStates []RecoveryState + EpochVersions []EpochVersionInfo // Epoch version info over time + MinTime float64 + MaxTime float64 + TimeStep float64 // Default time increment for scrubbing +} + +// parseTraceFile reads an XML trace file and returns TraceData. +func parseTraceFile(filepath string) (*TraceData, error) { + file, err := os.Open(filepath) + if err != nil { + return nil, fmt.Errorf("failed to open trace file: %w", err) + } + defer file.Close() + + // Get file size to estimate event count for pre-allocation + fileInfo, _ := file.Stat() + fileSize := fileInfo.Size() + // Estimate ~400 bytes per event on average, pre-allocate to reduce reallocations + estimatedEvents := int(fileSize / 400) + if estimatedEvents < 1000 { + estimatedEvents = 1000 + } + + decoder := xml.NewDecoder(file) + events := make([]TraceEvent, 0, estimatedEvents) + configs := make([]DBConfig, 0, 100) // Pre-allocate for configs + minTime := 0.0 + maxTime := 0.0 + + fmt.Printf("Loading trace file (%.1f GB)...\n", float64(fileSize)/(1024*1024*1024)) + + eventCount := 0 + for { + token, err := decoder.Token() + if err == io.EOF { + break + } + if err != nil { + return nil, fmt.Errorf("failed to decode XML: %w", err) + } + + switch elem := token.(type) { + case xml.StartElement: + if elem.Name.Local == "Event" { + event := TraceEvent{ + Attrs: make(map[string]string, 10), // Pre-allocate with capacity + } + + // Parse all attributes + for _, attr := range elem.Attr { + switch attr.Name.Local { + case "Severity": + event.Severity = attr.Value + case "Time": + event.Time 
= attr.Value + // Parse time as float + if t, err := strconv.ParseFloat(attr.Value, 64); err == nil { + event.TimeValue = t + if t > maxTime { + maxTime = t + } + } + case "DateTime": + event.DateTime = attr.Value + case "Type": + event.Type = attr.Value + case "Machine": + event.Machine = attr.Value + case "ID": + event.ID = attr.Value + default: + // Store any additional attributes + event.Attrs[attr.Name.Local] = attr.Value + } + } + + events = append(events, event) + eventCount++ + + // Show progress every 100k events + if eventCount%100000 == 0 { + fmt.Printf(" Loaded %dK events...\n", eventCount/1000) + } + + // Parse DB config if this is a MasterRecoveryState event + if event.Type == "MasterRecoveryState" { + if confStr, ok := event.Attrs["Conf"]; ok { + if config := parseDBConfig(confStr, event.TimeValue); config != nil { + configs = append(configs, *config) + } + } + } + } + } + } + + fmt.Printf("Loaded %d events total. Sorting...\n", len(events)) + + // Sort events by time + sort.Slice(events, func(i, j int) bool { + return events[i].TimeValue < events[j].TimeValue + }) + + // Sort configs by time + sort.Slice(configs, func(i, j int) bool { + return configs[i].Time < configs[j].Time + }) + + fmt.Println("Building indices...") + + // Build RecoveryStates array from sorted events with correct indices + recoveryStates := make([]RecoveryState, 0, len(configs)) + for i, event := range events { + if event.Type == "MasterRecoveryState" { + statusCode := event.Attrs["StatusCode"] + status := event.Attrs["Status"] + if statusCode != "" && status != "" { + recoveryStates = append(recoveryStates, RecoveryState{ + Time: event.TimeValue, + StatusCode: statusCode, + Status: status, + EventIndex: i, + }) + } + } + } + + // Build EpochVersions array from GetDurableResult and UpdateRegistration events + // GetDurableResult provides KnownCommittedVersion (KCV) and EpochEnd (RV) + // UpdateRegistration provides RecoveryCount (epoch), RecoveryTxnVersion, LastEpochEnd (RV) + 
epochVersions := make([]EpochVersionInfo, 0, 100) + + // Track KCV from GetDurableResult keyed by EndVersion (which equals LastEpochEnd in UpdateRegistration) + kcvByRV := make(map[int64]int64) // EndVersion/LastEpochEnd -> KCV + + // First pass: collect KCV values from GetDurableResult + for _, event := range events { + if event.Type == "GetDurableResult" { + var endVersion, kcv int64 + // Use EndVersion (not EpochEnd which can be INT64_MAX placeholder) + if endVersionStr, ok := event.Attrs["EndVersion"]; ok { + endVersion, _ = strconv.ParseInt(endVersionStr, 10, 64) + } + if kcvStr, ok := event.Attrs["KnownCommittedVersion"]; ok { + kcv, _ = strconv.ParseInt(kcvStr, 10, 64) + } + if endVersion > 0 && kcv > 0 { + kcvByRV[endVersion] = kcv + } + } + } + + // Second pass: build epoch versions from UpdateRegistration + for i, event := range events { + if event.Type == "UpdateRegistration" { + var epoch, recoveryTxn, lastEpochEnd int64 + + if epochStr, ok := event.Attrs["RecoveryCount"]; ok { + epoch, _ = strconv.ParseInt(epochStr, 10, 64) + } + if recoveryTxnStr, ok := event.Attrs["RecoveryTxnVersion"]; ok { + recoveryTxn, _ = strconv.ParseInt(recoveryTxnStr, 10, 64) + } + if lastEpochEndStr, ok := event.Attrs["LastEpochEnd"]; ok { + lastEpochEnd, _ = strconv.ParseInt(lastEpochEndStr, 10, 64) + } + + // Skip events with invalid/unset values (-1 means not determined yet) + if recoveryTxn < 0 || lastEpochEnd < 0 { + continue + } + + // Only record if we have meaningful data + if epoch > 0 || recoveryTxn > 0 || lastEpochEnd > 0 { + info := EpochVersionInfo{ + Time: event.TimeValue, + Epoch: epoch, + RV: lastEpochEnd, + RecoveryTxnVersion: recoveryTxn, + LastEpochEnd: lastEpochEnd, + EventIndex: i, + } + + // Try to find matching KCV from GetDurableResult + if kcv, ok := kcvByRV[lastEpochEnd]; ok { + info.KCV = kcv + info.HasKCV = true + } + info.HasRecoveryTxnVersion = true + + epochVersions = append(epochVersions, info) + } + } + } + + // Sort epoch versions by time + 
sort.Slice(epochVersions, func(i, j int) bool { + return epochVersions[i].Time < epochVersions[j].Time + }) + + // Calculate minimum time step from actual event intervals (sample to avoid full scan) + timeStep := 0.1 // Default fallback + if len(events) > 1 { + // Only sample first 10000 events to find min diff (faster) + sampleSize := 10000 + if len(events) < sampleSize { + sampleSize = len(events) + } + minDiff := events[1].TimeValue - events[0].TimeValue + for i := 2; i < sampleSize; i++ { + diff := events[i].TimeValue - events[i-1].TimeValue + if diff > 0 && diff < minDiff { + minDiff = diff + } + } + if minDiff > 0 { + timeStep = minDiff + } + } + + fmt.Println("Ready!") + + return &TraceData{ + Events: events, + Configs: configs, + RecoveryStates: recoveryStates, + EpochVersions: epochVersions, + MinTime: minTime, + MaxTime: maxTime, + TimeStep: timeStep, + }, nil +} + +// parseDBConfig parses the HTML-encoded JSON config string +func parseDBConfig(confStr string, time float64) *DBConfig { + // Decode HTML entities (" -> ") + decoded := html.UnescapeString(confStr) + + // Parse JSON + var rawConfig map[string]interface{} + if err := json.Unmarshal([]byte(decoded), &rawConfig); err != nil { + return nil + } + + config := &DBConfig{ + Time: time, + RawJSON: rawConfig, + } + + // Extract common fields + if v, ok := rawConfig["redundancy_mode"].(string); ok { + config.RedundancyMode = v + } + if v, ok := rawConfig["usable_regions"].(float64); ok { + config.UsableRegions = int(v) + } + if v, ok := rawConfig["logs"].(float64); ok { + config.Logs = int(v) + } + if v, ok := rawConfig["log_routers"].(float64); ok { + config.LogRouters = int(v) + } + if v, ok := rawConfig["remote_logs"].(float64); ok { + config.RemoteLogs = int(v) + } + if v, ok := rawConfig["proxies"].(float64); ok { + config.Proxies = int(v) + } + if v, ok := rawConfig["grv_proxies"].(float64); ok { + config.GrvProxies = int(v) + } + if v, ok := rawConfig["backup_worker_enabled"].(float64); ok { + 
config.BackupWorkerEnabled = int(v) + } + if v, ok := rawConfig["storage_engine"].(string); ok { + config.StorageEngine = v + } + if v, ok := rawConfig["remote_redundancy_mode"].(string); ok { + config.RemoteRedundancyMode = v + } + if v, ok := rawConfig["tenant_mode"].(string); ok { + config.TenantMode = v + } + + return config +} + +// GetEventsUpToTime returns all events that occurred up to and including the given time +func (td *TraceData) GetEventsUpToTime(targetTime float64) []TraceEvent { + // Binary search to find the index + idx := sort.Search(len(td.Events), func(i int) bool { + return td.Events[i].TimeValue > targetTime + }) + + if idx == 0 { + return []TraceEvent{} + } + + return td.Events[:idx] +} + +// GetLatestConfigAtTime returns the latest DB config at or before the given time +func (td *TraceData) GetLatestConfigAtTime(targetTime float64) *DBConfig { + // Binary search to find the latest config <= targetTime + idx := sort.Search(len(td.Configs), func(i int) bool { + return td.Configs[i].Time > targetTime + }) + + if idx == 0 { + return nil + } + + return &td.Configs[idx-1] +} + +// GetLatestRecoveryStateAtIndex returns the latest recovery state at or before the given event index +func (td *TraceData) GetLatestRecoveryStateAtIndex(eventIndex int) *RecoveryState { + // Binary search to find the latest recovery state with EventIndex <= eventIndex + idx := sort.Search(len(td.RecoveryStates), func(i int) bool { + return td.RecoveryStates[i].EventIndex > eventIndex + }) + + if idx == 0 { + return nil + } + + return &td.RecoveryStates[idx-1] +} + +// GetLatestEpochVersionAtIndex returns the latest epoch version info at or before the given event index +func (td *TraceData) GetLatestEpochVersionAtIndex(eventIndex int) *EpochVersionInfo { + // Binary search to find the latest epoch version with EventIndex <= eventIndex + idx := sort.Search(len(td.EpochVersions), func(i int) bool { + return td.EpochVersions[i].EventIndex > eventIndex + }) + + if idx == 0 { + 
return nil + } + + return &td.EpochVersions[idx-1] +} + +// FindPreviousRecovery finds the latest MasterRecoveryState before the given event index +func (td *TraceData) FindPreviousRecovery(eventIndex int) *RecoveryState { + // Binary search to find where to start looking + idx := sort.Search(len(td.RecoveryStates), func(i int) bool { + return td.RecoveryStates[i].EventIndex >= eventIndex + }) + + // Walk backwards from idx-1 to find any recovery state + if idx > 0 { + return &td.RecoveryStates[idx-1] + } + + return nil +} + +// FindNextRecovery finds the earliest MasterRecoveryState after the given event index +func (td *TraceData) FindNextRecovery(eventIndex int) *RecoveryState { + // Binary search to find where to start looking + // Use > to skip the current event index + idx := sort.Search(len(td.RecoveryStates), func(i int) bool { + return td.RecoveryStates[i].EventIndex > eventIndex + }) + + // Return first recovery state after current index + if idx < len(td.RecoveryStates) { + return &td.RecoveryStates[idx] + } + + return nil +} + +// FindPreviousRecoveryWithStatusCode finds the latest recovery state before the given event index with the specified status code +func (td *TraceData) FindPreviousRecoveryWithStatusCode(eventIndex int, statusCode string) *RecoveryState { + // Binary search to find where to start looking + idx := sort.Search(len(td.RecoveryStates), func(i int) bool { + return td.RecoveryStates[i].EventIndex >= eventIndex + }) + + // Walk backwards from idx-1 to find the first match + // Start from idx-1 to skip any recovery at exactly eventIndex + for i := idx - 1; i >= 0; i-- { + if td.RecoveryStates[i].StatusCode == statusCode { + return &td.RecoveryStates[i] + } + } + + return nil +} + +// FindNextRecoveryWithStatusCode finds the earliest recovery state after the given event index with the specified status code +func (td *TraceData) FindNextRecoveryWithStatusCode(eventIndex int, statusCode string) *RecoveryState { + // Binary search to find 
where to start looking + // Use > to skip the current event index + idx := sort.Search(len(td.RecoveryStates), func(i int) bool { + return td.RecoveryStates[i].EventIndex > eventIndex + }) + + // Walk forwards from idx to find the first match + for i := idx; i < len(td.RecoveryStates); i++ { + if td.RecoveryStates[i].StatusCode == statusCode { + return &td.RecoveryStates[i] + } + } + + return nil +} + +// GetEventIndexAtTime finds the index of the first event at or closest to targetTime +func (td *TraceData) GetEventIndexAtTime(targetTime float64) int { + // Binary search to find the first event at targetTime or later + idx := sort.Search(len(td.Events), func(i int) bool { + return td.Events[i].TimeValue >= targetTime + }) + + if idx >= len(td.Events) { + // If no event at or after targetTime, return last event + return len(td.Events) - 1 + } + + // If we found an event at exactly targetTime, return it + if td.Events[idx].TimeValue == targetTime { + return idx + } + + // Otherwise, check if the previous event is closer + if idx > 0 { + prevDiff := targetTime - td.Events[idx-1].TimeValue + nextDiff := td.Events[idx].TimeValue - targetTime + if prevDiff < nextDiff { + return idx - 1 + } + } + + return idx +} diff --git a/contrib/replay/ui.go b/contrib/replay/ui.go new file mode 100644 index 00000000000..437a80d2962 --- /dev/null +++ b/contrib/replay/ui.go @@ -0,0 +1,4617 @@ +package main + +import ( + "encoding/json" + "fmt" + "regexp" + "sort" + "strconv" + "strings" + + "github.com/charmbracelet/bubbles/textinput" + tea "github.com/charmbracelet/bubbletea" + "github.com/charmbracelet/lipgloss" +) + +// model holds the UI state for the Bubbletea application +type model struct { + traceData *TraceData + currentTime float64 + currentEventIndex int // Index of current event in traceData.Events + clusterState *ClusterState + width int + height int + timeInputMode bool + timeInput textinput.Model + configViewMode bool + configScrollOffset int // Vertical scroll offset 
for config popup + helpViewMode bool // Help popup mode + healthViewMode bool // Health popup mode + healthScrollOffset int // Vertical scroll offset for health popup + searchMode bool // Search input mode + searchDirection string // "forward" or "backward" + searchInput textinput.Model + searchPattern string // Current search pattern + searchActive bool // Whether search highlighting is active + // Filter state + filterViewMode bool // Filter popup mode + filterShowAll bool // Whether "All" is checked (default true) + filterCurrentCategory int // 0=Raw, 1=Machine, 2=Time, 3=Message + // Category 1: Raw filters (OR logic) + filterRawList []string // Raw filter patterns + filterRawInput textinput.Model + filterRawSelectedIndex int + filterRawInputActive bool + filterRawColumn int // Current column in raw filter list (for ctrl+f/ctrl+b navigation) + filterRawDisabled map[int]bool // Track disabled raw filters (map[index]disabled) + filterRawCompiledRegex []*regexp.Regexp // Pre-compiled regex patterns for raw filters (parallel to filterRawList) + filterTypeSearchMode bool // Type search popup mode + filterTypeSearchInput textinput.Model + filterTypeSearchList []string // All unique Type values from trace + filterTypeSearchSelected int // Currently selected Type in list + // Category 2: Machine filters (OR logic) + filterMachineList []string // Selected machine addresses + filterMachineSet map[string]bool // Set for O(1) lookup of selected machines + filterMachineSelectMode bool // In machine selection popup + filterMachineInput textinput.Model // For fuzzy search in machine popup + filterMachineDCs map[string]bool // Selected DCs (map[dcID]selected) + filterMachineScroll int // Scroll offset in machine popup (deprecated, using columns now) + filterMachineSelected int // Currently highlighted item in machine popup + filterMachineColumn int // Current column in machine popup (for ctrl+f/ctrl+b navigation) + // Category 3: Time range filter + filterTimeEnabled bool // 
Whether time filter is active + filterTimeStart float64 // Start time + filterTimeEnd float64 // End time + filterTimeInputMode bool // Configuring time range + filterTimeEditingStart bool // true=editing start, false=editing end + filterTimeInput textinput.Model + // Category 4: Message filter (NetworkMessageSent events only) + filterMessageEnabled bool // Whether message filter is active + // Performance caches + machineDCCache map[string]string // Cache DC extraction results (map[machineAddr]dcID) +} + +// newModel creates a new model with the given trace data +func newModel(traceData *TraceData) model { + ti := textinput.New() + ti.Placeholder = "Enter time in seconds (e.g., 123.456)" + ti.CharLimit = 20 + ti.Width = 40 + + si := textinput.New() + si.Placeholder = "Enter search pattern (use * for wildcard)" + si.CharLimit = 100 + si.Width = 60 + + // Raw filter input + rawInput := textinput.New() + rawInput.Placeholder = "Enter raw filter pattern (e.g., Type=WorkerHealthMonitor or Role*TL)" + rawInput.CharLimit = 100 + rawInput.Width = 70 + + // Machine filter fuzzy search input + machineInput := textinput.New() + machineInput.Placeholder = "Type to filter machines..." + machineInput.CharLimit = 50 + machineInput.Width = 60 + + // Time filter input + timeFilterInput := textinput.New() + timeFilterInput.Placeholder = "Enter time (e.g., 4.5)" + timeFilterInput.CharLimit = 20 + timeFilterInput.Width = 30 + + // Type search input + typeSearchInput := textinput.New() + typeSearchInput.Placeholder = "Type to search event types..." 
+ typeSearchInput.CharLimit = 50 + typeSearchInput.Width = 60 + + // Extract all unique Type values from trace data + typeSet := make(map[string]bool) + for _, event := range traceData.Events { + if event.Type != "" { + typeSet[event.Type] = true + } + } + var allTypes []string + for t := range typeSet { + allTypes = append(allTypes, t) + } + sort.Strings(allTypes) + + return model{ + traceData: traceData, + currentTime: 0.0, + currentEventIndex: 0, + clusterState: NewClusterState(), + timeInputMode: false, + timeInput: ti, + configViewMode: false, + configScrollOffset: 0, + helpViewMode: false, + healthViewMode: false, + healthScrollOffset: 0, + searchMode: false, + searchDirection: "", + searchInput: si, + searchPattern: "", + searchActive: false, + // Filter initialization + filterViewMode: false, + filterShowAll: true, // Default: show all events + filterCurrentCategory: 0, // Start in Raw category + filterRawList: []string{}, + filterRawInput: rawInput, + filterRawSelectedIndex: -1, + filterRawInputActive: false, + filterRawColumn: 0, + filterRawDisabled: make(map[int]bool), + filterRawCompiledRegex: []*regexp.Regexp{}, + filterTypeSearchMode: false, + filterTypeSearchInput: typeSearchInput, + filterTypeSearchList: allTypes, + filterTypeSearchSelected: 0, + filterMachineList: []string{}, + filterMachineSet: make(map[string]bool), + filterMachineSelectMode: false, + filterMachineInput: machineInput, + filterMachineDCs: make(map[string]bool), + filterMachineScroll: 0, + filterMachineSelected: 0, + filterMachineColumn: 0, + filterTimeEnabled: false, + filterTimeStart: traceData.MinTime, + filterTimeEnd: traceData.MaxTime, + filterTimeInputMode: false, + filterTimeEditingStart: true, + filterTimeInput: timeFilterInput, + filterMessageEnabled: false, + machineDCCache: make(map[string]string), + } +} + +// Init initializes the model (required by Bubbletea) +func (m model) Init() tea.Cmd { + return nil +} + +// Update handles messages and updates the model (required 
by Bubbletea) +func (m model) Update(msg tea.Msg) (tea.Model, tea.Cmd) { + var cmd tea.Cmd + + // Handle filter view mode + if m.filterViewMode { + // If in machine selection popup + if m.filterMachineSelectMode { + return m.handleMachineSelectionPopup(msg) + } + + // If in time input mode within filter + if m.filterTimeInputMode { + return m.handleFilterTimeInput(msg) + } + + // If in Type search popup + if m.filterTypeSearchMode { + return m.handleTypeSearchPopup(msg) + } + + switch msg := msg.(type) { + case tea.KeyMsg: + // Handle raw filter input mode + if m.filterRawInputActive { + switch msg.String() { + case "enter": + if filterText := m.filterRawInput.Value(); filterText != "" { + // Check if we're editing an existing filter or adding new + if m.filterRawSelectedIndex >= 0 && m.filterRawSelectedIndex < len(m.filterRawList) { + // Editing existing - check if original value is still in input + // If user entered edit mode via enter, replace the filter + m.filterRawList[m.filterRawSelectedIndex] = filterText + } else { + // Adding new + m.filterRawList = append(m.filterRawList, filterText) + m.filterRawSelectedIndex = len(m.filterRawList) - 1 + } + // Recompile regex patterns for performance + m.recompileRawFilterRegexes() + m.filterRawInput.Reset() + m.filterRawInput.Blur() + m.filterRawInputActive = false + } + return m, nil + case "esc", "ctrl+c": + m.filterRawInput.Reset() + m.filterRawInput.Blur() + m.filterRawInputActive = false + return m, nil + default: + m.filterRawInput, cmd = m.filterRawInput.Update(msg) + return m, cmd + } + } + + // Handle keys when not in input mode + switch msg.String() { + case "q", "f", "esc", "ctrl+c": + m.filterViewMode = false + m.filterRawInput.Reset() + // Ensure current event is visible after filter changes + m.ensureCurrentEventVisible() + return m, nil + + case "space", " ": + // Toggle "All" checkbox + m.filterShowAll = !m.filterShowAll + // Ensure current event is visible after toggling + 
m.ensureCurrentEventVisible() + return m, nil + + case "ctrl+n": + // Move to next category OR navigate down in raw filter list + if !m.filterShowAll { + // If in Raw category with filters, try to navigate within the list first + if m.filterCurrentCategory == 0 && len(m.filterRawList) > 0 { + maxRows := 5 + itemsPerColumn := maxRows + + // Calculate the start and end index for current column + startIdx := m.filterRawColumn * itemsPerColumn + endIdx := startIdx + itemsPerColumn + if endIdx > len(m.filterRawList) { + endIdx = len(m.filterRawList) + } + + // Check if we can move down within this column + if m.filterRawSelectedIndex < endIdx - 1 { + // Move down within this column + m.filterRawSelectedIndex++ + return m, nil + } else { + // At the end of column, switch to next category + m.filterCurrentCategory = (m.filterCurrentCategory + 1) % 4 + return m, nil + } + } else { + // Not in Raw category or no filters, just switch category + m.filterCurrentCategory = (m.filterCurrentCategory + 1) % 4 + } + } + return m, nil + + case "ctrl+p": + // Move to previous category OR navigate up in raw filter list + if !m.filterShowAll { + // If in Raw category with filters, try to navigate within the list first + if m.filterCurrentCategory == 0 && len(m.filterRawList) > 0 { + maxRows := 5 + itemsPerColumn := maxRows + + // Calculate the start and end index for current column + startIdx := m.filterRawColumn * itemsPerColumn + + // Check if we can move up within this column + if m.filterRawSelectedIndex > startIdx { + // Move up within this column + m.filterRawSelectedIndex-- + return m, nil + } else { + // At the start of column, switch to previous category + m.filterCurrentCategory = (m.filterCurrentCategory - 1 + 4) % 4 + return m, nil + } + } else { + // Not in Raw category or no filters, just switch category + m.filterCurrentCategory = (m.filterCurrentCategory - 1 + 4) % 4 + } + } + return m, nil + + case "1": + // Jump to Raw category + if !m.filterShowAll { + 
m.filterCurrentCategory = 0 + } + return m, nil + + case "2": + // Jump to Machine category + if !m.filterShowAll { + m.filterCurrentCategory = 1 + } + return m, nil + + case "3": + // Jump to Time category + if !m.filterShowAll { + m.filterCurrentCategory = 2 + } + return m, nil + + case "4": + // Jump to Message category + if !m.filterShowAll { + m.filterCurrentCategory = 3 + } + return m, nil + + case "a": + // Add new filter (only in Raw category) + if !m.filterShowAll && m.filterCurrentCategory == 0 { + m.filterRawInputActive = true + m.filterRawSelectedIndex = -1 // Mark as adding new (not editing) + m.filterRawInput.Focus() + return m, textinput.Blink + } + return m, nil + + case "r": + // Remove selected filter (only in Raw category) + if !m.filterShowAll && m.filterCurrentCategory == 0 { + if m.filterRawSelectedIndex >= 0 && m.filterRawSelectedIndex < len(m.filterRawList) { + // Remove filter and update disabled map + removedIdx := m.filterRawSelectedIndex + m.filterRawList = append(m.filterRawList[:removedIdx], m.filterRawList[removedIdx+1:]...) 
+ + // Rebuild disabled map, shifting indices + newDisabled := make(map[int]bool) + for i, disabled := range m.filterRawDisabled { + if i < removedIdx { + newDisabled[i] = disabled + } else if i > removedIdx { + newDisabled[i-1] = disabled + } + // Skip i == removedIdx (the removed filter) + } + m.filterRawDisabled = newDisabled + + // Recompile regex patterns for performance + m.recompileRawFilterRegexes() + + // Update selection + if m.filterRawSelectedIndex >= len(m.filterRawList) { + m.filterRawSelectedIndex = len(m.filterRawList) - 1 + } + if m.filterRawSelectedIndex < 0 { + m.filterRawSelectedIndex = 0 + } + + // Update column index + if len(m.filterRawList) > 0 { + maxRows := 5 + m.filterRawColumn = m.filterRawSelectedIndex / maxRows + } else { + m.filterRawColumn = 0 + } + } + } + return m, nil + + case "e": + // Edit selected filter (only in Raw category) + if !m.filterShowAll && m.filterCurrentCategory == 0 { + if m.filterRawSelectedIndex >= 0 && m.filterRawSelectedIndex < len(m.filterRawList) { + m.filterRawInputActive = true + m.filterRawInput.SetValue(m.filterRawList[m.filterRawSelectedIndex]) + m.filterRawInput.Focus() + // We'll replace this filter when user presses enter again + return m, textinput.Blink + } + } + return m, nil + + case "d": + // Toggle disabled state for selected raw filter OR toggle time filter OR toggle message filter + if !m.filterShowAll { + if m.filterCurrentCategory == 0 { + // Raw category: toggle disabled state for selected filter + if m.filterRawSelectedIndex >= 0 && m.filterRawSelectedIndex < len(m.filterRawList) { + m.filterRawDisabled[m.filterRawSelectedIndex] = !m.filterRawDisabled[m.filterRawSelectedIndex] + // Ensure current event is visible after toggling + m.ensureCurrentEventVisible() + } + } else if m.filterCurrentCategory == 2 { + // Time category: toggle time filter enabled/disabled + m.filterTimeEnabled = !m.filterTimeEnabled + // Ensure current event is visible after toggling + m.ensureCurrentEventVisible() + 
} else if m.filterCurrentCategory == 3 { + // Message category: toggle message filter enabled/disabled + m.filterMessageEnabled = !m.filterMessageEnabled + // Ensure current event is visible after toggling + m.ensureCurrentEventVisible() + } + } + return m, nil + + case "t": + // Open Type search popup (only in Raw category) + if m.filterCurrentCategory == 0 { + m.filterTypeSearchMode = true + m.filterTypeSearchInput.Focus() + m.filterTypeSearchSelected = 0 + return m, textinput.Blink + } + return m, nil + + case "c": + // Toggle common trace event filters (only in Raw category) + if m.filterCurrentCategory == 0 { + // Define common trace event types + commonTypes := []string{ + "Type=MasterRecoveryState", + "Type=ProxyReject", + "Type=CCWDB", + "Type=TPLSOnErrorLogSystemFailed", + "Type=TPLSOnErrorBackupFailed", + "Type=ForcedRecoveryStart", + "Type=MasterRegistrationKill", + "Type=ClusterControllerHealthMonitor", + "Type=DegradedServerDetectedAndTriggerRecovery", + "Type=RecentRecoveryCountHigh", + "Type=DegradedServerDetectedAndSuggestRecovery", + "Type=DegradedServerDetectedAndTriggerFailover", + "Type=AddFailureInjectionWorkload", + "Type=TrackTLogRecovery", + "Type=MutationTracking", + "Type=Assassination", + "Severity=40", + } + + // Check if any common filters exist + hasCommonFilters := false + for _, filter := range m.filterRawList { + for _, common := range commonTypes { + if filter == common { + hasCommonFilters = true + break + } + } + if hasCommonFilters { + break + } + } + + if hasCommonFilters { + // Remove all common filters + var newList []string + newDisabled := make(map[int]bool) + newIdx := 0 + for i, filter := range m.filterRawList { + isCommon := false + for _, common := range commonTypes { + if filter == common { + isCommon = true + break + } + } + if !isCommon { + newList = append(newList, filter) + // Preserve disabled state for non-common filters + if m.filterRawDisabled[i] { + newDisabled[newIdx] = true + } + newIdx++ + } + } + 
m.filterRawList = newList + m.filterRawDisabled = newDisabled + m.filterRawSelectedIndex = 0 + m.filterRawColumn = 0 // Reset to first column + if m.filterRawSelectedIndex >= len(m.filterRawList) { + m.filterRawSelectedIndex = len(m.filterRawList) - 1 + } + } else { + // Add all common filters (enabled by default) + startIdx := len(m.filterRawList) + m.filterRawList = append(m.filterRawList, commonTypes...) + m.filterRawSelectedIndex = startIdx + // Update column index to show the new filters + maxRows := 5 + m.filterRawColumn = m.filterRawSelectedIndex / maxRows + } + + // Recompile regex patterns for performance + m.recompileRawFilterRegexes() + + // Update visibility after filter change + m.ensureCurrentEventVisible() + } + return m, nil + + case "enter": + // Open configuration for Machine or Time category only + if !m.filterShowAll { + if m.filterCurrentCategory == 1 { + // Enter machine selection mode + m.filterMachineSelectMode = true + m.filterMachineInput.Focus() + return m, textinput.Blink + } else if m.filterCurrentCategory == 2 { + // Enter time configuration mode + m.filterTimeInputMode = true + m.filterTimeEditingStart = true + m.filterTimeInput.SetValue(fmt.Sprintf("%.6f", m.filterTimeStart)) + m.filterTimeInput.Focus() + return m, textinput.Blink + } + } + return m, nil + + case "ctrl+f": + // Move to next column in raw filter list (only in Raw category) + if !m.filterShowAll && m.filterCurrentCategory == 0 && len(m.filterRawList) > 0 { + // Pack into columns to determine count + maxRows := 5 + numColumns := (len(m.filterRawList) + maxRows - 1) / maxRows + if numColumns > 0 { + m.filterRawColumn++ + if m.filterRawColumn >= numColumns { + m.filterRawColumn = 0 // Wrap to first column + } + // Set selection to first item in new column + m.filterRawSelectedIndex = m.filterRawColumn * maxRows + // Clamp to valid range + if m.filterRawSelectedIndex >= len(m.filterRawList) { + m.filterRawSelectedIndex = len(m.filterRawList) - 1 + } + } + } + return m, nil 
+ + case "ctrl+b": + // Move to previous column in raw filter list (only in Raw category) + if !m.filterShowAll && m.filterCurrentCategory == 0 && len(m.filterRawList) > 0 { + // Pack into columns to determine count + maxRows := 5 + numColumns := (len(m.filterRawList) + maxRows - 1) / maxRows + if numColumns > 0 { + m.filterRawColumn-- + if m.filterRawColumn < 0 { + m.filterRawColumn = numColumns - 1 // Wrap to last column + } + // Set selection to first item in new column + m.filterRawSelectedIndex = m.filterRawColumn * maxRows + // Clamp to valid range + if m.filterRawSelectedIndex >= len(m.filterRawList) { + m.filterRawSelectedIndex = len(m.filterRawList) - 1 + } + } + } + return m, nil + } + } + return m, nil + } + + // Handle help view mode + if m.helpViewMode { + switch msg := msg.(type) { + case tea.KeyMsg: + switch msg.String() { + case "q", "h", "esc", "ctrl+c": + // Exit help view mode + m.helpViewMode = false + return m, nil + } + } + return m, nil + } + + // Handle health view mode + if m.healthViewMode { + switch msg := msg.(type) { + case tea.KeyMsg: + switch msg.String() { + case "q", "x", "esc", "ctrl+c": + // Exit health view mode + m.healthViewMode = false + m.healthScrollOffset = 0 // Reset scroll when exiting + return m, nil + case "ctrl+p": + // Scroll up in health view + if m.healthScrollOffset > 0 { + m.healthScrollOffset-- + } + return m, nil + case "ctrl+n": + // Scroll down in health view + m.healthScrollOffset++ + // Will be clamped in renderHealthPopup + return m, nil + } + } + return m, nil + } + + // Handle config view mode + if m.configViewMode { + switch msg := msg.(type) { + case tea.KeyMsg: + switch msg.String() { + case "q", "c", "esc", "ctrl+c": + // Exit config view mode + m.configViewMode = false + m.configScrollOffset = 0 // Reset scroll when exiting + return m, nil + case "ctrl+p": + // Scroll up in config view + if m.configScrollOffset > 0 { + m.configScrollOffset-- + } + return m, nil + case "ctrl+n": + // Scroll down in 
config view + m.configScrollOffset++ + // Will be clamped in renderConfigPopup + return m, nil + } + } + return m, nil + } + + // Handle time input mode separately + if m.timeInputMode { + switch msg := msg.(type) { + case tea.KeyMsg: + switch msg.String() { + case "enter": + // Try to parse the time and jump to it + if timeStr := m.timeInput.Value(); timeStr != "" { + if targetTime, err := strconv.ParseFloat(timeStr, 64); err == nil { + // Only jump if within valid range + if targetTime >= m.traceData.MinTime && targetTime <= m.traceData.MaxTime { + targetIdx := m.traceData.GetEventIndexAtTime(targetTime) + // Find nearest visible event from target (search forward first, then backward) + found := false + // Try forward + for i := targetIdx; i < len(m.traceData.Events); i++ { + if eventMatchesFilters(&m.traceData.Events[i], &m) { + m.currentEventIndex = i + m.currentTime = m.traceData.Events[i].TimeValue + m.updateClusterState() + found = true + break + } + } + // If not found forward, try backward + if !found { + for i := targetIdx - 1; i >= 0; i-- { + if eventMatchesFilters(&m.traceData.Events[i], &m) { + m.currentEventIndex = i + m.currentTime = m.traceData.Events[i].TimeValue + m.updateClusterState() + break + } + } + } + // Exit time input mode + m.timeInputMode = false + m.timeInput.Reset() + } + // If invalid, stay in input mode so user can see the error and correct it + return m, nil + } + } + // If empty or parse error, stay in input mode + return m, nil + + case "esc", "ctrl+c", "q", "t": + // Cancel time input + m.timeInputMode = false + m.timeInput.Reset() + return m, nil + } + } + + // Update the text input + m.timeInput, cmd = m.timeInput.Update(msg) + return m, cmd + } + + // Handle search mode separately + if m.searchMode { + switch msg := msg.(type) { + case tea.KeyMsg: + switch msg.String() { + case "enter": + // Perform search with the entered pattern + if searchText := m.searchInput.Value(); searchText != "" { + m.searchPattern = searchText + 
m.searchActive = true + + // Search for match + var matchIndex int + if m.searchDirection == "forward" { + matchIndex = m.searchForward(m.currentEventIndex + 1, m.searchPattern) + } else { + matchIndex = m.searchBackward(m.currentEventIndex - 1, m.searchPattern) + } + + if matchIndex >= 0 { + m.currentEventIndex = matchIndex + m.currentTime = m.traceData.Events[m.currentEventIndex].TimeValue + m.updateClusterState() + } + } + // Exit search input mode but keep search active + m.searchMode = false + m.searchInput.Blur() + return m, nil + + case "esc", "ctrl+c": + // Cancel search input + m.searchMode = false + m.searchInput.Reset() + m.searchInput.Blur() + return m, nil + } + } + + // Update the search input + m.searchInput, cmd = m.searchInput.Update(msg) + return m, cmd + } + + // Normal mode (not in time input) + switch msg := msg.(type) { + case tea.KeyMsg: + switch msg.String() { + case "ctrl+c", "q", "Q": + return m, tea.Quit + + case "t": + // Enter time input mode + m.timeInputMode = true + m.timeInput.Focus() + return m, textinput.Blink + + case "c": + // Enter config view mode + m.configViewMode = true + return m, nil + + case "h": + // Enter help view mode + m.helpViewMode = true + return m, nil + + case "x": + // Enter health view mode + m.healthViewMode = true + return m, nil + + case "f": + // Enter filter view mode + m.filterViewMode = true + return m, nil + + case "E", "shift+e": + // Jump backward to previous MasterRecoveryState (any) + recovery := m.traceData.FindPreviousRecovery(m.currentEventIndex) + // If filterShowAll is false and time filter is enabled, keep searching until we find one in range + for recovery != nil { + if m.filterShowAll || !m.filterTimeEnabled || (recovery.Time >= m.filterTimeStart && recovery.Time <= m.filterTimeEnd) { + m.currentEventIndex = recovery.EventIndex + m.currentTime = recovery.Time + m.updateClusterState() + break + } + // Try previous recovery + recovery = m.traceData.FindPreviousRecovery(recovery.EventIndex - 
1) + } + + case "e": + // Jump forward to next MasterRecoveryState (any) + recovery := m.traceData.FindNextRecovery(m.currentEventIndex) + // If filterShowAll is false and time filter is enabled, keep searching until we find one in range + for recovery != nil { + if m.filterShowAll || !m.filterTimeEnabled || (recovery.Time >= m.filterTimeStart && recovery.Time <= m.filterTimeEnd) { + m.currentEventIndex = recovery.EventIndex + m.currentTime = recovery.Time + m.updateClusterState() + break + } + // Try next recovery + recovery = m.traceData.FindNextRecovery(recovery.EventIndex + 1) + } + + case "ctrl+n": + // Move forward to next visible (non-filtered) event + if m.currentEventIndex < len(m.traceData.Events)-1 { + // Find next event that matches filters + for i := m.currentEventIndex + 1; i < len(m.traceData.Events); i++ { + if eventMatchesFilters(&m.traceData.Events[i], &m) { + m.currentEventIndex = i + m.currentTime = m.traceData.Events[m.currentEventIndex].TimeValue + m.updateClusterState() + break + } + } + } + + case "ctrl+p": + // Move backward to previous visible (non-filtered) event + if m.currentEventIndex > 0 { + // Find previous event that matches filters + for i := m.currentEventIndex - 1; i >= 0; i-- { + if eventMatchesFilters(&m.traceData.Events[i], &m) { + m.currentEventIndex = i + m.currentTime = m.traceData.Events[m.currentEventIndex].TimeValue + m.updateClusterState() + break + } + } + } + + case "ctrl+v": + // Page forward (1 second) to next visible event + newTime := m.currentTime + 1.0 + if newTime <= m.traceData.MaxTime { + targetIdx := m.traceData.GetEventIndexAtTime(newTime) + // Find next visible event from target + for i := targetIdx; i < len(m.traceData.Events); i++ { + if eventMatchesFilters(&m.traceData.Events[i], &m) { + m.currentEventIndex = i + m.currentTime = m.traceData.Events[m.currentEventIndex].TimeValue + m.updateClusterState() + break + } + } + } + + case "alt+v": + // Page backward (1 second) to previous visible event + 
newTime := m.currentTime - 1.0 + if newTime >= m.traceData.MinTime { + targetIdx := m.traceData.GetEventIndexAtTime(newTime) + // Find previous visible event from target + for i := targetIdx; i >= 0; i-- { + if eventMatchesFilters(&m.traceData.Events[i], &m) { + m.currentEventIndex = i + m.currentTime = m.traceData.Events[m.currentEventIndex].TimeValue + m.updateClusterState() + break + } + } + } + + case "g": + // Jump to start - first visible event + for i := 0; i < len(m.traceData.Events); i++ { + if eventMatchesFilters(&m.traceData.Events[i], &m) { + m.currentEventIndex = i + m.currentTime = m.traceData.Events[i].TimeValue + m.updateClusterState() + break + } + } + + case "G", "shift+g": + // Jump to end - last visible event + for i := len(m.traceData.Events) - 1; i >= 0; i-- { + if eventMatchesFilters(&m.traceData.Events[i], &m) { + m.currentEventIndex = i + m.currentTime = m.traceData.Events[i].TimeValue + m.updateClusterState() + break + } + } + + case "R", "shift+r": + // Jump backward to latest MasterRecoveryState with StatusCode="0" + recovery := m.traceData.FindPreviousRecoveryWithStatusCode(m.currentEventIndex, "0") + // If filterShowAll is false and time filter is enabled, keep searching until we find one in range + for recovery != nil { + if m.filterShowAll || !m.filterTimeEnabled || (recovery.Time >= m.filterTimeStart && recovery.Time <= m.filterTimeEnd) { + m.currentEventIndex = recovery.EventIndex + m.currentTime = recovery.Time + m.updateClusterState() + break + } + // Try previous recovery with StatusCode=0 + recovery = m.traceData.FindPreviousRecoveryWithStatusCode(recovery.EventIndex-1, "0") + } + + case "r": + // Jump forward to earliest MasterRecoveryState with StatusCode="0" + recovery := m.traceData.FindNextRecoveryWithStatusCode(m.currentEventIndex, "0") + // If filterShowAll is false and time filter is enabled, keep searching until we find one in range + for recovery != nil { + if m.filterShowAll || !m.filterTimeEnabled || (recovery.Time 
>= m.filterTimeStart && recovery.Time <= m.filterTimeEnd) { + m.currentEventIndex = recovery.EventIndex + m.currentTime = recovery.Time + m.updateClusterState() + break + } + // Try next recovery with StatusCode=0 + recovery = m.traceData.FindNextRecoveryWithStatusCode(recovery.EventIndex+1, "0") + } + + case "3": + // Jump forward to next Severity=30 event + for i := m.currentEventIndex + 1; i < len(m.traceData.Events); i++ { + event := &m.traceData.Events[i] + if event.Severity == "30" && eventMatchesFilters(event, &m) { + m.currentEventIndex = i + m.currentTime = event.TimeValue + m.updateClusterState() + break + } + } + + case "#", "shift+3": + // Jump backward to previous Severity=30 event + for i := m.currentEventIndex - 1; i >= 0; i-- { + event := &m.traceData.Events[i] + if event.Severity == "30" && eventMatchesFilters(event, &m) { + m.currentEventIndex = i + m.currentTime = event.TimeValue + m.updateClusterState() + break + } + } + + case "4": + // Jump forward to next Severity=40 event + for i := m.currentEventIndex + 1; i < len(m.traceData.Events); i++ { + event := &m.traceData.Events[i] + if event.Severity == "40" && eventMatchesFilters(event, &m) { + m.currentEventIndex = i + m.currentTime = event.TimeValue + m.updateClusterState() + break + } + } + + case "$", "shift+4": + // Jump backward to previous Severity=40 event + for i := m.currentEventIndex - 1; i >= 0; i-- { + event := &m.traceData.Events[i] + if event.Severity == "40" && eventMatchesFilters(event, &m) { + m.currentEventIndex = i + m.currentTime = event.TimeValue + m.updateClusterState() + break + } + } + + case "/": + // Always enter forward search mode (start new search) + m.searchMode = true + m.searchDirection = "forward" + m.searchInput.Focus() + return m, textinput.Blink + + case "?": + // Always enter backward search mode (start new search) + m.searchMode = true + m.searchDirection = "backward" + m.searchInput.Focus() + return m, textinput.Blink + + case "n": + // Go to next match in 
the original search direction + if m.searchActive && m.searchPattern != "" { + var matchIndex int + if m.searchDirection == "forward" { + matchIndex = m.searchForward(m.currentEventIndex + 1, m.searchPattern) + } else { + matchIndex = m.searchBackward(m.currentEventIndex - 1, m.searchPattern) + } + if matchIndex >= 0 { + m.currentEventIndex = matchIndex + m.currentTime = m.traceData.Events[m.currentEventIndex].TimeValue + m.updateClusterState() + } + } + + case "N", "shift+n": + // Go to previous match (opposite of original search direction) + if m.searchActive && m.searchPattern != "" { + var matchIndex int + if m.searchDirection == "forward" { + // Original was forward, so N goes backward + matchIndex = m.searchBackward(m.currentEventIndex - 1, m.searchPattern) + } else { + // Original was backward, so N goes forward + matchIndex = m.searchForward(m.currentEventIndex + 1, m.searchPattern) + } + if matchIndex >= 0 { + m.currentEventIndex = matchIndex + m.currentTime = m.traceData.Events[m.currentEventIndex].TimeValue + m.updateClusterState() + } + } + + case "esc": + // Clear search highlighting + if m.searchActive { + m.searchActive = false + m.searchPattern = "" + return m, nil + } + } + + case tea.WindowSizeMsg: + m.width = msg.Width + m.height = msg.Height + } + + return m, nil +} + +// formatRoleLabel formats a role label with ID and epoch (if applicable) +// For TLog, LogRouter, BackupWorker: shows "RoleName [ID] (e=Epoch)" +// For other roles: shows "RoleName [ID]" or just "RoleName" +func formatRoleLabel(role RoleInfo) string { + roleLabel := role.Name + if role.ID != "" { + roleLabel = fmt.Sprintf("%s [%s]", role.Name, role.ID) + } + // Add epoch for TLog, LogRouter, and BackupWorker roles + if role.Epoch != "" && (role.Name == "TLog" || role.Name == "LogRouter" || role.Name == "BackupWorker") { + roleLabel = fmt.Sprintf("%s (e=%s)", roleLabel, role.Epoch) + } + return roleLabel +} + +// formatNumberWithCommas formats an int64 with comma separators for 
readability +// e.g., 53407865 -> "53,407,865" +func formatNumberWithCommas(n int64) string { + if n < 0 { + return "-" + formatNumberWithCommas(-n) + } + str := fmt.Sprintf("%d", n) + if len(str) <= 3 { + return str + } + + // Insert commas from right to left + var result strings.Builder + for i, c := range str { + if i > 0 && (len(str)-i)%3 == 0 { + result.WriteRune(',') + } + result.WriteRune(c) + } + return result.String() +} + +// formatTraceEvent formats a trace event for display with config.fish field ordering and colors +func formatTraceEvent(event *TraceEvent, isCurrent bool, searchPattern string) string { + // Skip fields as per config.fish and fields shown in topology + skipFields := map[string]bool{ + "DateTime": true, + "ThreadID": true, + "LogGroup": true, + "TrackLatestType": true, + "Roles": true, // Shown in topology + } + + // Color styles + fieldNameStyle := lipgloss.NewStyle().Foreground(lipgloss.Color("240")) // Dim + fieldValueStyle := lipgloss.NewStyle().Foreground(lipgloss.Color("46")) // Green + currentLineStyle := lipgloss.NewStyle().Background(lipgloss.Color("58")) // Dark yellowish highlight + searchHighlightStyle := lipgloss.NewStyle().Background(lipgloss.Color("58")) // Same as current line highlight + + var parts []string + + // Compile regex for search if pattern provided + var searchRe *regexp.Regexp + if searchPattern != "" { + regexPattern := convertWildcardToRegex(searchPattern) + searchRe, _ = regexp.Compile(regexPattern) + } + + // Helper function to apply search highlighting to a field + applySearchHighlight := func(text string) string { + if searchRe == nil { + return text + } + + // Extract literal parts from the search pattern (between wildcards) + literals := extractLiterals(searchPattern) + if len(literals) == 0 { + return text + } + + // Collect all match positions for all literals + type matchPos struct { + start int + end int + } + var allMatches []matchPos + + for _, literal := range literals { + if literal == "" { + 
continue + } + // Create a regex for this literal (case-sensitive, escaped) + literalPattern := regexp.QuoteMeta(literal) + literalRe, err := regexp.Compile(literalPattern) + if err != nil { + continue + } + + // Find all occurrences of this literal + matches := literalRe.FindAllStringIndex(text, -1) + for _, match := range matches { + allMatches = append(allMatches, matchPos{start: match[0], end: match[1]}) + } + } + + if len(allMatches) == 0 { + return text + } + + // Sort matches by start position + sort.Slice(allMatches, func(i, j int) bool { + return allMatches[i].start < allMatches[j].start + }) + + // Merge overlapping matches + var merged []matchPos + for _, match := range allMatches { + if len(merged) == 0 { + merged = append(merged, match) + } else { + last := &merged[len(merged)-1] + if match.start <= last.end { + // Overlapping or adjacent, merge them + if match.end > last.end { + last.end = match.end + } + } else { + // Non-overlapping, add as new + merged = append(merged, match) + } + } + } + + // Build highlighted string + var result strings.Builder + lastEnd := 0 + for _, match := range merged { + // Add text before match + if match.start > lastEnd { + result.WriteString(text[lastEnd:match.start]) + } + // Add highlighted match + result.WriteString(searchHighlightStyle.Render(text[match.start:match.end])) + lastEnd = match.end + } + // Add remaining text + if lastEnd < len(text) { + result.WriteString(text[lastEnd:]) + } + return result.String() + } + + // Add fields in specific order: Time, Type, Severity (skip Machine, Roles, ID - shown in topology), then other attributes + if event.Time != "" { + parts = append(parts, fieldNameStyle.Render("Time=")+fieldValueStyle.Render(applySearchHighlight(event.Time))) + } + if event.Type != "" { + parts = append(parts, fieldNameStyle.Render("Type=")+fieldValueStyle.Render(applySearchHighlight(event.Type))) + } + if event.Severity != "" { + parts = append(parts, 
fieldNameStyle.Render("Severity=")+fieldValueStyle.Render(applySearchHighlight(event.Severity))) + } + // Skip Machine - shown in topology + // Skip Roles - shown in topology + // Skip ID - shown in topology + + // Add remaining attributes (sorted for consistent ordering) + var attrKeys []string + for key := range event.Attrs { + if key == "Roles" { + continue // Already handled + } + if skipFields[key] { + continue + } + attrKeys = append(attrKeys, key) + } + // Sort keys for deterministic ordering + sort.Strings(attrKeys) + + for _, key := range attrKeys { + value := event.Attrs[key] + parts = append(parts, fieldNameStyle.Render(key+"=")+lipgloss.NewStyle().Foreground(lipgloss.Color("252")).Render(applySearchHighlight(value))) + } + + line := strings.Join(parts, " ") + if isCurrent { + return currentLineStyle.Render(line) + } + return line +} + +// handleMachineSelectionPopup handles keys in the machine selection popup +func (m model) handleMachineSelectionPopup(msg tea.Msg) (tea.Model, tea.Cmd) { + var cmd tea.Cmd + + switch msg := msg.(type) { + case tea.KeyMsg: + switch msg.String() { + case "esc", "ctrl+c", "q": + // Exit machine selection + m.filterMachineSelectMode = false + m.filterMachineInput.Reset() + m.filterMachineInput.Blur() + return m, nil + + case "enter": + // Confirm selection and return to filter view + m.filterMachineSelectMode = false + m.filterMachineInput.Reset() + m.filterMachineInput.Blur() + return m, nil + + case "ctrl+n": + // Move down in current column + // We need to recalculate columns to know the size + items := m.getMachineSelectionItems() + maxDisplayLines := m.height - 12 + var columns [][]SelectableItem + currentColumn := []SelectableItem{} + currentHeight := 0 + + for i := 0; i < len(items); i++ { + item := items[i] + itemLines := 1 + if item.Type == "machine" && item.Worker != nil { + itemLines += len(item.Worker.Roles) + } + if currentHeight+itemLines > maxDisplayLines && len(currentColumn) > 0 { + columns = append(columns, 
currentColumn) + currentColumn = []SelectableItem{} + currentHeight = 0 + } + currentColumn = append(currentColumn, item) + currentHeight += itemLines + } + if len(currentColumn) > 0 { + columns = append(columns, currentColumn) + } + + // Clamp column index + if m.filterMachineColumn >= len(columns) { + m.filterMachineColumn = len(columns) - 1 + } + if m.filterMachineColumn < 0 && len(columns) > 0 { + m.filterMachineColumn = 0 + } + + // Get current column + var displayItems []SelectableItem + if m.filterMachineColumn >= 0 && m.filterMachineColumn < len(columns) { + displayItems = columns[m.filterMachineColumn] + } + + // Increment selection within current column + if len(displayItems) > 0 { + m.filterMachineSelected++ + if m.filterMachineSelected >= len(displayItems) { + m.filterMachineSelected = 0 // Wrap around + } + } + return m, nil + + case "ctrl+p": + // Move up in current column + // We need to recalculate columns to know the size + items := m.getMachineSelectionItems() + maxDisplayLines := m.height - 12 + var columns [][]SelectableItem + currentColumn := []SelectableItem{} + currentHeight := 0 + + for i := 0; i < len(items); i++ { + item := items[i] + itemLines := 1 + if item.Type == "machine" && item.Worker != nil { + itemLines += len(item.Worker.Roles) + } + if currentHeight+itemLines > maxDisplayLines && len(currentColumn) > 0 { + columns = append(columns, currentColumn) + currentColumn = []SelectableItem{} + currentHeight = 0 + } + currentColumn = append(currentColumn, item) + currentHeight += itemLines + } + if len(currentColumn) > 0 { + columns = append(columns, currentColumn) + } + + // Clamp column index + if m.filterMachineColumn >= len(columns) { + m.filterMachineColumn = len(columns) - 1 + } + if m.filterMachineColumn < 0 && len(columns) > 0 { + m.filterMachineColumn = 0 + } + + // Get current column + var displayItems []SelectableItem + if m.filterMachineColumn >= 0 && m.filterMachineColumn < len(columns) { + displayItems = 
columns[m.filterMachineColumn] + } + + // Decrement selection within current column + if len(displayItems) > 0 { + m.filterMachineSelected-- + if m.filterMachineSelected < 0 { + m.filterMachineSelected = len(displayItems) - 1 // Wrap around + } + } + return m, nil + + case "ctrl+f": + // Move to next column (forward) + items := m.getMachineSelectionItems() + maxDisplayLines := m.height - 12 + var columns [][]SelectableItem + currentColumn := []SelectableItem{} + currentHeight := 0 + + for i := 0; i < len(items); i++ { + item := items[i] + itemLines := 1 + if item.Type == "machine" && item.Worker != nil { + itemLines += len(item.Worker.Roles) + } + if currentHeight+itemLines > maxDisplayLines && len(currentColumn) > 0 { + columns = append(columns, currentColumn) + currentColumn = []SelectableItem{} + currentHeight = 0 + } + currentColumn = append(currentColumn, item) + currentHeight += itemLines + } + if len(currentColumn) > 0 { + columns = append(columns, currentColumn) + } + + // Move to next column + if len(columns) > 0 { + m.filterMachineColumn++ + if m.filterMachineColumn >= len(columns) { + m.filterMachineColumn = 0 // Wrap to first column + } + // Reset selection to first item in new column + m.filterMachineSelected = 0 + } + return m, nil + + case "ctrl+b": + // Move to previous column (backward) + items := m.getMachineSelectionItems() + maxDisplayLines := m.height - 12 + var columns [][]SelectableItem + currentColumn := []SelectableItem{} + currentHeight := 0 + + for i := 0; i < len(items); i++ { + item := items[i] + itemLines := 1 + if item.Type == "machine" && item.Worker != nil { + itemLines += len(item.Worker.Roles) + } + if currentHeight+itemLines > maxDisplayLines && len(currentColumn) > 0 { + columns = append(columns, currentColumn) + currentColumn = []SelectableItem{} + currentHeight = 0 + } + currentColumn = append(currentColumn, item) + currentHeight += itemLines + } + if len(currentColumn) > 0 { + columns = append(columns, currentColumn) + } + + 
// Move to previous column + if len(columns) > 0 { + m.filterMachineColumn-- + if m.filterMachineColumn < 0 { + m.filterMachineColumn = len(columns) - 1 // Wrap to last column + } + // Reset selection to first item in new column + m.filterMachineSelected = 0 + } + return m, nil + + case " ", "space": + // Toggle selection of current item in current column + // Recalculate columns to get current item + items := m.getMachineSelectionItems() + maxDisplayLines := m.height - 12 + var columns [][]SelectableItem + currentColumn := []SelectableItem{} + currentHeight := 0 + + for i := 0; i < len(items); i++ { + item := items[i] + itemLines := 1 + if item.Type == "machine" && item.Worker != nil { + itemLines += len(item.Worker.Roles) + } + if currentHeight+itemLines > maxDisplayLines && len(currentColumn) > 0 { + columns = append(columns, currentColumn) + currentColumn = []SelectableItem{} + currentHeight = 0 + } + currentColumn = append(currentColumn, item) + currentHeight += itemLines + } + if len(currentColumn) > 0 { + columns = append(columns, currentColumn) + } + + // Clamp column index + if m.filterMachineColumn >= len(columns) { + m.filterMachineColumn = len(columns) - 1 + } + if m.filterMachineColumn < 0 && len(columns) > 0 { + m.filterMachineColumn = 0 + } + + // Get current column and item + var displayItems []SelectableItem + if m.filterMachineColumn >= 0 && m.filterMachineColumn < len(columns) { + displayItems = columns[m.filterMachineColumn] + } + + if m.filterMachineSelected >= 0 && m.filterMachineSelected < len(displayItems) { + item := displayItems[m.filterMachineSelected] + + if item.Type == "dc" && item.DC != "Testers" { + // Toggle DC selection + m.filterMachineDCs[item.DC] = !m.filterMachineDCs[item.DC] + } else if item.Type == "machine" { + // Toggle individual machine selection + found := false + for i, m2 := range m.filterMachineList { + if m2 == item.Machine { + // Remove it + m.filterMachineList = append(m.filterMachineList[:i], 
m.filterMachineList[i+1:]...) + found = true + break + } + } + if !found { + m.filterMachineList = append(m.filterMachineList, item.Machine) + } + // Rebuild machine set for O(1) lookups + m.rebuildMachineSet() + } + } + return m, nil + + default: + // Pass other keys (except space) to input for fuzzy search + // Reset selection and column when search changes + oldValue := m.filterMachineInput.Value() + m.filterMachineInput, cmd = m.filterMachineInput.Update(msg) + if m.filterMachineInput.Value() != oldValue { + m.filterMachineSelected = 0 + m.filterMachineColumn = 0 // Also reset to first column + } + return m, cmd + } + } + return m, nil +} + +// handleFilterTimeInput handles time range input +func (m model) handleFilterTimeInput(msg tea.Msg) (tea.Model, tea.Cmd) { + var cmd tea.Cmd + + switch msg := msg.(type) { + case tea.KeyMsg: + switch msg.String() { + case "esc", "ctrl+c": + // Cancel time input + m.filterTimeInputMode = false + m.filterTimeInput.Reset() + m.filterTimeInput.Blur() + return m, nil + + case "enter": + // Save the time value + if timeStr := m.filterTimeInput.Value(); timeStr != "" { + if t, err := strconv.ParseFloat(timeStr, 64); err == nil { + if m.filterTimeEditingStart { + m.filterTimeStart = t + // Move to editing end time + m.filterTimeEditingStart = false + m.filterTimeInput.SetValue(fmt.Sprintf("%.6f", m.filterTimeEnd)) + return m, nil + } else { + m.filterTimeEnd = t + // Enable filter and exit + m.filterTimeEnabled = true + m.filterTimeInputMode = false + m.filterTimeInput.Reset() + m.filterTimeInput.Blur() + return m, nil + } + } + } + return m, nil + + case "tab": + // Toggle between start and end + if timeStr := m.filterTimeInput.Value(); timeStr != "" { + if t, err := strconv.ParseFloat(timeStr, 64); err == nil { + if m.filterTimeEditingStart { + m.filterTimeStart = t + } else { + m.filterTimeEnd = t + } + } + } + m.filterTimeEditingStart = !m.filterTimeEditingStart + if m.filterTimeEditingStart { + 
m.filterTimeInput.SetValue(fmt.Sprintf("%.6f", m.filterTimeStart)) + } else { + m.filterTimeInput.SetValue(fmt.Sprintf("%.6f", m.filterTimeEnd)) + } + return m, nil + + default: + m.filterTimeInput, cmd = m.filterTimeInput.Update(msg) + return m, cmd + } + } + return m, nil +} + +// handleTypeSearchPopup handles the Type search popup for fuzzy matching Type values +func (m model) handleTypeSearchPopup(msg tea.Msg) (tea.Model, tea.Cmd) { + var cmd tea.Cmd + + switch msg := msg.(type) { + case tea.KeyMsg: + switch msg.String() { + case "esc", "ctrl+c": + // Cancel Type search + m.filterTypeSearchMode = false + m.filterTypeSearchInput.Reset() + m.filterTypeSearchInput.Blur() + m.filterTypeSearchSelected = 0 + return m, nil + + case "enter": + // Add selected Type as filter + searchTerm := strings.ToLower(m.filterTypeSearchInput.Value()) + var filteredTypes []string + for _, t := range m.filterTypeSearchList { + if searchTerm == "" || strings.Contains(strings.ToLower(t), searchTerm) { + filteredTypes = append(filteredTypes, t) + } + } + + if len(filteredTypes) > 0 && m.filterTypeSearchSelected >= 0 && m.filterTypeSearchSelected < len(filteredTypes) { + selectedType := filteredTypes[m.filterTypeSearchSelected] + // Add filter with trailing space for exact matching + filterPattern := fmt.Sprintf("Type=%s ", selectedType) + m.filterRawList = append(m.filterRawList, filterPattern) + m.filterRawSelectedIndex = len(m.filterRawList) - 1 + // Update column index + maxRows := 5 + m.filterRawColumn = m.filterRawSelectedIndex / maxRows + // Recompile regex patterns for performance + m.recompileRawFilterRegexes() + } + + // Close popup + m.filterTypeSearchMode = false + m.filterTypeSearchInput.Reset() + m.filterTypeSearchInput.Blur() + m.filterTypeSearchSelected = 0 + // Ensure current event is visible after adding filter + m.ensureCurrentEventVisible() + return m, nil + + case "ctrl+n": + // Move down in filtered list + searchTerm := 
strings.ToLower(m.filterTypeSearchInput.Value()) + var filteredTypes []string + for _, t := range m.filterTypeSearchList { + if searchTerm == "" || strings.Contains(strings.ToLower(t), searchTerm) { + filteredTypes = append(filteredTypes, t) + } + } + + if len(filteredTypes) > 0 { + m.filterTypeSearchSelected++ + if m.filterTypeSearchSelected >= len(filteredTypes) { + m.filterTypeSearchSelected = 0 // Wrap around + } + } + return m, nil + + case "ctrl+p": + // Move up in filtered list + searchTerm := strings.ToLower(m.filterTypeSearchInput.Value()) + var filteredTypes []string + for _, t := range m.filterTypeSearchList { + if searchTerm == "" || strings.Contains(strings.ToLower(t), searchTerm) { + filteredTypes = append(filteredTypes, t) + } + } + + if len(filteredTypes) > 0 { + m.filterTypeSearchSelected-- + if m.filterTypeSearchSelected < 0 { + m.filterTypeSearchSelected = len(filteredTypes) - 1 // Wrap around + } + } + return m, nil + + default: + // Pass other keys to input for fuzzy search + // Reset selection when search changes + oldValue := m.filterTypeSearchInput.Value() + m.filterTypeSearchInput, cmd = m.filterTypeSearchInput.Update(msg) + if m.filterTypeSearchInput.Value() != oldValue { + m.filterTypeSearchSelected = 0 + } + return m, cmd + } + } + return m, nil +} + +// getFilteredMachineList returns lists of DCs and machines filtered by search input +func (m model) getFilteredMachineList() ([]string, []string) { + searchTerm := strings.ToLower(m.filterMachineInput.Value()) + + // Collect all unique machines and DCs from trace data + machineSet := make(map[string]bool) + dcSet := make(map[string]bool) + + for _, event := range m.traceData.Events { + if event.Machine != "" { + machineSet[event.Machine] = true + // Extract DC from machine address + if dc := extractDCFromAddress(event.Machine); dc != "" { + dcSet[dc] = true + } + } + } + + // Convert to sorted slices + var machines []string + for machine := range machineSet { + if searchTerm == "" || 
// extractDCFromAddress extracts the DC ID from a machine address.
//
// Supported shapes are "[abcd::X:Y:Z:W]:Port", "abcd::X:Y:Z:W" and
// "X.Y.Z.W:Port"; in each of them Y is the DC ID. Anything after the closing
// bracket of a bracketed address (port, further suffixes) is ignored.
// Returns "" when no DC component can be found.
func extractDCFromAddress(addr string) string {
	host := addr
	if strings.HasPrefix(host, "[") {
		host = host[1:]
		// Cut at the closing bracket rather than trimming a trailing "]":
		// with a ":Port" suffix the bracket is not the last character, and
		// the extra colon group would shift the result from Y to Z.
		if end := strings.Index(host, "]"); end >= 0 {
			host = host[:end]
		}
	}

	parts := strings.Split(host, ":")
	if len(parts) >= 4 {
		// IPv6 format: abcd::X:Y:Z:W -> Y is third from the end
		return parts[len(parts)-3]
	}

	// IPv4 format: X.Y.Z.W (parts[0] is everything before the first colon)
	dotParts := strings.Split(parts[0], ".")
	if len(dotParts) >= 2 {
		return dotParts[1]
	}
	return ""
}
strings.Contains(strings.ToLower(worker.Machine), searchTerm) { + machineMatches = true + break + } + } + } + + if dcMatches || machineMatches { + items = append(items, SelectableItem{Type: "dc", DC: dcID}) + // Add machines under this DC + for _, worker := range dcWorkers[dcID] { + // If DC matched, show all machines. Otherwise only show matching machines. + if dcMatches || strings.Contains(strings.ToLower(worker.Machine), searchTerm) { + items = append(items, SelectableItem{Type: "machine", Machine: worker.Machine, Worker: worker, IsTester: false}) + } + } + } + } + + // Add testers + if len(testers) > 0 { + testersMatches := searchTerm == "" || strings.Contains(strings.ToLower("testers"), searchTerm) + machineMatches := false + + if !testersMatches { + // Check if any tester machine matches + for _, worker := range testers { + if strings.Contains(strings.ToLower(worker.Machine), searchTerm) { + machineMatches = true + break + } + } + } + + if testersMatches || machineMatches { + items = append(items, SelectableItem{Type: "dc", DC: "Testers"}) + for _, worker := range testers { + // If "Testers" matched, show all. Otherwise only show matching machines. 
+ if testersMatches || strings.Contains(strings.ToLower(worker.Machine), searchTerm) { + items = append(items, SelectableItem{Type: "machine", Machine: worker.Machine, Worker: worker, IsTester: true}) + } + } + } + } + + return items +} + + +func (m *model) updateClusterState() { + // Get events up to and including the current event index + events := m.traceData.Events[:m.currentEventIndex+1] + m.clusterState = BuildClusterState(events) +} + +// ensureCurrentEventVisible makes sure the current event index points to a visible event +// If current event is filtered out, find the nearest visible event +func (m *model) ensureCurrentEventVisible() { + // Check if current event is visible + if m.currentEventIndex >= 0 && m.currentEventIndex < len(m.traceData.Events) { + if eventMatchesFilters(&m.traceData.Events[m.currentEventIndex], m) { + // Current event is visible, nothing to do + return + } + } + + // Current event is filtered out, find nearest visible event + // Try forward first + for i := m.currentEventIndex + 1; i < len(m.traceData.Events); i++ { + if eventMatchesFilters(&m.traceData.Events[i], m) { + m.currentEventIndex = i + m.currentTime = m.traceData.Events[i].TimeValue + m.updateClusterState() + return + } + } + + // Try backward + for i := m.currentEventIndex - 1; i >= 0; i-- { + if eventMatchesFilters(&m.traceData.Events[i], m) { + m.currentEventIndex = i + m.currentTime = m.traceData.Events[i].TimeValue + m.updateClusterState() + return + } + } + + // No visible events found, stay at current (but it won't be visible) +} + +// buildEventListPane builds the event list pane showing events around current time +func (m model) buildEventListPane(availableHeight int, paneWidth int, searchPattern string) []string { + var lines []string + currentIdx := m.currentEventIndex + + // We need to center based on LINE count, not event count + // First, render the current event to see how many lines it takes + currentEvent := &m.traceData.Events[currentIdx] + 
currentEventLine := formatTraceEvent(currentEvent, false, searchPattern) + currentWrappedLines := wrapText(currentEventLine, paneWidth) + currentEventLineCount := len(currentWrappedLines) + + // Calculate how many lines we want above and below current event + targetLinesAbove := availableHeight / 2 + + // Build lines going backwards from current event + var linesAbove []string + lineCount := 0 + for i := currentIdx - 1; i >= 0 && lineCount < targetLinesAbove; i-- { + event := &m.traceData.Events[i] + + // Skip filtered events + if !eventMatchesFilters(event, &m) { + continue + } + + eventLine := formatTraceEvent(event, false, "") // No highlighting for non-current events + wrappedLines := wrapText(eventLine, paneWidth) + + // Check if adding this event would exceed our target + if lineCount+len(wrappedLines) > targetLinesAbove { + break + } + + // Prepend to linesAbove (we're going backwards) + for j := len(wrappedLines) - 1; j >= 0; j-- { + linesAbove = append([]string{wrappedLines[j]}, linesAbove...) + } + lineCount += len(wrappedLines) + } + + // Add lines above + lines = append(lines, linesAbove...) 
+ + // Add current event with highlight (only first line) + highlightStyle := lipgloss.NewStyle().Background(lipgloss.Color("58")) + for i, line := range currentWrappedLines { + if i == 0 { + // Highlight only the first line (where Time= appears) + lines = append(lines, highlightStyle.Render(line)) + } else { + // Subsequent wrapped lines are not highlighted + lines = append(lines, line) + } + } + + // Build lines going forwards from current event + lineCount = len(linesAbove) + currentEventLineCount + for i := currentIdx + 1; i < len(m.traceData.Events) && lineCount < availableHeight; i++ { + event := &m.traceData.Events[i] + + // Skip filtered events + if !eventMatchesFilters(event, &m) { + continue + } + + eventLine := formatTraceEvent(event, false, "") // No highlighting for non-current events + wrappedLines := wrapText(eventLine, paneWidth) + + // Check if adding this event would exceed available height + if lineCount+len(wrappedLines) > availableHeight { + break + } + + for _, line := range wrappedLines { + lines = append(lines, line) + } + lineCount += len(wrappedLines) + } + + return lines +} + +// wrapText wraps text to fit within the specified width, preserving ANSI color codes +func wrapText(text string, width int) []string { + if width <= 0 { + return []string{text} + } + + // Use lipgloss to wrap text + wrapStyle := lipgloss.NewStyle().Width(width) + wrapped := wrapStyle.Render(text) + + // Split into lines + lines := strings.Split(wrapped, "\n") + return lines +} + + +// View renders the UI (required by Bubbletea) +func (m model) View() string { + // Get current event's machine and ID for highlighting in topology + var currentMachine string + var currentID string + var currentEvent *TraceEvent + if m.currentEventIndex >= 0 && m.currentEventIndex < len(m.traceData.Events) { + currentEvent = &m.traceData.Events[m.currentEventIndex] + currentMachine = currentEvent.Machine + currentID = currentEvent.ID + } + + // Calculate available height for topology 
(reserve space for config, recovery, scrubber, help) + // Bottom section (with borders and padding): + // - configStyle border + padding + content = 3 lines + // - recovery line = 1 line + // - scrubberStyle border + padding + content = 3 lines + // - help line = 1 line + // - epoch info line = 1 line + // Total bottom = 9 lines + availableHeight := m.height - 9 + if availableHeight < 1 { + availableHeight = 1 // Minimum 1 line + } + + // Styles + dcHeaderStyle := lipgloss.NewStyle(). + Bold(true). + Foreground(lipgloss.Color("33")). + Underline(true). + MarginTop(0). + MarginBottom(0) + + testerHeaderStyle := lipgloss.NewStyle(). + Bold(true). + Foreground(lipgloss.Color("135")). + Underline(true). + MarginTop(0). + MarginBottom(0) + + workerStyleGray := lipgloss.NewStyle(). + Foreground(lipgloss.Color("240")). + PaddingLeft(2) + + workerStyleGreen := lipgloss.NewStyle(). + Foreground(lipgloss.Color("46")). + Bold(true). + PaddingLeft(2) + + // Style for current machine (event source) - cyan with arrow + workerStyleCurrent := lipgloss.NewStyle(). + Foreground(lipgloss.Color("51")). + Bold(true). + PaddingLeft(0) + + roleStyle := lipgloss.NewStyle(). + Foreground(lipgloss.Color("252")) // Normal gray color + + // Style for current role (when ID matches) + roleStyleCurrent := lipgloss.NewStyle(). + Foreground(lipgloss.Color("51")). + Bold(true) + + scrubberStyle := lipgloss.NewStyle(). + Foreground(lipgloss.Color("241")). + PaddingLeft(1) + + helpStyle := lipgloss.NewStyle(). 
+ Foreground(lipgloss.Color("241")) + + // Get workers grouped by DC and testers + dcWorkers := m.clusterState.GetWorkersByDC() + testers := m.clusterState.GetTesters() + + // Extract NetworkMessageSent info if current event is a network message + var networkMsg *struct { + SrcAddr string + DstAddr string + RPCName string + StrippedRPC string + SrcExists bool // Whether source machine exists in topology + DstExists bool // Whether destination machine exists in topology + } + if currentEvent != nil && currentEvent.Type == "NetworkMessageSent" { + srcAddr := currentEvent.Attrs["SrcAddr"] + dstAddr := currentEvent.Attrs["DstAddr"] + rpcName := currentEvent.Attrs["RPCName"] + if srcAddr != "" && dstAddr != "" && rpcName != "" { + // Normalize addresses to handle :tls suffix inconsistency + normalizedSrc := normalizeAddress(srcAddr) + normalizedDst := normalizeAddress(dstAddr) + + // Check if src and dst exist in topology + srcExists := false + dstExists := false + + // Check in dcWorkers + for _, workers := range dcWorkers { + for _, worker := range workers { + normalizedWorker := normalizeAddress(worker.Machine) + if normalizedWorker == normalizedSrc { + srcExists = true + } + if normalizedWorker == normalizedDst { + dstExists = true + } + } + } + + // Check in testers + for _, worker := range testers { + normalizedWorker := normalizeAddress(worker.Machine) + if normalizedWorker == normalizedSrc { + srcExists = true + } + if normalizedWorker == normalizedDst { + dstExists = true + } + } + + networkMsg = &struct { + SrcAddr string + DstAddr string + RPCName string + StrippedRPC string + SrcExists bool + DstExists bool + }{ + SrcAddr: srcAddr, + DstAddr: dstAddr, + RPCName: rpcName, + StrippedRPC: stripRPCName(rpcName), + SrcExists: srcExists, + DstExists: dstExists, + } + } + } + + // Build all topology lines first (before packing into columns) + var allTopologyLines []string + + if len(dcWorkers) == 0 && len(testers) == 0 { + allTopologyLines = 
append(allTopologyLines, "") + allTopologyLines = append(allTopologyLines, " ") + } else { + // Display main machines grouped by DC + if len(dcWorkers) > 0 { + // Sort DC IDs for consistent display + dcIDs := make([]string, 0, len(dcWorkers)) + for dcID := range dcWorkers { + dcIDs = append(dcIDs, dcID) + } + // Simple sort + for i := 0; i < len(dcIDs); i++ { + for j := i + 1; j < len(dcIDs); j++ { + if dcIDs[i] > dcIDs[j] { + dcIDs[i], dcIDs[j] = dcIDs[j], dcIDs[i] + } + } + } + + // Display each DC + for _, dcID := range dcIDs { + workers := dcWorkers[dcID] + allTopologyLines = append(allTopologyLines, dcHeaderStyle.Render(fmt.Sprintf("DC%s", dcID))) + + for _, worker := range workers { + // Check if this worker's machine matches current event + isCurrentMachine := worker.Machine == currentMachine + + // Check if this worker's machine is the source of a network message + // Normalize addresses to handle :tls suffix inconsistency + isNetworkSrc := networkMsg != nil && normalizeAddress(worker.Machine) == normalizeAddress(networkMsg.SrcAddr) + + // Check if this worker's machine is the destination of a network message + isNetworkDst := networkMsg != nil && normalizeAddress(worker.Machine) == normalizeAddress(networkMsg.DstAddr) + + // Check if any role ID matches current event ID + var matchingRole *RoleInfo + if isCurrentMachine && currentID != "" { + for i := range worker.Roles { + if worker.Roles[i].ID == currentID { + matchingRole = &worker.Roles[i] + break + } + } + } + + // Display machine address at top level (no ID here) + if matchingRole != nil { + // Role ID matches - show machine normally, will highlight role below + workerLine := fmt.Sprintf("● %s", worker.Machine) + + // Highlight network message src/dst with yellow background and directional arrow + if isNetworkSrc { + // Source: yellow background with →→→ at end + networkStyle := lipgloss.NewStyle().Background(lipgloss.Color("220")).Foreground(lipgloss.Color("0")).Bold(true) + workerLine = 
fmt.Sprintf("● %s →→→", worker.Machine) + allTopologyLines = append(allTopologyLines, networkStyle.Render(workerLine)) + } else if isNetworkDst { + // Destination: yellow background with ←←← at end + networkStyle := lipgloss.NewStyle().Background(lipgloss.Color("220")).Foreground(lipgloss.Color("0")).Bold(true) + workerLine = fmt.Sprintf("● %s ←←←", worker.Machine) + allTopologyLines = append(allTopologyLines, networkStyle.Render(workerLine)) + } else if worker.HasNonWorkerRoles() { + allTopologyLines = append(allTopologyLines, workerStyleGreen.Render(workerLine)) + } else { + allTopologyLines = append(allTopologyLines, workerStyleGray.Render(workerLine)) + } + + // Show each role, highlighting the one with matching ID + for _, role := range worker.Roles { + roleLabel := formatRoleLabel(role) + if role.ID == currentID { + // Highlight this specific role + allTopologyLines = append(allTopologyLines, roleStyleCurrent.Render(" → "+roleLabel)) + } else { + allTopologyLines = append(allTopologyLines, roleStyle.Render(" "+roleLabel)) + } + } + } else if isCurrentMachine { + // Machine matches but no role ID match - highlight machine + workerLine := fmt.Sprintf("● %s", worker.Machine) + + // Network message highlighting takes precedence + if isNetworkSrc { + networkStyle := lipgloss.NewStyle().Background(lipgloss.Color("220")).Foreground(lipgloss.Color("0")).Bold(true) + workerLine = fmt.Sprintf("● %s →→→", worker.Machine) + allTopologyLines = append(allTopologyLines, networkStyle.Render(workerLine)) + } else if isNetworkDst { + networkStyle := lipgloss.NewStyle().Background(lipgloss.Color("220")).Foreground(lipgloss.Color("0")).Bold(true) + workerLine = fmt.Sprintf("● %s ←←←", worker.Machine) + allTopologyLines = append(allTopologyLines, networkStyle.Render(workerLine)) + } else { + allTopologyLines = append(allTopologyLines, workerStyleCurrent.Render("→ "+workerLine)) + } + + // Show all roles normally + for _, role := range worker.Roles { + roleLabel := 
formatRoleLabel(role) + allTopologyLines = append(allTopologyLines, roleStyle.Render(" "+roleLabel)) + } + } else if worker.HasNonWorkerRoles() { + // Normal worker with roles (no match) + workerLine := fmt.Sprintf("● %s", worker.Machine) + + if isNetworkSrc { + networkStyle := lipgloss.NewStyle().Background(lipgloss.Color("220")).Foreground(lipgloss.Color("0")).Bold(true) + workerLine = fmt.Sprintf("● %s →→→", worker.Machine) + allTopologyLines = append(allTopologyLines, networkStyle.Render(workerLine)) + } else if isNetworkDst { + networkStyle := lipgloss.NewStyle().Background(lipgloss.Color("220")).Foreground(lipgloss.Color("0")).Bold(true) + workerLine = fmt.Sprintf("● %s ←←←", worker.Machine) + allTopologyLines = append(allTopologyLines, networkStyle.Render(workerLine)) + } else { + allTopologyLines = append(allTopologyLines, workerStyleGreen.Render(workerLine)) + } + + for _, role := range worker.Roles { + roleLabel := formatRoleLabel(role) + allTopologyLines = append(allTopologyLines, roleStyle.Render(" "+roleLabel)) + } + } else { + // Worker without roles OR only Worker role (no match) + workerLine := fmt.Sprintf("● %s", worker.Machine) + + if isNetworkSrc { + networkStyle := lipgloss.NewStyle().Background(lipgloss.Color("220")).Foreground(lipgloss.Color("0")).Bold(true) + workerLine = fmt.Sprintf("● %s →→→", worker.Machine) + allTopologyLines = append(allTopologyLines, networkStyle.Render(workerLine)) + } else if isNetworkDst { + networkStyle := lipgloss.NewStyle().Background(lipgloss.Color("220")).Foreground(lipgloss.Color("0")).Bold(true) + workerLine = fmt.Sprintf("● %s ←←←", worker.Machine) + allTopologyLines = append(allTopologyLines, networkStyle.Render(workerLine)) + } else { + allTopologyLines = append(allTopologyLines, workerStyleGray.Render(workerLine)) + } + + // Show all roles (including Worker if present) + for _, role := range worker.Roles { + roleLabel := formatRoleLabel(role) + allTopologyLines = append(allTopologyLines, roleStyle.Render(" 
"+roleLabel)) + } + } + } + } + } + + // Display testers in a separate section + if len(testers) > 0 { + allTopologyLines = append(allTopologyLines, testerHeaderStyle.Render("Testers")) + + for _, worker := range testers { + // Check if this tester's machine matches current event + isCurrentMachine := worker.Machine == currentMachine + + // Check if this worker's machine is the source of a network message + // Normalize addresses to handle :tls suffix inconsistency + isNetworkSrc := networkMsg != nil && normalizeAddress(worker.Machine) == normalizeAddress(networkMsg.SrcAddr) + + // Check if this worker's machine is the destination of a network message + isNetworkDst := networkMsg != nil && normalizeAddress(worker.Machine) == normalizeAddress(networkMsg.DstAddr) + + // Check if any role ID matches current event ID + var matchingRole *RoleInfo + if isCurrentMachine && currentID != "" { + for i := range worker.Roles { + if worker.Roles[i].ID == currentID { + matchingRole = &worker.Roles[i] + break + } + } + } + + // Display machine address at top level (no ID here) + if matchingRole != nil { + // Role ID matches - show machine normally, will highlight role below + workerLine := fmt.Sprintf("● %s", worker.Machine) + + // Highlight network message src/dst with yellow background and directional arrow + if isNetworkSrc { + // Source: yellow background with →→→ at end + networkStyle := lipgloss.NewStyle().Background(lipgloss.Color("220")).Foreground(lipgloss.Color("0")).Bold(true) + workerLine = fmt.Sprintf("● %s →→→", worker.Machine) + allTopologyLines = append(allTopologyLines, networkStyle.Render(workerLine)) + } else if isNetworkDst { + // Destination: yellow background with ←←← at end + networkStyle := lipgloss.NewStyle().Background(lipgloss.Color("220")).Foreground(lipgloss.Color("0")).Bold(true) + workerLine = fmt.Sprintf("● %s ←←←", worker.Machine) + allTopologyLines = append(allTopologyLines, networkStyle.Render(workerLine)) + } else if worker.HasNonWorkerRoles() 
{ + allTopologyLines = append(allTopologyLines, workerStyleGreen.Render(workerLine)) + } else { + allTopologyLines = append(allTopologyLines, workerStyleGray.Render(workerLine)) + } + + // Show each role, highlighting the one with matching ID + for _, role := range worker.Roles { + roleLabel := formatRoleLabel(role) + if role.ID == currentID { + // Highlight this specific role + allTopologyLines = append(allTopologyLines, roleStyleCurrent.Render(" → "+roleLabel)) + } else { + allTopologyLines = append(allTopologyLines, roleStyle.Render(" "+roleLabel)) + } + } + } else if isCurrentMachine { + // Machine matches but no role ID match - highlight machine + workerLine := fmt.Sprintf("● %s", worker.Machine) + + // Network message highlighting takes precedence + if isNetworkSrc { + networkStyle := lipgloss.NewStyle().Background(lipgloss.Color("220")).Foreground(lipgloss.Color("0")).Bold(true) + workerLine = fmt.Sprintf("● %s →→→", worker.Machine) + allTopologyLines = append(allTopologyLines, networkStyle.Render(workerLine)) + } else if isNetworkDst { + networkStyle := lipgloss.NewStyle().Background(lipgloss.Color("220")).Foreground(lipgloss.Color("0")).Bold(true) + workerLine = fmt.Sprintf("● %s ←←←", worker.Machine) + allTopologyLines = append(allTopologyLines, networkStyle.Render(workerLine)) + } else { + allTopologyLines = append(allTopologyLines, workerStyleCurrent.Render("→ "+workerLine)) + } + + // Show all roles normally + for _, role := range worker.Roles { + roleLabel := formatRoleLabel(role) + allTopologyLines = append(allTopologyLines, roleStyle.Render(" "+roleLabel)) + } + } else if worker.HasNonWorkerRoles() { + // Normal tester with roles (no match) + workerLine := fmt.Sprintf("● %s", worker.Machine) + + if isNetworkSrc { + networkStyle := lipgloss.NewStyle().Background(lipgloss.Color("220")).Foreground(lipgloss.Color("0")).Bold(true) + workerLine = fmt.Sprintf("● %s →→→", worker.Machine) + allTopologyLines = append(allTopologyLines, 
networkStyle.Render(workerLine)) + } else if isNetworkDst { + networkStyle := lipgloss.NewStyle().Background(lipgloss.Color("220")).Foreground(lipgloss.Color("0")).Bold(true) + workerLine = fmt.Sprintf("● %s ←←←", worker.Machine) + allTopologyLines = append(allTopologyLines, networkStyle.Render(workerLine)) + } else { + allTopologyLines = append(allTopologyLines, workerStyleGreen.Render(workerLine)) + } + + for _, role := range worker.Roles { + roleLabel := formatRoleLabel(role) + allTopologyLines = append(allTopologyLines, roleStyle.Render(" "+roleLabel)) + } + } else { + // Tester without roles OR only Worker role (no match) + workerLine := fmt.Sprintf("● %s", worker.Machine) + + if isNetworkSrc { + networkStyle := lipgloss.NewStyle().Background(lipgloss.Color("220")).Foreground(lipgloss.Color("0")).Bold(true) + workerLine = fmt.Sprintf("● %s →→→", worker.Machine) + allTopologyLines = append(allTopologyLines, networkStyle.Render(workerLine)) + } else if isNetworkDst { + networkStyle := lipgloss.NewStyle().Background(lipgloss.Color("220")).Foreground(lipgloss.Color("0")).Bold(true) + workerLine = fmt.Sprintf("● %s ←←←", worker.Machine) + allTopologyLines = append(allTopologyLines, networkStyle.Render(workerLine)) + } else { + allTopologyLines = append(allTopologyLines, workerStyleGray.Render(workerLine)) + } + + // Show all roles (including Worker if present) + for _, role := range worker.Roles { + roleLabel := formatRoleLabel(role) + allTopologyLines = append(allTopologyLines, roleStyle.Render(" "+roleLabel)) + } + } + } + } + } + + // If we have a network message but source machine was not found in topology, show it separately + if networkMsg != nil && !networkMsg.SrcExists { + // Source not found, show with yellow highlight and →→→ arrow + allTopologyLines = append(allTopologyLines, "") + networkStyle := lipgloss.NewStyle().Background(lipgloss.Color("220")).Foreground(lipgloss.Color("0")).Bold(true) + allTopologyLines = append(allTopologyLines, 
networkStyle.Render(fmt.Sprintf("● %s →→→", networkMsg.SrcAddr))) + } + + // Add RPC name as a dedicated last row in topology section + if networkMsg != nil && networkMsg.StrippedRPC != "" { + allTopologyLines = append(allTopologyLines, "") + rpcStyle := lipgloss.NewStyle(). + Foreground(lipgloss.Color("220")). + Bold(true). + Underline(true) + allTopologyLines = append(allTopologyLines, rpcStyle.Render(fmt.Sprintf(" RPC: %s", networkMsg.StrippedRPC))) + } + + // Pack lines into columns based on available height + // Keep machines and their roles together (don't split across columns) + var columns [][]string + if len(allTopologyLines) <= availableHeight { + // Everything fits in one column + columns = [][]string{allTopologyLines} + } else { + // Need multiple columns - group lines to keep machines together + currentColumn := []string{} + i := 0 + for i < len(allTopologyLines) { + line := allTopologyLines[i] + + // Check if this is a DC header or machine line (starts with "DC", "Testers", "●", or "→") + // These mark the start of a new logical group + isGroupStart := strings.HasPrefix(line, "DC ") || + strings.HasPrefix(line, "Testers") || + strings.Contains(line, "● ") || + strings.Contains(line, "→ ●") + + if isGroupStart && len(currentColumn) > 0 { + // Peek ahead to see how many lines this group needs + groupSize := 1 // Current line + for j := i + 1; j < len(allTopologyLines); j++ { + nextLine := allTopologyLines[j] + // Check if next line is a role (indented) or another group start + if strings.HasPrefix(nextLine, "DC ") || + strings.HasPrefix(nextLine, "Testers") || + strings.Contains(nextLine, "● ") || + strings.Contains(nextLine, "→ ●") { + break // End of this group + } + groupSize++ + } + + // If adding this group would overflow, start new column + if len(currentColumn) + groupSize > availableHeight { + columns = append(columns, currentColumn) + currentColumn = []string{} + } + } + + currentColumn = append(currentColumn, line) + i++ + } + // Add the last 
column + if len(currentColumn) > 0 { + columns = append(columns, currentColumn) + } + } + + // Calculate max width for each column + columnWidths := make([]int, len(columns)) + for colIdx, column := range columns { + maxWidth := 0 + for _, line := range column { + width := lipgloss.Width(line) + if width > maxWidth { + maxWidth = width + } + } + columnWidths[colIdx] = maxWidth + 2 // Add 2 for spacing between columns + } + + // Calculate total left pane width + leftPaneWidth := 0 + for _, w := range columnWidths { + leftPaneWidth += w + } + if leftPaneWidth < 20 { + leftPaneWidth = 20 + } + // Don't take more than 60% of screen + maxLeftWidth := (m.width * 3) / 5 + if leftPaneWidth > maxLeftWidth { + leftPaneWidth = maxLeftWidth + } + + // Calculate right pane width (rest of screen minus border) + rightPaneWidth := m.width - leftPaneWidth - 3 // 3 for " │ " + if rightPaneWidth < 30 { + rightPaneWidth = 30 + } + + // Build right pane (event list) content as array of lines + // If in search mode, reserve 1 line for search bar + eventListHeight := availableHeight + if m.searchMode { + eventListHeight = availableHeight - 1 + } + + // Pass search pattern if search is active + searchPattern := "" + if m.searchActive { + searchPattern = m.searchPattern + } + eventLines := m.buildEventListPane(eventListHeight, rightPaneWidth, searchPattern) + + // If in search mode, add search bar as last line + if m.searchMode { + searchBarStyle := lipgloss.NewStyle().Foreground(lipgloss.Color("252")) + var searchBar string + if m.searchDirection == "forward" { + searchBar = "/" + m.searchInput.View() + } else { + searchBar = "?" 
+ m.searchInput.View() + } + eventLines = append(eventLines, searchBarStyle.Render(searchBar)) + } + + // Pad columns to same height + maxLines := availableHeight + for i := range columns { + for len(columns[i]) < maxLines { + columns[i] = append(columns[i], "") + } + } + for len(eventLines) < maxLines { + eventLines = append(eventLines, "") + } + + // Build split view line by line with columnar topology + var splitContent strings.Builder + borderStyle := lipgloss.NewStyle().Foreground(lipgloss.Color("240")) + + for lineIdx := 0; lineIdx < maxLines; lineIdx++ { + // Render all topology columns for this line + for colIdx, column := range columns { + var line string + if lineIdx < len(column) { + line = column[lineIdx] + } + + // Pad to column width + lineWidth := lipgloss.Width(line) + targetWidth := columnWidths[colIdx] + if lineWidth < targetWidth { + line = line + strings.Repeat(" ", targetWidth-lineWidth) + } + + splitContent.WriteString(line) + } + + // Add border and event line + splitContent.WriteString(borderStyle.Render(" │ ")) + + if lineIdx < len(eventLines) { + splitContent.WriteString(eventLines[lineIdx]) + } + + splitContent.WriteString("\n") + } + + // Build bottom section (spans full width) + var bottomSection strings.Builder + + // Add separator + separator := strings.Repeat("─", m.width) + bottomSection.WriteString(lipgloss.NewStyle().Foreground(lipgloss.Color("240")).Render(separator)) + bottomSection.WriteString("\n") + + // DB Configuration section + config := m.traceData.GetLatestConfigAtTime(m.currentTime) + if config != nil { + configStyle := lipgloss.NewStyle(). + Foreground(lipgloss.Color("243")). + PaddingLeft(1) + + configTitleStyle := lipgloss.NewStyle(). + Foreground(lipgloss.Color("39")). + Bold(true) + + configValueStyle := lipgloss.NewStyle(). 
+ Foreground(lipgloss.Color("252")) + + configContent := configTitleStyle.Render(fmt.Sprintf("DB Config (t=%.2fs)", config.Time)) + " " + + // Build compact config display with exact field names + configParts := []string{} + if config.RedundancyMode != "" { + configParts = append(configParts, fmt.Sprintf("redundancy_mode=%s", config.RedundancyMode)) + } + if config.UsableRegions > 0 { + configParts = append(configParts, fmt.Sprintf("usable_regions=%d", config.UsableRegions)) + } + if config.Logs > 0 { + configParts = append(configParts, fmt.Sprintf("logs=%d", config.Logs)) + } + if config.LogRouters > 0 { + configParts = append(configParts, fmt.Sprintf("log_routers=%d", config.LogRouters)) + } + if config.RemoteLogs > 0 { + configParts = append(configParts, fmt.Sprintf("remote_logs=%d", config.RemoteLogs)) + } + if config.Proxies > 0 { + configParts = append(configParts, fmt.Sprintf("proxies=%d", config.Proxies)) + } + if config.GrvProxies > 0 { + configParts = append(configParts, fmt.Sprintf("grv_proxies=%d", config.GrvProxies)) + } + if config.BackupWorkerEnabled > 0 { + configParts = append(configParts, fmt.Sprintf("backup_worker_enabled=%d", config.BackupWorkerEnabled)) + } + if config.StorageEngine != "" { + configParts = append(configParts, fmt.Sprintf("storage_engine=%s", config.StorageEngine)) + } + + configContent += configValueStyle.Render(strings.Join(configParts, " | ")) + bottomSection.WriteString(configStyle.Render(configContent)) + bottomSection.WriteString("\n") + } + + // Recovery State section + recoveryState := m.traceData.GetLatestRecoveryStateAtIndex(m.currentEventIndex) + if recoveryState != nil { + recoveryStyle := lipgloss.NewStyle(). + Foreground(lipgloss.Color("243")). + PaddingLeft(1) + + recoveryTitleStyle := lipgloss.NewStyle(). + Foreground(lipgloss.Color("39")). 
+ Bold(true) + + // Color code based on StatusCode value + var recoveryValueStyle lipgloss.Style + if statusCode, err := strconv.Atoi(recoveryState.StatusCode); err == nil { + if statusCode < 11 { + // Red for < 11 + recoveryValueStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("196")) + } else if statusCode >= 11 && statusCode < 14 { + // Blue for 11 <= statusCode < 14 + recoveryValueStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("39")) + } else if statusCode == 14 { + // Green for = 14 + recoveryValueStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("46")) + } else { + // Default gray for > 14 + recoveryValueStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("252")) + } + } else { + // Default gray if can't parse + recoveryValueStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("252")) + } + + recoveryContent := recoveryTitleStyle.Render(fmt.Sprintf("Recovery State (t=%.6fs)", recoveryState.Time)) + " " + recoveryContent += recoveryValueStyle.Render(fmt.Sprintf("StatusCode=%s | Status=%s", recoveryState.StatusCode, recoveryState.Status)) + + bottomSection.WriteString(recoveryStyle.Render(recoveryContent)) + bottomSection.WriteString("\n") + } + + // Epoch Version Info section + epochInfo := m.traceData.GetLatestEpochVersionAtIndex(m.currentEventIndex) + if epochInfo != nil { + epochStyle := lipgloss.NewStyle(). + Foreground(lipgloss.Color("243")). + PaddingLeft(1) + + epochTitleStyle := lipgloss.NewStyle(). + Foreground(lipgloss.Color("39")). + Bold(true) + + epochValueStyle := lipgloss.NewStyle(). 
+ Foreground(lipgloss.Color("252")) // Same gray/white as config values + + // Format KCV - show "n/a" if not available + kcvStr := "n/a" + if epochInfo.HasKCV && epochInfo.KCV > 0 { + kcvStr = formatNumberWithCommas(epochInfo.KCV) + } + + epochContent := epochTitleStyle.Render(fmt.Sprintf("Epoch (t=%.6fs)", epochInfo.Time)) + " " + epochContent += epochValueStyle.Render(fmt.Sprintf("epoch=%d | KCV=%s | RV=%s | recoveryTxnVersion=%s", + epochInfo.Epoch, kcvStr, formatNumberWithCommas(epochInfo.RV), formatNumberWithCommas(epochInfo.RecoveryTxnVersion))) + + bottomSection.WriteString(epochStyle.Render(epochContent)) + bottomSection.WriteString("\n") + } + + // Time scrubber + separatorStyle := lipgloss.NewStyle().Foreground(lipgloss.Color("240")) + bottomSection.WriteString(separatorStyle.Render(strings.Repeat("─", 20))) + bottomSection.WriteString("\n") + scrubberContent := fmt.Sprintf("Time: %.6fs", m.currentTime) + bottomSection.WriteString(scrubberStyle.Render(scrubberContent)) + bottomSection.WriteString("\n") + + // Help text + help := helpStyle.Render("Ctrl+N/P: next/prev event | g/G: start/end | t: jump time | /?: search | n/N: next/prev match | f: filter | r/R: recovery | c: config | x: health | h: help | q: quit") + bottomSection.WriteString(help) + + // Combine split view with bottom section + fullView := splitContent.String() + bottomSection.String() + + // If in filter view mode, show appropriate popup overlay + if m.filterViewMode { + // If in machine selection sub-popup, show it + if m.filterMachineSelectMode { + return m.renderMachineSelectionPopup(fullView) + } + // If in time range configuration sub-popup, show it + if m.filterTimeInputMode { + return m.renderFilterTimeRangePopup(fullView) + } + // If in Type search sub-popup, show it + if m.filterTypeSearchMode { + return m.renderTypeSearchPopup(fullView) + } + // Otherwise show main filter popup + return m.renderFilterPopup(fullView) + } + + // If in help view mode, show help popup overlay + if 
m.helpViewMode {
		return m.renderHelpPopup(fullView)
	}

	// If in health view mode, show health popup overlay
	if m.healthViewMode {
		return m.renderHealthPopup(fullView)
	}

	// If in config view mode, show config popup overlay
	if m.configViewMode {
		config := m.traceData.GetLatestConfigAtTime(m.currentTime)
		if config != nil {
			return m.renderConfigPopup(fullView, config)
		} else {
			// No config available yet - show message
			return m.renderNoConfigPopup(fullView)
		}
	}

	// If in time input mode, show popup overlay
	if m.timeInputMode {
		return m.renderTimeInputPopup(fullView)
	}

	return fullView
}

// renderFilterPopup renders the filter configuration popup: the "All" toggle,
// the four filter categories (raw, machine, time range, message) and a help
// line that depends on the currently focused category. The result is centered
// in an m.width x m.height area with lipgloss.Place.
//
// baseView is part of the popup-renderer signature but is not read here.
func (m model) renderFilterPopup(baseView string) string {
	const disabledNotice = " (disabled - toggle All off to configure)"

	popupStyle := lipgloss.NewStyle().
		Border(lipgloss.RoundedBorder()).
		BorderForeground(lipgloss.Color("39")).
		Padding(1, 2).
		Width(90)

	titleStyle := lipgloss.NewStyle().
		Bold(true).
		Foreground(lipgloss.Color("39")).
		Underline(true)

	categoryStyle := lipgloss.NewStyle().
		Bold(true).
		Foreground(lipgloss.Color("46"))

	categorySelectedStyle := lipgloss.NewStyle().
		Bold(true).
		Foreground(lipgloss.Color("226"))

	normalStyle := lipgloss.NewStyle().
		Foreground(lipgloss.Color("252"))

	grayedStyle := lipgloss.NewStyle().
		Foreground(lipgloss.Color("240"))

	selectedStyle := lipgloss.NewStyle().
		Foreground(lipgloss.Color("226")).
		Bold(true)

	var content strings.Builder

	// writeLine appends one already-rendered line plus its trailing newline.
	writeLine := func(rendered string) {
		content.WriteString(rendered)
		content.WriteString("\n")
	}

	// When "All" is on, every category is shown grayed out and not configurable.
	isGrayed := m.filterShowAll

	// writeCategoryHeader emits the blank separator line and the category title.
	// Grayed wins over "current": a focused category is only highlighted while
	// the categories are configurable.
	writeCategoryHeader := func(isCurrent bool, title string) {
		headerStyle := categoryStyle
		switch {
		case isGrayed:
			headerStyle = grayedStyle
		case isCurrent:
			headerStyle = categorySelectedStyle
		}
		content.WriteString("\n")
		writeLine(headerStyle.Render(title))
	}

	content.WriteString(titleStyle.Render("Filter Configuration"))
	content.WriteString("\n\n")

	// "All" checkbox
	checkbox := "[ ]"
	if m.filterShowAll {
		checkbox = "[x]"
	}
	writeLine(normalStyle.Render(fmt.Sprintf("%s All (space to toggle)", checkbox)))

	// Category 1: Raw Filters
	writeCategoryHeader(m.filterCurrentCategory == 0, "[1] Raw Filters (OR within category):")
	if isGrayed {
		writeLine(grayedStyle.Render(disabledNotice))
	} else {
		if len(m.filterRawList) == 0 {
			writeLine(normalStyle.Render(" (no filters)"))
		} else {
			// Filters are paged into columns of at most maxRows entries and only
			// the column selected by m.filterRawColumn is drawn, so the visible
			// index range is computed directly instead of materializing every
			// column.
			const maxRows = 5
			numColumns := (len(m.filterRawList) + maxRows - 1) / maxRows

			// Show column indicator if multiple columns exist
			if numColumns > 1 {
				columnIndicator := fmt.Sprintf("Column %d/%d (Ctrl+F/B to navigate)", m.filterRawColumn+1, numColumns)
				content.WriteString(normalStyle.Render(" " + columnIndicator))
				content.WriteString("\n\n")
			}

			// An out-of-range column index draws no filter rows at all.
			if m.filterRawColumn >= 0 && m.filterRawColumn < numColumns {
				first := m.filterRawColumn * maxRows
				last := first + maxRows
				if last > len(m.filterRawList) {
					last = len(m.filterRawList)
				}

				for i := first; i < last; i++ {
					filter := m.filterRawList[i]
					filterStyle := normalStyle
					prefix := " "

					// NOTE(review): m.filterRawDisabled is indexed by the same
					// position as m.filterRawList; confirm the two stay in sync.
					disabled := m.filterRawDisabled[i]
					if disabled {
						filterStyle = grayedStyle
						filter = filter + " [disabled]"
					}

					// The cursor is hidden while the "new filter" input is active.
					if i == m.filterRawSelectedIndex && !m.filterRawInputActive {
						if disabled {
							// Selected but disabled - grayed, yet bold like a selection
							filterStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("240")).Bold(true)
						} else {
							filterStyle = selectedStyle
						}
						prefix = "→ "
					}

					writeLine(filterStyle.Render(fmt.Sprintf("%s%s", prefix, filter)))
				}
			}
		}

		// Input field for a new raw filter, or the key hints when not typing
		if m.filterRawInputActive {
			content.WriteString(selectedStyle.Render(" New filter: "))
			writeLine(m.filterRawInput.View())
		} else {
			writeLine(normalStyle.Render(" Press 'a' to add, 'e' to edit, 'r' to remove, 'd' to toggle disable, 't' for Type search, 'c' for common, Ctrl+N/P to navigate, Ctrl+F/B for columns"))
		}
	}

	// Category 2: Machine Filters
	writeCategoryHeader(m.filterCurrentCategory == 1, "[2] By Machine (OR within category):")
	if isGrayed {
		writeLine(grayedStyle.Render(disabledNotice))
	} else {
		// Union of individually selected machines and machines of selected DCs,
		// used only for the "N selected" count.
		machineSet := make(map[string]bool)
		for _, machine := range m.filterMachineList {
			machineSet[machine] = true
		}

		if len(m.filterMachineDCs) > 0 {
			// NOTE(review): the cluster state is rebuilt from all events on every
			// render while a DC is selected; consider caching if this is costly.
			clusterState := BuildClusterState(m.traceData.Events)
			for dcID, workers := range clusterState.GetWorkersByDC() {
				if !m.filterMachineDCs[dcID] {
					continue
				}
				for _, worker := range workers {
					machineSet[worker.Machine] = true
				}
			}
		}

		// Show selected machines count only (not DCs)
		if len(m.filterMachineList) == 0 && len(m.filterMachineDCs) == 0 {
			writeLine(normalStyle.Render(" (no machines selected)"))
		} else {
			writeLine(normalStyle.Render(fmt.Sprintf(" Machines: %d selected", len(machineSet))))
		}
		writeLine(normalStyle.Render(" Press Enter to configure"))
	}

	// Category 3: Time Range
	writeCategoryHeader(m.filterCurrentCategory == 2, "[3] By Time Range:")
	if isGrayed {
		writeLine(grayedStyle.Render(disabledNotice))
	} else {
		if m.filterTimeEnabled {
			writeLine(normalStyle.Render(fmt.Sprintf(" [x] Enabled: %.6fs - %.6fs", m.filterTimeStart, m.filterTimeEnd)))
		} else {
			writeLine(normalStyle.Render(" [ ] Disabled"))
		}
		writeLine(normalStyle.Render(" Press 'd' to toggle, Enter to configure"))
	}

	// Category 4: Message Filter
	writeCategoryHeader(m.filterCurrentCategory == 3, "[4] By Message (NetworkMessageSent events only):")
	if isGrayed {
		writeLine(grayedStyle.Render(disabledNotice))
	} else {
		if m.filterMessageEnabled {
			writeLine(normalStyle.Render(" [x] Enabled: Show only NetworkMessageSent events"))
		} else {
			writeLine(normalStyle.Render(" [ ] Disabled"))
		}
		writeLine(normalStyle.Render(" Press 'd' to toggle"))
	}

	// Help text
	content.WriteString("\n")
	if isGrayed {
		content.WriteString(normalStyle.Render("Space: toggle All | q/f/Esc: close"))
	} else {
		// Context-sensitive help based on current category; any other category
		// value contributes no prefix.
		var categoryHelp string
		switch m.filterCurrentCategory {
		case 0: // Raw
			categoryHelp = "a: add | t: Type search | e: edit | r: remove | d: toggle disable | c: common | Ctrl+N/P: navigate | Ctrl+F/B: columns | "
		case 1: // Machine
			categoryHelp = "Enter: configure machines | "
		case 2: // Time
			categoryHelp = "d: toggle | Enter: configure range | "
		case 3: // Message
			categoryHelp = "d: toggle | "
		}
		content.WriteString(normalStyle.Render(categoryHelp + "1/2/3/4: jump | Ctrl+N/P: switch | Space: toggle All | q/f/Esc: close"))
	}

	popup := popupStyle.Render(content.String())

	// Center the popup
	return lipgloss.Place(m.width, m.height, lipgloss.Center, lipgloss.Center, popup, lipgloss.WithWhitespaceChars(" "))
}

// renderFilterTimeRangePopup renders the time range configuration popup. The
// field being edited (start when m.filterTimeEditingStart is set, end
// otherwise) is shown as a highlighted label followed by the text input; the
// other field is shown as a static formatted value.
//
// baseView is part of the popup-renderer signature but is not read here.
func (m model) renderFilterTimeRangePopup(baseView string) string {
	const (
		startLabel = "Start Time:"
		endLabel   = "End Time:"
	)

	popupStyle := lipgloss.NewStyle().
		Border(lipgloss.RoundedBorder()).
		BorderForeground(lipgloss.Color("39")).
		Padding(1, 2).
		Width(60)

	titleStyle := lipgloss.NewStyle().
		Bold(true).
		Foreground(lipgloss.Color("39"))

	labelStyle := lipgloss.NewStyle().
		Foreground(lipgloss.Color("252"))

	selectedLabelStyle := lipgloss.NewStyle().
		Foreground(lipgloss.Color("226")).
		Bold(true)

	// Local to this popup; named so it does not shadow the package-level helpStyle.
	popupHelpStyle := lipgloss.NewStyle().
		Foreground(lipgloss.Color("241")).
		MarginTop(1)

	rangeStyle := lipgloss.NewStyle().
		Foreground(lipgloss.Color("243")).
		Italic(true)

	var content strings.Builder
	content.WriteString(titleStyle.Render("Configure Time Range Filter"))
	content.WriteString("\n\n")

	// writeEditing draws the label of the field under edit plus the live input.
	writeEditing := func(label string) {
		content.WriteString(selectedLabelStyle.Render(label))
		content.WriteString(" ")
		content.WriteString(m.filterTimeInput.View())
	}

	if m.filterTimeEditingStart {
		writeEditing(startLabel)
		content.WriteString("\n\n")
		content.WriteString(labelStyle.Render(fmt.Sprintf("%s %.6fs", endLabel, m.filterTimeEnd)))
	} else {
		content.WriteString(labelStyle.Render(fmt.Sprintf("%s %.6fs", startLabel, m.filterTimeStart)))
		content.WriteString("\n\n")
		writeEditing(endLabel)
	}

	content.WriteString("\n\n")
	content.WriteString(rangeStyle.Render(fmt.Sprintf("Valid range: %.2f - %.2f seconds", m.traceData.MinTime, m.traceData.MaxTime)))

	content.WriteString("\n")
	content.WriteString(popupHelpStyle.Render("Tab: switch field | Enter: confirm | Esc: cancel"))

	popup := popupStyle.Render(content.String())

	// Center the popup
	return lipgloss.Place(m.width, m.height, lipgloss.Center, lipgloss.Center, popup, lipgloss.WithWhitespaceChars(" "))
}

// renderMachineSelectionPopup renders the machine selection popup: a search
// input, then one "column" (page) of DC headers and machines with their roles,
// then the key hints. Items come from m.getMachineSelectionItems() and are
// paged so that a machine is never separated from its role lines.
//
// baseView is part of the popup-renderer signature but is not read here.
func (m model) renderMachineSelectionPopup(baseView string) string {
	popupStyle := lipgloss.NewStyle().
		Border(lipgloss.RoundedBorder()).
		BorderForeground(lipgloss.Color("39")).
		Padding(1, 2).
		Width(90).
		MaxHeight(m.height - 4)

	titleStyle := lipgloss.NewStyle().
		Bold(true).
		Foreground(lipgloss.Color("39")).
		Underline(true)

	dcHeaderStyle := lipgloss.NewStyle().
		Bold(true).
		Foreground(lipgloss.Color("33"))

	testerHeaderStyle := lipgloss.NewStyle().
		Bold(true).
		Foreground(lipgloss.Color("135"))

	normalStyle := lipgloss.NewStyle().
		Foreground(lipgloss.Color("252"))

	selectedStyle := lipgloss.NewStyle().
		Foreground(lipgloss.Color("226")).
		Bold(true)

	checkedStyle := lipgloss.NewStyle().
		Foreground(lipgloss.Color("46"))

	roleStyle := lipgloss.NewStyle().
		Foreground(lipgloss.Color("240"))

	var content strings.Builder
	content.WriteString(titleStyle.Render("Select Machines"))
	content.WriteString("\n\n")

	// Search input
	content.WriteString(normalStyle.Render("Search: "))
	content.WriteString(m.filterMachineInput.View())
	content.WriteString("\n\n")

	items := m.getMachineSelectionItems()

	// Lines left for items after title, search, help, padding and border.
	maxDisplayLines := m.height - 12

	// Pack items into columns. An item costs one line plus one per role for
	// machines. A column is closed only when it already holds something, so an
	// oversized item (or a non-positive budget) still gets a column of its own.
	var columns [][]SelectableItem
	var pending []SelectableItem
	pendingHeight := 0
	for _, item := range items {
		itemLines := 1 // DC header or machine line
		if item.Type == "machine" && item.Worker != nil {
			itemLines += len(item.Worker.Roles)
		}

		if len(pending) > 0 && pendingHeight+itemLines > maxDisplayLines {
			columns = append(columns, pending)
			pending = nil
			pendingHeight = 0
		}

		pending = append(pending, item)
		pendingHeight += itemLines
	}
	if len(pending) > 0 {
		columns = append(columns, pending)
	}

	if len(columns) == 0 {
		content.WriteString(normalStyle.Render(" (no matches)"))
		content.WriteString("\n")
	} else {
		// Clamp the requested column into [0, len(columns)-1]. The receiver is a
		// value, so the clamp only ever affected this render; keep it local.
		col := m.filterMachineColumn
		if col >= len(columns) {
			col = len(columns) - 1
		}
		if col < 0 {
			col = 0
		}

		// Show column indicator if multiple columns exist
		if len(columns) > 1 {
			columnIndicator := fmt.Sprintf("Column %d/%d (Ctrl+F/B to navigate)", col+1, len(columns))
			content.WriteString(normalStyle.Render(" " + columnIndicator))
			content.WriteString("\n\n")
		}

		// Individually selected machines as a set, built once instead of
		// scanning m.filterMachineList for every row.
		picked := make(map[string]bool, len(m.filterMachineList))
		for _, name := range m.filterMachineList {
			picked[name] = true
		}

		for itemIdx, item := range columns[col] {
			// NOTE(review): the cursor is matched against the index within the
			// visible column; confirm m.filterMachineSelected is column-relative.
			highlighted := itemIdx == m.filterMachineSelected

			switch item.Type {
			case "dc":
				header := "Testers"
				if item.DC != "Testers" {
					header = fmt.Sprintf("DC%s", item.DC)
				}

				switch {
				case highlighted:
					content.WriteString(selectedStyle.Render("→ " + header))
				case item.DC == "Testers":
					content.WriteString(testerHeaderStyle.Render(" " + header))
				default:
					content.WriteString(dcHeaderStyle.Render(" " + header))
				}
				content.WriteString("\n")

			case "machine":
				// A machine is checked when picked individually or when its DC
				// (as derived from the address) is selected.
				dc := extractDCFromAddress(item.Machine)
				isSelected := picked[item.Machine] || (dc != "" && m.filterMachineDCs[dc])

				rowStyle, lead := normalStyle, " "
				if highlighted {
					rowStyle, lead = selectedStyle, "→ "
				}

				if isSelected {
					// Render the row in three segments. Splicing a pre-rendered
					// checkbox into a string that is rendered again lets the
					// checkbox's ANSI reset cancel the row style for the rest of
					// the line, so the machine name lost its colour/bold.
					content.WriteString(rowStyle.Render(lead))
					content.WriteString(checkedStyle.Render("[x]"))
					content.WriteString(rowStyle.Render(" " + item.Machine))
				} else {
					content.WriteString(rowStyle.Render(lead + "[ ] " + item.Machine))
				}
				content.WriteString("\n")

				// Role lines under the machine
				if item.Worker != nil {
					for _, role := range item.Worker.Roles {
						roleLabel := formatRoleLabel(role)
						content.WriteString(roleStyle.Render(fmt.Sprintf(" %s", roleLabel)))
						content.WriteString("\n")
					}
				}
			}
		}
	}

	// Help text
	content.WriteString("\n")
	content.WriteString(normalStyle.Render("Ctrl+N/P: row | Ctrl+F/B: column | Space: toggle | Enter: done | Esc: cancel"))

	popup := popupStyle.Render(content.String())

	// Center the popup
	return lipgloss.Place(m.width, m.height, lipgloss.Center, lipgloss.Center, popup, lipgloss.WithWhitespaceChars(" "))
}

// renderTypeSearchPopup renders the Type search popup overlay
func (m model) renderTypeSearchPopup(baseView string) string {
	popupStyle := lipgloss.NewStyle().
		Border(lipgloss.RoundedBorder()).
		BorderForeground(lipgloss.Color("39")).
		Padding(1, 2).
		Width(70).
		MaxHeight(m.height - 4)

	titleStyle := lipgloss.NewStyle().
		Bold(true).
		Foreground(lipgloss.Color("39")).
		Underline(true)

	normalStyle := lipgloss.NewStyle().
		Foreground(lipgloss.Color("252"))

	selectedStyle := lipgloss.NewStyle().
		Foreground(lipgloss.Color("226")).
+ Bold(true) + + var content strings.Builder + content.WriteString(titleStyle.Render("Search Event Types")) + content.WriteString("\n\n") + + // Search input + content.WriteString(normalStyle.Render("Search: ")) + content.WriteString(m.filterTypeSearchInput.View()) + content.WriteString("\n\n") + + // Filter Type list based on search input + searchTerm := strings.ToLower(m.filterTypeSearchInput.Value()) + var filteredTypes []string + for _, t := range m.filterTypeSearchList { + if searchTerm == "" || strings.Contains(strings.ToLower(t), searchTerm) { + filteredTypes = append(filteredTypes, t) + } + } + + // Calculate available height for Type list + maxDisplayLines := m.height - 14 // Account for title, search, help, padding, border + + // Display filtered Types with scrolling + if len(filteredTypes) == 0 { + content.WriteString(normalStyle.Render(" (no matches)")) + content.WriteString("\n") + } else { + // Clamp selected index + if m.filterTypeSearchSelected >= len(filteredTypes) { + m.filterTypeSearchSelected = len(filteredTypes) - 1 + } + if m.filterTypeSearchSelected < 0 { + m.filterTypeSearchSelected = 0 + } + + // Calculate scroll offset to keep selected item visible + scrollOffset := 0 + if m.filterTypeSearchSelected >= maxDisplayLines { + scrollOffset = m.filterTypeSearchSelected - maxDisplayLines + 1 + } + + // Show count if many types + if len(filteredTypes) > maxDisplayLines { + countInfo := fmt.Sprintf(" Showing %d-%d of %d types", scrollOffset+1, min(scrollOffset+maxDisplayLines, len(filteredTypes)), len(filteredTypes)) + content.WriteString(normalStyle.Render(countInfo)) + content.WriteString("\n\n") + } + + // Display visible range + endIdx := scrollOffset + maxDisplayLines + if endIdx > len(filteredTypes) { + endIdx = len(filteredTypes) + } + + for i := scrollOffset; i < endIdx; i++ { + typeValue := filteredTypes[i] + prefix := " " + style := normalStyle + + if i == m.filterTypeSearchSelected { + prefix = "→ " + style = selectedStyle + } + + 
// min reports the smaller of two ints; for equal arguments either one is
// the answer, so b is handed back.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}

// NetworkMetric is one ping-latency sample between a pair of processes.
// collectNetworkMetrics fills it from "PingLatency" trace events.
type NetworkMetric struct {
	Time          string  // event timestamp exactly as it appears in the trace
	TimeValue     float64 // numeric timestamp, used to pick the newest sample per pair
	Src           string  // reporting machine (the event's Machine field)
	Dst           string  // peer address (PeerAddress / PeerAddr attribute)
	MinLatency    float64 // latencies are rendered with an "s" suffix, so presumably seconds
	MaxLatency    float64
	MedianLatency float64
	P90Latency    float64
	TimeoutCount  int
}

// DegradedPeerMetric is one degraded-peer detection between a pair of
// processes. collectDegradedPeerMetrics fills it from
// "HealthMonitorDetectDegradedPeer" trace events.
type DegradedPeerMetric struct {
	Time                     string  // event timestamp exactly as it appears in the trace
	TimeValue                float64 // numeric timestamp, used to pick the newest sample per pair
	Src                      string  // reporting machine (the event's Machine field)
	Dst                      string  // peer address (PeerAddress attribute)
	Disconnected             string  // raw "Disconnected" attribute, kept unparsed
	MinLatency               float64
	MaxLatency               float64
	MedianLatency            float64
	CheckedPercentileLatency float64
	ConnectionFailureCount   int
}

// ConnectionMetric is one Sim2Connection or SimulatedDisconnection
// observation between a pair of addresses.
type ConnectionMetric struct {
	Time          string  // event timestamp exactly as it appears in the trace
	TimeValue     float64 // numeric timestamp, used to pick the newest sample per pair
	Src           string
	Dst           string
	Latency       float64 // set for Sim2Connection events only
	Disconnection string  // Phase value from SimulatedDisconnection events
}
src := event.Machine + dst := event.Attrs["PeerAddress"] + if dst == "" { + dst = event.Attrs["PeerAddr"] + } + if src == "" || dst == "" { + continue + } + + // Skip 0.0.0.0 addresses (invalid/placeholder) + if strings.HasPrefix(src, "0.0.0.0") || strings.HasPrefix(dst, "0.0.0.0") { + continue + } + + // Create key for (src, dst) pair + key := src + "|" + dst + + // Parse metrics from event attributes + metric := NetworkMetric{ + Time: event.Time, + TimeValue: event.TimeValue, + Src: src, + Dst: dst, + } + + // Parse latencies - skip this metric if any value is invalid or unreasonably large + validMetric := true + if val, err := strconv.ParseFloat(event.Attrs["MinLatency"], 64); err == nil && val < 1000.0 { + metric.MinLatency = val + } else { + validMetric = false + } + if val, err := strconv.ParseFloat(event.Attrs["MaxLatency"], 64); err == nil && val < 1000.0 { + metric.MaxLatency = val + } else { + validMetric = false + } + if val, err := strconv.ParseFloat(event.Attrs["MedianLatency"], 64); err == nil && val < 1000.0 { + metric.MedianLatency = val + } else { + validMetric = false + } + if val, err := strconv.ParseFloat(event.Attrs["P90Latency"], 64); err == nil && val < 1000.0 { + metric.P90Latency = val + } else { + validMetric = false + } + if val, err := strconv.Atoi(event.Attrs["TimeoutCount"]); err == nil { + metric.TimeoutCount = val + } + + // Only keep valid metrics + if !validMetric { + continue + } + + // Keep latest event for this (src, dst) pair + if existing, found := metricsMap[key]; !found || event.TimeValue > existing.TimeValue { + metricsMap[key] = &metric + } + } + + // Convert map to slice + var metrics []NetworkMetric + for _, metric := range metricsMap { + metrics = append(metrics, *metric) + } + + // Sort by (MedianLatency desc, Src, Dst) for deterministic ordering + sort.Slice(metrics, func(i, j int) bool { + if metrics[i].MedianLatency != metrics[j].MedianLatency { + return metrics[i].MedianLatency > metrics[j].MedianLatency + } + if 
metrics[i].Src != metrics[j].Src { + return metrics[i].Src < metrics[j].Src + } + return metrics[i].Dst < metrics[j].Dst + }) + + return metrics +} + +// collectDegradedPeerMetrics collects HealthMonitorDetectDegradedPeer events up to current time +// Returns the latest metric for each (src, dst) pair, sorted by dst +func (m *model) collectDegradedPeerMetrics() []DegradedPeerMetric { + // Map to store latest metric for each (src, dst) pair + metricsMap := make(map[string]*DegradedPeerMetric) + + // Scan all events up to current event index + for i := 0; i <= m.currentEventIndex && i < len(m.traceData.Events); i++ { + event := &m.traceData.Events[i] + + // Filter for HealthMonitorDetectDegradedPeer events + if event.Type != "HealthMonitorDetectDegradedPeer" { + continue + } + + // Skip if no Machine or PeerAddress + src := event.Machine + dst := event.Attrs["PeerAddress"] + if dst == "" { + continue + } + if src == "" { + continue + } + + // Skip 0.0.0.0 addresses (invalid/placeholder) + if strings.HasPrefix(src, "0.0.0.0") || strings.HasPrefix(dst, "0.0.0.0") { + continue + } + + // Create key for (src, dst) pair + key := src + "|" + dst + + // Parse metric from event attributes + metric := DegradedPeerMetric{ + Time: event.Time, + TimeValue: event.TimeValue, + Src: src, + Dst: dst, + Disconnected: event.Attrs["Disconnected"], + } + + // Parse latencies - skip invalid or unreasonably large values + if val, err := strconv.ParseFloat(event.Attrs["MinLatency"], 64); err == nil && val < 1000.0 { + metric.MinLatency = val + } + if val, err := strconv.ParseFloat(event.Attrs["MaxLatency"], 64); err == nil && val < 1000.0 { + metric.MaxLatency = val + } + if val, err := strconv.ParseFloat(event.Attrs["MedianLatency"], 64); err == nil && val < 1000.0 { + metric.MedianLatency = val + } + if val, err := strconv.ParseFloat(event.Attrs["CheckedPercentileLatency"], 64); err == nil && val < 1000.0 { + metric.CheckedPercentileLatency = val + } + if val, err := 
strconv.Atoi(event.Attrs["ConnectionFailureCount"]); err == nil { + metric.ConnectionFailureCount = val + } + + // Keep latest event for this (src, dst) pair + if existing, found := metricsMap[key]; !found || event.TimeValue > existing.TimeValue { + metricsMap[key] = &metric + } + } + + // Convert map to slice + var metrics []DegradedPeerMetric + for _, metric := range metricsMap { + metrics = append(metrics, *metric) + } + + // Sort by (MedianLatency desc, Src, Dst) for deterministic ordering + sort.Slice(metrics, func(i, j int) bool { + if metrics[i].MedianLatency != metrics[j].MedianLatency { + return metrics[i].MedianLatency > metrics[j].MedianLatency + } + if metrics[i].Src != metrics[j].Src { + return metrics[i].Src < metrics[j].Src + } + return metrics[i].Dst < metrics[j].Dst + }) + + return metrics +} + +// collectConnectionMetrics collects Sim2Connection and SimulatedDisconnection events up to current time +// Returns the latest metric for each (src, dst) pair, sorted by latency (descending) +func (m *model) collectConnectionMetrics() []ConnectionMetric { + // Map to store latest metric for each (src, dst) pair + metricsMap := make(map[string]*ConnectionMetric) + + // Scan all events up to current event index + for i := 0; i <= m.currentEventIndex && i < len(m.traceData.Events); i++ { + event := &m.traceData.Events[i] + + var src, dst string + var metric ConnectionMetric + + if event.Type == "Sim2Connection" { + // Handle Sim2Connection events + src = event.Attrs["From"] + dst = event.Attrs["To"] + if src == "" || dst == "" { + continue + } + + // Skip 0.0.0.0 addresses (invalid/placeholder) + if strings.HasPrefix(src, "0.0.0.0") || strings.HasPrefix(dst, "0.0.0.0") { + continue + } + + metric = ConnectionMetric{ + Time: event.Time, + TimeValue: event.TimeValue, + Src: src, + Dst: dst, + } + + // Parse latency - skip invalid or unreasonably large values + if val, err := strconv.ParseFloat(event.Attrs["Latency"], 64); err == nil && val < 1000.0 { + 
metric.Latency = val + } + + } else if event.Type == "SimulatedDisconnection" { + // Handle SimulatedDisconnection events + src = event.Attrs["Address"] + dst = event.Attrs["PeerAddress"] + if src == "" || dst == "" { + continue + } + + // Skip 0.0.0.0 addresses (invalid/placeholder) + if strings.HasPrefix(src, "0.0.0.0") || strings.HasPrefix(dst, "0.0.0.0") { + continue + } + + metric = ConnectionMetric{ + Time: event.Time, + TimeValue: event.TimeValue, + Src: src, + Dst: dst, + Disconnection: event.Attrs["Phase"], + } + + } else { + continue + } + + // Create key for (src, dst) pair + key := src + "|" + dst + + // Keep latest event for this (src, dst) pair + if existing, found := metricsMap[key]; !found || event.TimeValue > existing.TimeValue { + metricsMap[key] = &metric + } + } + + // Convert map to slice + var metrics []ConnectionMetric + for _, metric := range metricsMap { + metrics = append(metrics, *metric) + } + + // Sort by (Latency desc, Src, Dst) for deterministic ordering + sort.Slice(metrics, func(i, j int) bool { + if metrics[i].Latency != metrics[j].Latency { + return metrics[i].Latency > metrics[j].Latency + } + if metrics[i].Src != metrics[j].Src { + return metrics[i].Src < metrics[j].Src + } + return metrics[i].Dst < metrics[j].Dst + }) + + return metrics +} + +// renderHealthPopup renders the health metrics popup overlay +func (m model) renderHealthPopup(baseView string) string { + popupStyle := lipgloss.NewStyle(). + Border(lipgloss.RoundedBorder()). + BorderForeground(lipgloss.Color("39")). + Padding(1, 2). + MaxWidth(m.width - 4). + MaxHeight(m.height - 4) + + titleStyle := lipgloss.NewStyle(). + Bold(true). + Foreground(lipgloss.Color("39")). + Underline(true) + + sectionStyle := lipgloss.NewStyle(). + Bold(true). + Foreground(lipgloss.Color("46")). + MarginTop(1) + + headerStyle := lipgloss.NewStyle(). + Foreground(lipgloss.Color("33")). + Bold(true) + + normalStyle := lipgloss.NewStyle(). 
+ Foreground(lipgloss.Color("252")) + + helpStyle := lipgloss.NewStyle(). + Foreground(lipgloss.Color("241")). + MarginTop(1) + + scrollIndicatorStyle := lipgloss.NewStyle(). + Foreground(lipgloss.Color("240")). + Italic(true) + + var content strings.Builder + content.WriteString(titleStyle.Render(fmt.Sprintf("Cluster Health Snapshot (t=%.6fs)", m.currentTime))) + content.WriteString("\n\n") + + // Network section + content.WriteString(sectionStyle.Render("NETWORK LATENCIES (PingLatency)")) + content.WriteString("\n\n") + + // Collect network metrics + metrics := m.collectNetworkMetrics() + + if len(metrics) == 0 { + content.WriteString(normalStyle.Render(" No PingLatency events found")) + content.WriteString("\n") + } else { + // Table header + header := fmt.Sprintf("%-12s %-21s %-21s %8s %8s %8s %8s %8s", + "Time", "Src", "Dst", "MinLat", "MaxLat", "MedLat", "P90Lat", "Timeouts") + content.WriteString(headerStyle.Render(header)) + content.WriteString("\n") + + // Separator line + separator := strings.Repeat("─", 130) + content.WriteString(normalStyle.Render(separator)) + content.WriteString("\n") + + // Table rows (show top 5 only) + displayCount := len(metrics) + if displayCount > 5 { + displayCount = 5 + } + + for i := 0; i < displayCount; i++ { + metric := metrics[i] + row := fmt.Sprintf("%-12s %-21s %-21s %7.3fs %7.3fs %7.3fs %7.3fs %8d", + metric.Time, + truncateAddr(metric.Src, 21), + truncateAddr(metric.Dst, 21), + metric.MinLatency, + metric.MaxLatency, + metric.MedianLatency, + metric.P90Latency, + metric.TimeoutCount) + content.WriteString(normalStyle.Render(row)) + content.WriteString("\n") + } + + if len(metrics) > 5 { + content.WriteString(normalStyle.Render(fmt.Sprintf(" ... 
%d more entries not shown", len(metrics)-5))) + content.WriteString("\n") + } + } + + // Degraded Peers section + content.WriteString("\n") + content.WriteString(sectionStyle.Render("DEGRADED PEERS (HealthMonitorDetectDegradedPeer)")) + content.WriteString("\n\n") + + // Collect degraded peer metrics + degradedMetrics := m.collectDegradedPeerMetrics() + + if len(degradedMetrics) == 0 { + content.WriteString(normalStyle.Render(" No HealthMonitorDetectDegradedPeer events found")) + content.WriteString("\n") + } else { + // Table header + header := fmt.Sprintf("%-12s %-21s %-21s %12s %8s %8s %8s %10s %10s", + "Time", "Src", "Dst (disc/deg)", "Disconnected", "MinLat", "MaxLat", "MedLat", "P%Lat", "ConnFail") + content.WriteString(headerStyle.Render(header)) + content.WriteString("\n") + + // Separator line + separator := strings.Repeat("─", 130) + content.WriteString(normalStyle.Render(separator)) + content.WriteString("\n") + + // Table rows (show top 5 only) + displayCount := len(degradedMetrics) + if displayCount > 5 { + displayCount = 5 + } + + for i := 0; i < displayCount; i++ { + metric := degradedMetrics[i] + row := fmt.Sprintf("%-12s %-21s %-21s %12s %7.3fs %7.3fs %7.3fs %9.3fs %10d", + metric.Time, + truncateAddr(metric.Src, 21), + truncateAddr(metric.Dst, 21), + metric.Disconnected, + metric.MinLatency, + metric.MaxLatency, + metric.MedianLatency, + metric.CheckedPercentileLatency, + metric.ConnectionFailureCount) + content.WriteString(normalStyle.Render(row)) + content.WriteString("\n") + } + + if len(degradedMetrics) > 5 { + content.WriteString(normalStyle.Render(fmt.Sprintf(" ... 
%d more entries not shown", len(degradedMetrics)-5))) + content.WriteString("\n") + } + } + + // Sim2Connection and SimulatedDisconnection section + content.WriteString("\n") + content.WriteString(sectionStyle.Render("CONNECTIONS (Sim2Connection / SimulatedDisconnection)")) + content.WriteString("\n\n") + + // Collect connection metrics + connMetrics := m.collectConnectionMetrics() + + if len(connMetrics) == 0 { + content.WriteString(normalStyle.Render(" No Sim2Connection or SimulatedDisconnection events found")) + content.WriteString("\n") + } else { + // Table header + header := fmt.Sprintf("%-12s %-21s %-21s %10s %14s", + "Time", "Src", "Dst", "Latency", "Disconnection") + content.WriteString(headerStyle.Render(header)) + content.WriteString("\n") + + // Separator line + separator := strings.Repeat("─", 85) + content.WriteString(normalStyle.Render(separator)) + content.WriteString("\n") + + // Table rows (show top 5 only) + displayCount := len(connMetrics) + if displayCount > 5 { + displayCount = 5 + } + + for i := 0; i < displayCount; i++ { + metric := connMetrics[i] + row := fmt.Sprintf("%-12s %-21s %-21s %9.3fs %14s", + metric.Time, + truncateAddr(metric.Src, 21), + truncateAddr(metric.Dst, 21), + metric.Latency, + metric.Disconnection) + content.WriteString(normalStyle.Render(row)) + content.WriteString("\n") + } + + if len(connMetrics) > 5 { + content.WriteString(normalStyle.Render(fmt.Sprintf(" ... 
%d more entries not shown", len(connMetrics)-5))) + content.WriteString("\n") + } + } + + // Split content into lines for scrolling + contentLines := strings.Split(content.String(), "\n") + totalLines := len(contentLines) + + // Calculate available height for content + // Account for: title (1 line) + top margin (1) + help text (2) + bottom margin (1) + padding (2) + border (2) = 9 lines + maxContentHeight := m.height - 9 + if maxContentHeight < 5 { + maxContentHeight = 5 // Minimum visible lines + } + + // Clamp scroll offset + maxScrollOffset := totalLines - maxContentHeight + if maxScrollOffset < 0 { + maxScrollOffset = 0 + } + + displayScrollOffset := m.healthScrollOffset + if displayScrollOffset < 0 { + displayScrollOffset = 0 + } + if displayScrollOffset > maxScrollOffset { + displayScrollOffset = maxScrollOffset + } + + // Determine if we have more content above/below + hasMoreAbove := displayScrollOffset > 0 + hasMoreBelow := displayScrollOffset < maxScrollOffset + + // Calculate visible window + visibleLines := contentLines + if totalLines > maxContentHeight { + endIdx := displayScrollOffset + maxContentHeight + if endIdx > totalLines { + endIdx = totalLines + } + visibleLines = contentLines[displayScrollOffset:endIdx] + } + + // Build content with scroll indicators + var scrollableContent strings.Builder + + if hasMoreAbove { + scrollableContent.WriteString(scrollIndicatorStyle.Render("↑ more above")) + scrollableContent.WriteString("\n") + } + + scrollableContent.WriteString(strings.Join(visibleLines, "\n")) + + if hasMoreBelow { + scrollableContent.WriteString("\n") + scrollableContent.WriteString(scrollIndicatorStyle.Render("↓ more below")) + } + + scrollableContent.WriteString("\n") + scrollableContent.WriteString(helpStyle.Render("\nPress q/x/Esc to close | Ctrl+N/P to scroll")) + + popup := popupStyle.Render(scrollableContent.String()) + + // Center the popup + return lipgloss.Place(m.width, m.height, lipgloss.Center, lipgloss.Center, popup, 
// truncateAddr shortens addr so that it occupies at most maxLen characters.
//
// Addresses that already fit are returned unchanged. Longer ones keep an
// equal number of leading and trailing characters around a "..." marker, so
// both the start and the end of the address stay visible.
//
// Lengths are counted in runes rather than bytes, so a multi-byte character
// is never cut in half. When maxLen is too small to hold the marker
// (maxLen < 3) the address is simply cut to its first maxLen characters, and
// a non-positive maxLen yields the empty string instead of slicing with a
// negative bound.
func truncateAddr(addr string, maxLen int) string {
	if maxLen <= 0 {
		return ""
	}

	runes := []rune(addr)
	if len(runes) <= maxLen {
		return addr
	}

	const ellipsis = "..."
	if maxLen < len(ellipsis) {
		// No room for the marker itself; a plain prefix is the best we can do.
		return string(runes[:maxLen])
	}

	// Truncate from the middle to keep both start and end visible.
	halfLen := (maxLen - len(ellipsis)) / 2
	return string(runes[:halfLen]) + ellipsis + string(runes[len(runes)-halfLen:])
}
content.WriteString(sectionStyle.Render("Search:")) + content.WriteString("\n") + content.WriteString(commandStyle.Render(" / Search forward (use * for wildcard)")) + content.WriteString("\n") + content.WriteString(commandStyle.Render(" ? Search backward (use * for wildcard)")) + content.WriteString("\n") + content.WriteString(commandStyle.Render(" n Go to next match")) + content.WriteString("\n") + content.WriteString(commandStyle.Render(" N Go to previous match")) + content.WriteString("\n") + content.WriteString(commandStyle.Render(" Esc Clear search highlighting")) + content.WriteString("\n\n") + + // Filter section + content.WriteString(sectionStyle.Render("Filter:")) + content.WriteString("\n") + content.WriteString(commandStyle.Render(" f Open filter configuration popup")) + content.WriteString("\n") + content.WriteString(commandStyle.Render(" Space Toggle 'All' to enable/disable filtering")) + content.WriteString("\n") + content.WriteString(commandStyle.Render(" 1/2/3 Jump to Raw/Machine/Time category")) + content.WriteString("\n") + content.WriteString(commandStyle.Render(" Ctrl+N/P Switch categories (or navigate within Raw filters)")) + content.WriteString("\n") + content.WriteString(commandStyle.Render(" a Add new raw filter (wildcard patterns: Type=*Recovery*)")) + content.WriteString("\n") + content.WriteString(commandStyle.Render(" t Search and add Type filter (fuzzy match Type values)")) + content.WriteString("\n") + content.WriteString(commandStyle.Render(" e Edit selected raw filter")) + content.WriteString("\n") + content.WriteString(commandStyle.Render(" r Remove selected raw filter")) + content.WriteString("\n") + content.WriteString(commandStyle.Render(" d Toggle disable on selected raw filter (Raw category)")) + content.WriteString("\n") + content.WriteString(commandStyle.Render(" Toggle time filter on/off (Time category)")) + content.WriteString("\n") + content.WriteString(commandStyle.Render(" c Toggle common trace event filters (Raw 
category)")) + content.WriteString("\n") + content.WriteString(commandStyle.Render(" Adds/removes pre-defined important event types")) + content.WriteString("\n") + content.WriteString(commandStyle.Render(" Ctrl+F/B Navigate columns (in Raw filters)")) + content.WriteString("\n") + content.WriteString(commandStyle.Render(" Enter Configure Machine/Time filters")) + content.WriteString("\n") + content.WriteString(commandStyle.Render(" Filters use AND logic between categories, OR within")) + content.WriteString("\n\n") + + // Recovery section + content.WriteString(sectionStyle.Render("Recovery Navigation:")) + content.WriteString("\n") + content.WriteString(commandStyle.Render(" r / R Jump to next / prev recovery start (StatusCode=0)")) + content.WriteString("\n") + content.WriteString(commandStyle.Render(" e / E Jump to next / prev MasterRecoveryState (any)")) + content.WriteString("\n\n") + + // Severity Navigation section + content.WriteString(sectionStyle.Render("Severity Navigation:")) + content.WriteString("\n") + content.WriteString(commandStyle.Render(" 3 / Shift+3 Jump to next / prev Severity=30 event")) + content.WriteString("\n") + content.WriteString(commandStyle.Render(" 4 / Shift+4 Jump to next / prev Severity=40 event")) + content.WriteString("\n\n") + + // View section + content.WriteString(sectionStyle.Render("Views:")) + content.WriteString("\n") + content.WriteString(commandStyle.Render(" c Show full DB config JSON (Ctrl+N/P to scroll)")) + content.WriteString("\n") + content.WriteString(commandStyle.Render(" x Show health metrics (network, degraded peers, connections)")) + content.WriteString("\n") + content.WriteString(commandStyle.Render(" h Show this help")) + content.WriteString("\n\n") + + // General section + content.WriteString(sectionStyle.Render("General:")) + content.WriteString("\n") + content.WriteString(commandStyle.Render(" q / Q / Ctrl+C Quit")) + content.WriteString("\n") + + content.WriteString(helpStyle.Render("\nPress q/h/Esc to 
// renderConfigPopup renders the full config JSON popup overlay.
//
// The config's RawJSON is pretty-printed, split into lines, and a window of
// those lines (selected by m.configScrollOffset) is shown inside a bordered
// popup that is centered in an m.width x m.height area. "more above" /
// "more below" markers are added when lines are hidden on either side.
//
// baseView is accepted but not referenced in this function body. config is
// dereferenced unconditionally, so it must be non-nil.
func (m model) renderConfigPopup(baseView string, config *DBConfig) string {
	popupStyle := lipgloss.NewStyle().
		Border(lipgloss.RoundedBorder()).
		BorderForeground(lipgloss.Color("39")).
		Padding(1, 2).
		MaxWidth(m.width - 10).
		MaxHeight(m.height - 4)

	titleStyle := lipgloss.NewStyle().
		Bold(true).
		Foreground(lipgloss.Color("39"))

	helpStyle := lipgloss.NewStyle().
		Foreground(lipgloss.Color("241")).
		MarginTop(1)

	jsonStyle := lipgloss.NewStyle().
		Foreground(lipgloss.Color("252"))

	scrollIndicatorStyle := lipgloss.NewStyle().
		Foreground(lipgloss.Color("240")).
		Italic(true)

	// Pretty-print the JSON; a marshalling failure is shown in place of the
	// config rather than aborting the render.
	jsonBytes, err := json.MarshalIndent(config.RawJSON, "", " ")
	var jsonContent string
	if err != nil {
		jsonContent = "Error formatting JSON"
	} else {
		jsonContent = string(jsonBytes)
	}

	// Split JSON into lines
	jsonLines := strings.Split(jsonContent, "\n")
	totalLines := len(jsonLines)

	// Calculate available height for JSON content.
	// Fixed chrome: title (1 line) + top margin (1) + help text (1) + bottom margin (1) + padding (2) + border (2) = 8 lines.
	// NOTE(review): 12 is subtracted, not 8. The extra 4 lines presumably
	// leave room for the two scroll-indicator lines and the blank lines
	// around the JSON body — confirm the intended budget.
	maxContentHeight := m.height - 12
	if maxContentHeight < 5 {
		maxContentHeight = 5 // Minimum visible lines
	}

	// Clamp scroll offset: the last valid offset is the one that puts the
	// final line at the bottom of the window.
	maxScrollOffset := totalLines - maxContentHeight
	if maxScrollOffset < 0 {
		maxScrollOffset = 0
	}

	// Work on a local copy; m is a value receiver, so the stored offset is
	// not modified here.
	displayScrollOffset := m.configScrollOffset
	if displayScrollOffset < 0 {
		displayScrollOffset = 0
	}
	if displayScrollOffset > maxScrollOffset {
		displayScrollOffset = maxScrollOffset
	}

	// Determine if we have more content above/below
	hasMoreAbove := displayScrollOffset > 0
	hasMoreBelow := displayScrollOffset < maxScrollOffset

	// Calculate visible window (the whole document when it already fits)
	visibleLines := jsonLines
	if totalLines > maxContentHeight {
		endIdx := displayScrollOffset + maxContentHeight
		if endIdx > totalLines {
			endIdx = totalLines
		}
		visibleLines = jsonLines[displayScrollOffset:endIdx]
	}

	// Build JSON content with scroll indicators
	var jsonContentBuilder strings.Builder

	if hasMoreAbove {
		jsonContentBuilder.WriteString(scrollIndicatorStyle.Render("↑ more above"))
		jsonContentBuilder.WriteString("\n")
	}

	jsonContentBuilder.WriteString(jsonStyle.Render(strings.Join(visibleLines, "\n")))

	if hasMoreBelow {
		jsonContentBuilder.WriteString("\n")
		jsonContentBuilder.WriteString(scrollIndicatorStyle.Render("↓ more below"))
	}

	// Build popup content: title, JSON window, then the key help line
	popupContent := titleStyle.Render(fmt.Sprintf("DB Config (t=%.2fs)", config.Time)) + "\n\n" +
		jsonContentBuilder.String() + "\n\n" +
		helpStyle.Render("Press q/c/Esc to close | Ctrl+N/P to scroll")

	popup := popupStyle.Render(popupContent)

	// Center the popup in the full terminal area; baseView is not composited
	// underneath, the surrounding area is filled with spaces.
	return lipgloss.Place(m.width, m.height, lipgloss.Center, lipgloss.Center, popup, lipgloss.WithWhitespaceChars(" "))
}
popupStyle.Render(popupContent) + + // Overlay the popup on top of the base view + // Place it roughly in the center + return lipgloss.Place(m.width, m.height, lipgloss.Center, lipgloss.Center, popup, lipgloss.WithWhitespaceChars(" ")) +} + +// convertWildcardToRegex converts a simple wildcard pattern to regex +// * matches 0 or more characters +// Every other listed regex metacharacter is backslash-escaped so it matches literally +// No anchors are added, so the resulting expression can match anywhere in the subject text +func convertWildcardToRegex(pattern string) string { + // Escape regex special characters except * + var result strings.Builder + for _, ch := range pattern { + switch ch { + case '*': + result.WriteString(".*") + case '.', '+', '?', '^', '$', '(', ')', '[', ']', '{', '}', '|', '\\': + result.WriteRune('\\') + result.WriteRune(ch) + default: + result.WriteRune(ch) + } + } + return result.String() +} + +// extractLiterals extracts the non-wildcard literal parts from a search pattern +// For example: "*Recovery*State*" -> ["Recovery", "State"] +// Returns a nil slice when the pattern is empty or consists only of '*' +func extractLiterals(pattern string) []string { + // Split by * to get literal parts + parts := strings.Split(pattern, "*") + var literals []string + for _, part := range parts { + if part != "" { + literals = append(literals, part) + } + } + return literals +} + +// getEventFullText builds a full text representation of an event including ALL fields +// The result is a space-separated list of Key=Value pairs: the non-empty standard fields +// (Time, Type, Severity, Machine, ID) first, then every attribute in sorted key order +func getEventFullText(event *TraceEvent) string { + var parts []string + + // Include all standard fields + if event.Time != "" { + parts = append(parts, "Time="+event.Time) + } + if event.Type != "" { + parts = append(parts, "Type="+event.Type) + } + if event.Severity != "" { + parts = append(parts, "Severity="+event.Severity) + } + if event.Machine != "" { + parts = append(parts, "Machine="+event.Machine) + } + if event.ID != "" { + parts = append(parts, "ID="+event.ID) + } + + // Include all attributes (sorted for consistency) + var attrKeys []string + for key := range event.Attrs { + attrKeys = append(attrKeys, key) + } + sort.Strings(attrKeys) + + for _, key := range attrKeys { + value := event.Attrs[key] + parts = append(parts, 
key+"="+value) + } + + return strings.Join(parts, " ") +} + +// searchForward searches for pattern starting from startIndex going forward +// Returns the index of the first matching event, or -1 if not found +// Respects active filters - only searches visible (non-filtered) events +// The event at startIndex itself is tested first; if nothing matches up to the end, the search wraps to index 0 +// startIndex is not bounds-checked: it must lie in [0, len(Events)], otherwise the slice index panics +func (m *model) searchForward(startIndex int, pattern string) int { + regexPattern := convertWildcardToRegex(pattern) + re, err := regexp.Compile(regexPattern) + if err != nil { + return -1 + } + + for i := startIndex; i < len(m.traceData.Events); i++ { + event := &m.traceData.Events[i] + + // Skip filtered events + if !eventMatchesFilters(event, m) { + continue + } + + eventText := getEventFullText(event) + if re.MatchString(eventText) { + return i + } + } + + // Wrap around to beginning + for i := 0; i < startIndex; i++ { + event := &m.traceData.Events[i] + + // Skip filtered events + if !eventMatchesFilters(event, m) { + continue + } + + eventText := getEventFullText(event) + if re.MatchString(eventText) { + return i + } + } + + return -1 +} + +// searchBackward searches for pattern starting from startIndex going backward +// Returns the index of the first matching event, or -1 if not found +// Respects active filters - only searches visible (non-filtered) events +// The event at startIndex itself is tested first; if nothing matches down to index 0, the search wraps to the last event +// startIndex is not bounds-checked: it must be less than len(Events), otherwise the slice index panics +func (m *model) searchBackward(startIndex int, pattern string) int { + regexPattern := convertWildcardToRegex(pattern) + re, err := regexp.Compile(regexPattern) + if err != nil { + return -1 + } + + for i := startIndex; i >= 0; i-- { + event := &m.traceData.Events[i] + + // Skip filtered events + if !eventMatchesFilters(event, m) { + continue + } + + eventText := getEventFullText(event) + if re.MatchString(eventText) { + return i + } + } + + // Wrap around to end + for i := len(m.traceData.Events) - 1; i > startIndex; i-- { + event := &m.traceData.Events[i] + + // Skip filtered events + if !eventMatchesFilters(event, m) { + continue + } + + eventText := getEventFullText(event) + if re.MatchString(eventText) { + return i + } 
+ } + + return -1 +} + +// recompileRawFilterRegexes pre-compiles all raw filter regex patterns for performance +// This should be called whenever filterRawList changes (add, remove, edit, toggle common) +// The compiled slice is index-aligned with filterRawList; a pattern that fails to compile leaves a nil entry +func (m *model) recompileRawFilterRegexes() { + m.filterRawCompiledRegex = make([]*regexp.Regexp, len(m.filterRawList)) + for i, filter := range m.filterRawList { + regexPattern := convertWildcardToRegex(filter) + re, err := regexp.Compile(regexPattern) + if err != nil { + // Store nil for invalid patterns + m.filterRawCompiledRegex[i] = nil + } else { + m.filterRawCompiledRegex[i] = re + } + } +} + +// rebuildMachineSet rebuilds the machine set for O(1) lookups +// This should be called whenever filterMachineList changes +func (m *model) rebuildMachineSet() { + m.filterMachineSet = make(map[string]bool) + for _, machine := range m.filterMachineList { + m.filterMachineSet[machine] = true + } +} + +// getCachedDC returns the DC for a machine address, using cache for performance +// On a cache miss the extracted value (even an empty one) is stored in machineDCCache +func (m *model) getCachedDC(machineAddr string) string { + // Check cache first + if dc, found := m.machineDCCache[machineAddr]; found { + return dc + } + + // Extract and cache + dc := extractDCFromAddress(machineAddr) + m.machineDCCache[machineAddr] = dc + return dc +} + +// eventMatchesFilters reports whether an event passes the active filters +// - filterShowAll set: always true +// - filterShowAll unset and no raw, machine, DC, time or message filter configured: always false +// - otherwise the enabled categories are ANDed (Time AND Machine AND Raw AND Message), +// with OR semantics inside the Machine and Raw categories +func eventMatchesFilters(event *TraceEvent, m *model) bool { + // If "All" is checked, show everything + if m.filterShowAll { + return true + } + + // If "All" is unchecked but no filters are set, show nothing + if len(m.filterRawList) == 0 && len(m.filterMachineList) == 0 && len(m.filterMachineDCs) == 0 && !m.filterTimeEnabled && !m.filterMessageEnabled { + return false + } + + // Apply filters with AND precedence: Time AND Machine AND Raw AND Message + + // 1. 
Time filter (highest precedence) + if m.filterTimeEnabled { + if event.TimeValue < m.filterTimeStart || event.TimeValue > m.filterTimeEnd { + return false + } + } + + // 2. Machine filter (OR within category) + if len(m.filterMachineList) > 0 || len(m.filterMachineDCs) > 0 { + machineMatches := false + + // Check if machine is in selected list using O(1) set lookup + if m.filterMachineSet[event.Machine] { + machineMatches = true + } + + // Check if machine's DC is selected using cached DC extraction + if !machineMatches { + dc := m.getCachedDC(event.Machine) + if dc != "" && m.filterMachineDCs[dc] { + machineMatches = true + } + } + + if !machineMatches { + return false + } + } + + // 3. Raw filters (OR within category) - skip disabled filters + // NOTE: if every raw filter is disabled or has no compiled regex, rawMatches stays false and the event is rejected + if len(m.filterRawList) > 0 { + eventText := getEventFullText(event) + rawMatches := false + + for i := range m.filterRawList { + // Skip disabled filters + if m.filterRawDisabled[i] { + continue + } + + // Use pre-compiled regex for performance + // Entries that are out of range or nil are skipped rather than compiled on the fly + if i < len(m.filterRawCompiledRegex) && m.filterRawCompiledRegex[i] != nil { + if m.filterRawCompiledRegex[i].MatchString(eventText) { + rawMatches = true + break + } + } + } + + if !rawMatches { + return false + } + } + + // 4. 
Message filter + if m.filterMessageEnabled { + if event.Type != "NetworkMessageSent" { + return false + } + } + + return true +} + +// normalizeAddress removes the :tls suffix from machine addresses for comparison +// FDB is inconsistent about including :tls in addresses, so we normalize for matching +// Example: "[abcd::2:0:1:1]:1:tls" -> "[abcd::2:0:1:1]:1" +func normalizeAddress(addr string) string { + return strings.TrimSuffix(addr, ":tls") +} + +// stripRPCName removes wrapper types (ErrorOr, EnsureTable) from RPC names +// Example: "ErrorOr<EnsureTable<GetReadVersionReply>>" -> "GetReadVersionReply" +// Example: "ErrorOr<EnsureTable<Void>>" -> "Void" +// Only the content between a wrapper's '<' and its matching '>' is kept; text before or after the wrapper is dropped +// A wrapper whose '<' has no matching '>' is left in place +func stripRPCName(rpcName string) string { + result := rpcName + + // Remove ErrorOr< and corresponding > + for { + start := strings.Index(result, "ErrorOr<") + if start == -1 { + break + } + // Find the matching closing > + depth := 1 + end := start + len("ErrorOr<") + for end < len(result) && depth > 0 { + if result[end] == '<' { + depth++ + } else if result[end] == '>' { + depth-- + } + end++ + } + // Extract the inner content + if depth == 0 && end <= len(result) { + result = result[start+len("ErrorOr<") : end-1] + } else { + break + } + } + + // Remove EnsureTable< and corresponding > + for { + start := strings.Index(result, "EnsureTable<") + if start == -1 { + break + } + // Find the matching closing > + depth := 1 + end := start + len("EnsureTable<") + for end < len(result) && depth > 0 { + if result[end] == '<' { + depth++ + } else if result[end] == '>' { + depth-- + } + end++ + } + // Extract the inner content + if depth == 0 && end <= len(result) { + result = result[start+len("EnsureTable<") : end-1] + } else { + break + } + } + + // Trim any remaining whitespace + result = strings.TrimSpace(result) + return result +} + +// runUI starts the Bubbletea TUI program +func runUI(traceData *TraceData) error { + p := tea.NewProgram( + newModel(traceData), + tea.WithAltScreen(), + ) + + if _, err := p.Run(); err != nil { + return fmt.Errorf("error running UI: 
%w", err) + } + + return nil +} diff --git a/design/LoadBalancing/LoadBalancing.md b/design/LoadBalancing/LoadBalancing.md index f67686b570b..fd8296a8c9e 100644 --- a/design/LoadBalancing/LoadBalancing.md +++ b/design/LoadBalancing/LoadBalancing.md @@ -18,7 +18,6 @@ In the following sections, the two LBs will be discussed in details. * Commit proxy interface * GetReadVersion proxy interface -* ConfigFollower interface Here, the interfaces are assumed to be always *fresh*, i.e. the list of the servers is fixed. @@ -224,4 +223,3 @@ while True: next_alt = (next_alt + 1) % alts.size() time.sleep(.2) ``` - diff --git a/design/bulkload-restore-integration.md b/design/bulkload-restore-integration.md new file mode 100644 index 00000000000..bf74f6eca09 --- /dev/null +++ b/design/bulkload-restore-integration.md @@ -0,0 +1,721 @@ +# Design for Integrating BulkDump / BulkLoad into Backup / Restore + +## Table of Contents + +1. [Objective](#objective) +2. [Background](#background) +3. [Requirements](#requirements) +4. [Design Overview](#design-overview) +5. [Detailed Design](#detailed-design) +6. [Alternatives Considered](#alternatives-considered) +7. [Testing Considerations](#testing-considerations) +8. [Observability/Supportability Considerations](#observabilitysupportability-considerations) +9. [Rollout/Migration Considerations](#rolloutmigration-considerations) +10. [References and Documentation](#references-and-documentation) + +--- + +## Objective + +Integrate BulkDump and BulkLoad technologies into FoundationDB's backup and restore systems to enable faster snapshot operations and provide the foundation for future capabilities like restore into live clusters. The integration maintains backward compatibility during the transition period while allowing users to opt into the improved performance characteristics of direct Storage Server coordination. 
+ +## Background + +### Technical Ecosystem and Business Context + +FoundationDB's current backup and restore system consists of mature, production-proven mechanisms: + +**[Backup V2](https://github.com/apple/foundationdb/blob/main/design/backup_v2_partitioned_logs.md):** Creates backups by generating range files (snapshots) and continuously capturing mutation logs. Both are uploaded to S3 for durable storage. + +**Restore:** Recovers data by reading range files and mutation logs from S3, then applying them to a target cluster via the transaction system. + +**New Technologies:** +**[BulkDump](https://github.com/apple/foundationdb/blob/main/documentation/sphinx/source/bulkdump.rst)** and **[BulkLoad](https://github.com/apple/foundationdb/blob/main/documentation/sphinx/source/bulkload.rst)** (introduced in FDB 7.4) provide experimental alternatives that coordinate directly with Storage Servers via the Data Distributor, bypassing transaction system overhead. + +### Strategic Initiative Context + +Using BulkDump and BulkLoad will help speed up backup and restore. BulkDump and BulkLoad bypass the transaction system and run in parallel enabling faster snapshot and restore. They also facilitate our later objective of being able to load into a live cluster. + +### Current Limitations and Scope + +Currently, the BulkDump/BulkLoad system supports range-only data handling. Extending it to support range + mutation log restores will require significant additional work and is considered a long-term goal. The additional work includes: + +- Completing range-partitioned backup (ongoing) +- Implementing bulkload of mutation logs +- Evaluating bulkload performance with range-partitioned backup datasets + +In the near term, this project focuses on integrating BulkDump into the backup pipeline for snapshot generation and BulkLoad into the restore pipeline for range data consumption. This design doc is scoped specifically to that integration effort. 
+ +## Requirements + +When integrating BulkDump and BulkLoad into the backup and restore snapshot system, two key problems must be addressed in addition to the integration itself: + +1. **Preserve simplicity**: As outlined earlier, BulkDump, BulkLoad, and Backup/Restore are independent systems. To keep the integration clean, Backup/Restore should treat BulkDump/BulkLoad as black boxes, avoiding unnecessary coupling or complexity. +2. **Enable fallback**: BulkDump and BulkLoad are still experimental, whereas Backup and Restore are mature and trusted systems. To ensure reliability, the integration must provide a fallback path — if BulkDump/BulkLoad fails, the system should seamlessly revert to the existing backup/restore mechanisms. + +Given these problems, we establish the following 4 requirements: + +### Functional Requirements + +1. **Single-Command Integration**: When running backup or restore operations, users can choose to enable the new snapshot system (BulkDump/BulkLoad) with existing commands. + - *Rationale*: Without this, users would need to invoke BulkDump, BulkLoad, and traditional backup/restore separately, which is error-prone and complex to manage. + +2. **Backward Compatibility**: Backup data generated with BulkDump can be restored using traditional range file restore methods, and traditional backup data can be restored using the existing range file + mutation log process through the transaction system. + - *Rationale*: Since BulkDump uses a completely different manifest and data file format (SSTs) compared to traditional range files, this requirement enables fallback scenarios and addresses failure conditions. + +3. **Performance Requirement**: Backup and restore times must be no longer than current implementations when using BulkDump/BulkLoad. + - *Rationale*: Avoids performance regressions when enabling the new snapshot system. + +4. 
**Encryption Compatibility**: BulkDump and BulkLoad perform encryption when backup/restore encryption is configured. + - *Rationale*: Required for production security compliance per File Level Encryption in FDB Backups specification. The design and timeline for this phase are still pending. + +Requirements 1 and 2 will be inherently satisfied once the design points outlined in the next section are implemented. Requirement 3 also appears feasible, as initial measurements show BulkDump/BulkLoad delivering performance benefits over the transaction-based approach currently used in backup and restore. + +## Design Overview + +### Architecture Evolution + +This integration represents one half of the evolution of Backup V2 to V3, specifically improving the snapshot system (the other part of V3 is range-partitioned backup mutation logs -- a separate effort). The key innovation here is the replacement of transaction-based snapshot handling with direct Storage Server coordination via BulkDump and BulkLoad (while maintaining backward compatibility during the transition). + +#### Current Backup Architecture (V2) + +```mermaid +graph TB + subgraph FDB_Cluster["FDB Cluster"] + SS1[Storage Server 1] + SS2[Storage Server 2] + SSN[Storage Server N] + + CP[Commit Proxy] + TLog[Transaction Logs] + + BA["Backup Agents
External TaskBucket processes"] + BW["Backup Workers
NEW in V2
Recruited by CC"] + + SS1 -->|Read snapshots| BA + SS2 -->|Read snapshots| BA + SSN -->|Read snapshots| BA + + CP -->|Write mutations| TLog + TLog -->|Pull mutations| BW + end + + subgraph S3_V2["S3 Bucket - Backup V2"] + RF["kvranges/
Snapshots from Backup Agents"] + ML["logs/
Partitioned logs from Backup Workers"] + end + + BA -->|Save snapshots| RF + BW -->|Save partitioned logs| ML + + style BW fill:#C8E6C9 + style ML fill:#C8E6C9 +``` + +**V2 Key Features:** +- **Backup Workers** (NEW in V2): Pull mutations from TLogs, save partitioned mutation logs to S3 +- **Backup Agents** (same as [V1](https://github.com/apple/foundationdb/blob/main/design/backup.md)): Read snapshots from Storage Servers, save range files to S3 +- **Mutation logs are partitioned** by log router tags (V2 innovation) +- **No backup mutations written to database** ([V1](https://github.com/apple/foundationdb/blob/main/design/backup.md) wrote them, V2 eliminated this) + +#### Current Restore Architecture (V2) + +```mermaid +graph TB + subgraph S3_Backup_V2["S3 Bucket - Backup V2"] + RF2["kvranges/"] + ML2["logs/
Partitioned"] + end + + RA["Restore Agents
TaskBucket-based

1. Read ranges
2. Read partitioned logs
3. Merge mutations"] + + subgraph Target_Cluster["Target FDB Cluster"] + CP2[Commit Proxy] + SS2[Storage Servers] + end + + RF2 -->|Read| RA + ML2 -->|Read| RA + RA -->|Transactions| CP2 + CP2 --> SS2 +``` + +**V2 Restore Process:** +- **Restore Agents** read range files and mutation logs from S3 +- **Transaction-based ingestion**: All data written through Commit Proxy using standard transaction system +- **Sequential processing**: Range data loaded first, then mutation logs applied +- **Performance bottleneck**: Transaction system creates overhead for large-scale data ingestion + +#### Backup Architecture (V3) with BulkDump + +```mermaid +graph TB + subgraph FDB_Cluster_V3["FDB Cluster"] + SS1[Storage Server 1] + SS2[Storage Server 2] + SSN[Storage Server N] + + DD["Data Distributor
Coordinates BulkDump"] + CP[Commit Proxy] + TLog[Transaction Logs] + + BA["Backup Agents
V2 TaskBucket
For fallback"] + BW["Backup Workers
V2 unchanged"] + BDTF["BulkDumpTaskFunc
NEW - coordinates SST generation"] + + SS1 -->|Read snapshots V2| BA + SS2 -->|Read snapshots V2| BA + SSN -->|Read snapshots V2| BA + + SS1 -->|Generate & Upload SSTs V3| DD + SS2 -->|Generate & Upload SSTs V3| DD + SSN -->|Generate & Upload SSTs V3| DD + + DD -->|Coordinate| BDTF + + CP -->|Write mutations| TLog + TLog -->|Pull mutations| BW + end + + subgraph S3_V3["S3 Bucket - Backup V3"] + RF["kvranges/
V2 fallback"] + ML["logs/
V2 partitioned"] + BD["bulkdump_data/
V3 NEW SST files"] + end + + BA -->|Save V2 snapshots| RF + BW -->|Save partitioned logs| ML + SS1 -->|Upload SSTs directly| BD + SS2 -->|Upload SSTs directly| BD + SSN -->|Upload SSTs directly| BD + + style BDTF fill:#FFF3CD + style DD fill:#FFF3CD + style BD fill:#FFF3CD +``` + +**V3 Key Changes:** +- **BulkDumpTaskFunc** (NEW): Coordinates BulkDump (introduced in 7.4) via Data Distributor +- **Data Distributor** coordinates Storage Servers to generate SST files using existing BulkDump implementation +- **Backup Agents** still (optionally) generate kvranges/ (V2 format, for fallback compatibility) +- **Backup Workers** still generate partitioned mutation logs in logs/ (V2, unchanged) +- **Short-term dual snapshot mode**: Both snapshot mechanisms run in parallel temporarily to enable direct comparison and validation + +### Terminology and Component Definitions + +- **`backup_agent`** = Executable binary that runs as a long-running process to execute TaskBucket tasks +- **`Backup Agents`** = Running instances of the `backup_agent` executable that perform backup operations +- **`Restore Agents`** = Running instances of the `backup_agent` executable that perform restore operations (same processes, different tasks) +- **`fdbbackup`** = Command-line tool that submits backup jobs to TaskBucket (does not execute the backup itself) +- **`fdbrestore`** = Command-line tool that submits restore jobs to TaskBucket (does not execute the restore itself) + +**Flow:** +1. User runs `fdbbackup start` → Submits backup job to TaskBucket +2. Running `backup_agent` processes pick up and execute the backup tasks +3. User runs `fdbrestore start` → Submits restore job to TaskBucket +4. 
Running `backup_agent` processes pick up and execute the restore tasks + +## Detailed Design + +### Command-Line Interface + +#### Backup Command with Mode +```bash +fdbbackup start --mode <rangefile|bulkdump|both> \ + -d <backup_url> \ + -t <tag> \ + [--timeout <seconds>] +``` + +**New Parameter:** +- `--mode <rangefile|bulkdump|both>`: Controls which snapshot mechanism(s) to use + - `rangefile` (default): Generate only traditional range files (V1/V2 method) + - `bulkdump`: Generate only BulkDump SST files + - `both`: Generate both formats (with unique filenames to prevent collision) + +#### Restore Command with Mode +```bash +fdbrestore start \ + -r <backup_url> \ + -t <tag> \ + --dest-cluster-file <cluster_file> \ + [--mode <rangefile|bulkload>] +``` + +**New Parameter:** +- `--mode <rangefile|bulkload>`: Controls which restore mechanism to use for range data + - `rangefile` (default): Use traditional range file restore from the `kvranges/` directory in S3 + - `bulkload`: Use BulkLoad for range data restoration if BulkDump dataset is available + +**Default behavior (traditional range files):** +1. Use traditional range file restore from the `kvranges/` directory in S3 +2. Apply mutation logs using traditional method +3. Does not use BulkLoad even if the `bulkdump_data/` directory is present in S3 + +**BulkLoad behavior (with --mode bulkload):** +1. Verify BulkDump dataset completeness +2. If complete: use BulkLoad for range data restoration +3. If incomplete: error (backup does not have the `bulkdump_data/` directory in S3) +4. After BulkLoad: apply mutation logs using traditional method + +### Data Format and Folder Structure + +#### Backup V3 +``` +s3://bucket/backup-2025-01-20-23-17-10.123456/ +├── kvranges/ # V2 snapshot format (fallback compatibility) +│ ├── snapshot.000000000001234567/ +│ │ └── 0/ +│ │ ├── range,1980422,c5c81efaa67c1b7bb5e17c756f3b2416,1048576 +│ │ ├── range,1998818,192536233eafb59e5e854faf1b35d5ca,1048576 +│ │ └── ... +│ └── ... 
+├── snapshots/ # Snapshot metadata +│ └── snapshot,1980422,2025711,570 +├── logs/ # V2: Partitioned logs (unchanged from V2) +│ └── 0000/ +│ └── 0000/ +│ ├── log,1923285,21923285,392f2edb4fa32c2af5171686a6b7f8bb,1048576 +│ └── ... +├── properties/ # Backup properties +│ ├── log_begin_version +│ ├── log_end_version +│ └── mutation_log_type +└── bulkdump_data/ # V3 NEW: BulkDump SST format + └── / # Job-specific directory + ├── job-manifest.txt # Top-level job manifest + ├── 0/ # Shard/range directory (shard 0) + │ ├── -manifest.txt # Shard manifest + │ └── -data.sst # Shard SST data file + ├── 1/ # Shard/range directory (shard 1) + │ ├── -manifest.txt + │ └── -data.sst + └── ... # Additional shards +``` + +**V3 Design Notes:** +- During initial validation period, V3 backups contain **both** snapshot formats for comparison +- `kvranges/` enables fallback to V2 restore method and side-by-side validation +- `bulkdump_data/` provides faster restore via direct SST ingestion using BulkDump's native layout - no conversion or adaptation needed +- `logs/` format unchanged from V2 (partitioned logs) +- **Snapshot Relationship**: The `bulkdump_data/` directory contains SST-formatted snapshot data at the same version as the traditional snapshots in `snapshots/` and `kvranges/` directories. Each represents the same point-in-time snapshot but in different formats (SST vs range files). +- After validation: Only `bulkdump_data/` will be generated; `kvranges/` will be deprecated + +### Integration Strategy + +To achieve the requirements, we propose the following designs: + +#### 1. 
Snapshot Generation in Backup (BulkDump Integration) + +**Details:** +- **Command Interface**: `fdbbackup start --mode both` enables BulkDump-based snapshot generation alongside traditional range files +- **Implementation**: A new task, `BulkDumpTaskFunc`, coordinates with the Data Distributor to have Storage Servers generate SST files directly +- **Parallel Execution**: BulkDumpTaskFunc runs in parallel with existing backup processes (mutation log capture continues unchanged) +- **Output**: Creates `bulkdump_data/` folder containing SST files alongside traditional `kvranges/` folder for fallback + +**Rationale:** This replaces the current inefficient process where backup agents read data through transactions and generate range files, providing direct Storage Server coordination for faster snapshot generation. + +#### 2. Snapshot Consumption in Restore (BulkLoad Integration) + +**Details:** +- **Command Interface**: `fdbrestore start --mode bulkload` enables BulkLoad-based range data restoration; default behavior uses traditional range files +- **Implementation**: A new task, `BulkLoadTaskFunc`, is inserted before normal restore tasks to handle SST ingestion when --mode bulkload is specified +- **Direct Ingestion**: BulkLoadTaskFunc delegates to BulkLoad system to directly inject SST files into Storage Servers via Data Distributor +- **Phased Approach**: BulkLoad completes all range data loading, then traditional mutation log replay begins + +**Rationale:** This provides an opt-in path to replace the current transaction-based range file consumption with direct SST ingestion for faster range data loading. + +#### 3. 
Dual Dataset Approach for Compatibility + +**Details:** +- **Backup V3 Structure**: Contains both `kvranges/` (traditional) and `bulkdump_data/` (SST-based) folders +- **Automatic Detection**: Restore automatically detects available dataset types and chooses appropriate method +- **Fallback Support**: Traditional restore method remains available even for BulkDump-enabled backups +- **Validation Period**: Both formats generated initially to enable comparison and validation + +**Rationale:** +- Ensures existing backup file format compatibility during transition +- Eliminates need for format converters between incompatible range files and SST files +- Provides reliable fallback path addressing the experimental nature of BulkDump/BulkLoad + +#### 4. Black Box Integration Approach + +**Details:** + +**Backup Integration:** +- `BulkDumpTaskFunc` treats BulkDump system as black box, delegating coordination to Data Distributor +- Runs in parallel with existing backup DAG without modifying core backup logic +- Monitors execution time and implements timeout policies to prevent backup deadline violations + +**Restore Integration:** +- `BulkLoadTaskFunc` treats BulkLoad system as black box, delegating SST ingestion to Data Distributor +- Integrates into existing restore task sequence without modifying core restore logic +- Provides clear handoff to traditional mutation log replay after SST loading completes + +**Rationale:** This approach preserves simplicity, reduces coupling between systems, and enables independent evolution of BulkDump/BulkLoad vs Backup/Restore systems. + +### Restore Data Flow (V3) with BulkLoad + +```mermaid +graph TB + subgraph S3_Backup_V3["S3 Bucket - Backup V3"] + RF3["kvranges/
V2 fallback"] + ML3["logs/
V2 partitioned"] + BD3["bulkdump_data/
V3 SST files"] + end + + RC["fdbrestore

Submits restore job
to TaskBucket"] + + TB["TaskBucket

Orchestrates task
dependencies"] + + BLTF["BulkLoadTaskFunc NEW

1. Verify manifest
2. Start BulkLoad"] + + BLS["BulkLoad System
Black box

Direct SST ingestion"] + + RA3["Backup Agents
TaskBucket execution

Apply mutation logs"] + + subgraph Target_Cluster_V3["Target FDB Cluster"] + DD3[Data Distributor] + SS3[Storage Servers] + CP3[Commit Proxy] + end + + RC -->|Submit job| TB + TB -->|1. Execute| BLTF + BD3 -->|Read SSTs| BLTF + BLTF -->|Delegate| BLS + BLS -->|Direct injection
Bypass transactions| DD3 + DD3 -->|Ingest SSTs| SS3 + + TB -->|2. After BulkLoad done| RA3 + + ML3 -->|Read partitioned logs| RA3 + RA3 -->|Transactions| CP3 + CP3 --> SS3 + + style BLTF fill:#FFF3CD + style BLS fill:#FFF3CD + style BD3 fill:#FFF3CD + style TB fill:#F8D7DA +``` + +**V3 Restore Flow:** +1. **fdbrestore** submits restore job to TaskBucket (then exits or waits for completion) +2. **TaskBucket** orchestrates task execution via Backup Agents based on the `--mode` flag: + - If `--mode bulkload`: executes BulkLoadTaskFunc to verify manifest and run BulkLoad + - If default: uses traditional range file restore from kvranges/ +3. **BulkLoad system** (when used) directly injects SST files into Storage Servers via DD +4. **Phase 2 - Mutation Replay:** After range data loading completes, TaskBucket triggers mutation log replay using V2 method (transaction-based) +5. TaskBucket task dependencies ensure proper sequencing: range data first (via chosen method), then mutation logs + +#### 5. BulkDump Resilience and Continuous Operation + +**Details:** +- BulkDump follows the "retry forever" philosophy like traditional backup operations +- BulkDump continuously retries on transient failures (network issues, temporary S3 problems, etc.) with exponential backoff +- BulkDump runs until successful completion or explicit operator cancellation +- If backup period ends before BulkDump completes, it continues in the next backup period +- When Restore runs with BulkLoad enabled, the mechanism verifies whether a complete BulkDump dataset is available. If not, it returns an error to the user. In this case, the user may still rerun Restore without BulkLoad to recover the data + +**Rationale:** This resilient approach ensures BulkDump completes successfully under normal operational conditions while maintaining the proven reliability patterns of traditional backup systems. + +#### 6. 
BulkDump Dataset Verification + +**Details:** +- When `--mode bulkload` is specified, fdbrestore first verifies that a complete BulkDump dataset exists in `bulkdump_data/` +- Verification checks job manifest completeness and shard manifest integrity +- If complete dataset found: restore proceeds with BulkLoad-based range data loading +- If incomplete or missing dataset: restore returns error with clear message directing user to retry with `--mode rangefile` + +**Rationale:** This provides early detection of incomplete BulkDump datasets and clear guidance for fallback, fulfilling the backward compatibility requirement. + +### Configuration Requirements + +#### Cluster Prerequisites for BulkLoad Integration + +BulkLoad operations require specific cluster configuration to function correctly. **BulkLoad automatically validates these prerequisites** and provides clear error messages if configuration is invalid. + +**Required Server Knobs (ALL Processes):** + +For BulkLoad Operations: +```bash +--knob_shard_encode_location_metadata=1 # Enhanced location metadata with shard IDs +--knob_enable_read_lock_on_range=1 # Exclusive range locking for data integrity +``` + +For BulkDump Operations: +```bash +#No additional knobs required - works with default configuration +``` + +**Configuration Steps:** +1. **Configure knobs** on all FDB processes (fdbserver command line or config file) +2. **Restart cluster** (in Kubernetes environments, the operator restarts processes via kill command) +3. **Trigger database wiggle** to rewrite all shard metadata with enhanced location encoding +4. **Validate configuration** - BulkLoad operations automatically verify prerequisites + +**Database Wiggle Requirement:** +The `knob_shard_encode_location_metadata=1` setting changes how shard location metadata is encoded. 
Existing shards have metadata written in the old format, so a database wiggle is required to force all shards to rewrite their metadata with the new encoding that includes shard IDs required for BulkLoad operations. + +```bash +#Trigger database wiggle after cluster restart +fdbcli --exec "configure perpetual_storage_wiggle=1" +#Monitor wiggle completion before using BulkLoad +fdbcli --exec "status details" +``` + +**Example Process Configuration:** +```bash +fdbserver --knob_shard_encode_location_metadata=1 \ + --knob_enable_read_lock_on_range=1 \ + [other standard options] +``` + +**Automatic Validation:** +```cpp +// Validation performed during BulkLoad submission +if (!SERVER_KNOBS->SHARD_ENCODE_LOCATION_METADATA) { + throw bulkload_invalid_configuration("BulkLoad requires --knob_shard_encode_location_metadata=1. " + "Restart cluster with this knob enabled."); +} + +if (!SERVER_KNOBS->ENABLE_READ_LOCK_ON_RANGE) { + throw bulkload_invalid_configuration("BulkLoad requires --knob_enable_read_lock_on_range=1. " + "Restart cluster with this knob enabled."); +} +``` + +**Important:** The knob validation above only checks that knobs are enabled. It does not verify that the database wiggle has completed and all shard metadata is in the new format. If BulkLoad encounters shards with old-format metadata, it will fail at runtime. Operators must ensure the database wiggle has fully completed before using BulkLoad. Monitor wiggle progress via `fdbcli --exec "status details"` and verify no shards are pending migration. + +## Alternatives Considered + +### Alternative 1: Single Dataset with Converter +**Approach**: Generate only BulkDump SST files and create a converter to traditional range files when needed. 
+ +**Pros**: +- Simpler storage structure +- No duplication of snapshot data + +**Cons**: +- Conversion overhead impacts restore performance +- Complex converter logic introduces failure points +- Cannot leverage proven reliability of existing backup format + +**Decision**: Rejected in favor of dual dataset approach for reliability and performance. + +### Alternative 2: Migration-Based Integration +**Approach**: Deprecate traditional backup immediately and require migration period. + +**Pros**: +- Cleaner long-term architecture +- Forces adoption of new system + +**Cons**: +- High migration risk for production systems +- No fallback during transition period +- Requires complex migration tooling + +**Decision**: Rejected in favor of gradual transition with fallback support. + +### Alternative 3: Tightly Coupled Integration +**Approach**: Integrate BulkLoad directly into existing Backup/Restore task logic. + +**Pros**: +- Potentially more efficient coordination +- Single unified control flow + +**Cons**: +- Violates black box principle +- Increases coupling between independent systems +- More complex testing and maintenance +- Harder to isolate failures + +**Decision**: Rejected in favor of black box approach for maintainability. + +## Testing Considerations + +### Development Phases and Testing Strategy + +The implementation is divided into four phases with specific testing criteria: + +#### Phase 1: Simulation Testing for BulkDump/BulkLoad-enabled Backup/Restore +**Implementation**: +- Inject BulkDump failures in BackupWithBulkDump simulations to verify Backup handles failures correctly +- Inject BulkLoad failures in BackupWithBulkDump simulations to verify Restore handles failures correctly +- Test dual dataset scenarios and fallback mechanisms + +**Completion Criteria**: High confidence in correctness under failure conditions for both backup and restore operations. 
+ +#### Phase 2: CTest for BulkDump/BulkLoad-enabled Backup/Restore (runs in parallel with Phase 1) +**Implementation**: +- Run CTests with BulkDump-enabled Backup/BulkLoad-enabled Restore + +**Completion Criteria**: Confidence that the new snapshot system runs correctly with S3. + +#### Phase 3: K8s Testing for BulkDump/BulkLoad-enabled Backup/Restore (runs in parallel with Phase 1) +**Implementation**: +- Current Backup/Restore K8s tests (maintained by Johannes Scheuermann) run with SeaweedFS. For BulkDump/BulkLoad, we aim to test against real S3 +- Add a K8s test suite for the new snapshot system + +**Completion Criteria**: Confidence that Backup/Restore with the new snapshot system works at large scale in cluster environments. + +#### Phase 4: Production-like Cluster validation (TBD) +**Implementation**: Production environment validation (timeline to be determined). + +### Representative Test Cases +1. **Happy Path**: Backup with `--mode bulkdump`, restore with `--mode bulkload` +2. **Both Mode**: Backup with `--mode both`, restore testing both `--mode bulkload` and default methods +3. **Fallback Scenario**: BulkDump backup, traditional restore with range files (backward compatibility) +4. **Timeout Scenario**: BulkDump times out, traditional backup completes +5. **Incomplete Dataset**: Restore with `--mode bulkload` fails gracefully when BulkDump data is incomplete +6. 
**Performance Comparison**: BulkDump backup ≤ traditional backup time, BulkLoad restore ≤ traditional restore time + +## Observability/Supportability Considerations + +### Metrics and Monitoring + +#### Integration-Specific Trace Events + +**Backup Integration Trace Events:** +```cpp +TraceEvent("BackupBulkDumpIntegrationStart") + .detail("BackupURL", url) + .detail("SnapshotMode", mode); // bulkdump, rangefile, or both + +TraceEvent("BackupDualSnapshotComplete") + .detail("RangeFilesBytes", rangeBytes) + .detail("BulkDumpBytes", bulkBytes); // NEW: for validation comparison +``` + +**Restore Integration Trace Events:** +```cpp +TraceEvent("RestoreSnapshotMethodSelected") + .detail("Method", method) // bulkload or rangefile + .detail("BulkLoadAvailable", available); +``` + +#### Status Command Enhancements + +**Backup Status Enhancements:** +```bash +fdbbackup status -d + +#New fields in output: +#Snapshot Mode : bulkdump | rangefile | both +#BulkLoad Compatible : yes | no +``` + +**Restore Status Enhancements:** +```bash +fdbrestore status + +#New fields in output: +#Snapshot Method : bulkload | rangefile +#Snapshot Phase : complete | in_progress | not_started +#Mutation Log Phase : complete | in_progress | not_started +``` + +### Key Metrics to Monitor +- **BulkDump completion rate** during backup operations +- **BulkLoad success rate** during restore operations +- **Fallback usage frequency** (indicates BulkDump/BulkLoad reliability) +- **Backup performance comparison** (BulkDump vs traditional range file generation) +- **Restore performance comparison** (BulkLoad vs traditional range file consumption) +- **Dual dataset size overhead** during transition period + +## Rollout/Migration Considerations + +### Rollout Strategy + +#### Phase 1: Opt-in Integration (8.0 Release) +- BulkDump/BulkLoad integration available via `--mode both` flag for backup and `--mode bulkload` flag for restore +- Traditional range file backup/restore remains default and as fallback if 
bulkdump fails. +- **Duration**: 6 months minimum for production validation + +#### Phase 2: Default Transition (8.1+ Release) +- BulkDump becomes default snapshot generation mechanism for new backups +- Range file generation maintained for compatibility +- Performance and reliability validation complete +- **Duration**: 12 months for ecosystem adoption + +#### Phase 3: Deprecation (Future Release) +- Range file generation deprecated (still readable) +- BulkDump/BulkLoad becomes primary snapshot system +- **Duration**: TBD based on adoption metrics + +### Migration Requirements + +#### Backward Compatibility +- **FDB 8.0+** automatically detects and restores pre-8.0 backups using range files +- **No manual migration** required for existing backup datasets +- **Transparent operation** for users upgrading clusters + +#### Configuration Migration +No configuration changes beyond those required by BulkLoad -- see above -- are required for basic functionality. Optional performance optimization knobs are available for advanced users. + +#### Error Handling and Recovery + +**General Philosophy:** + +BulkDump/BulkLoad integration uses the same error handling as current backup/restore: continuous retries with warnings until the operation succeeds (or an operator intervenes). The system is designed to avoid manual intervention - failures are rare and most issues resolve automatically. + +**Failure Scenarios & Handling:** + +1. **BulkDump Fails During Backup (BulkDump Mode)** + - When user explicitly uses `--mode bulkdump`, backup fails completely (no fallback in bulkdump-only mode) + - Error logged: `backup_bulkdump_failed` + - User must retry with `--mode rangefile` (default) or `--mode both` + +2. **Incomplete BulkDump Dataset at Restore** + - Restore pre-flight check verifies manifest completeness + - If incomplete: throw `restore_bulkload_dataset_incomplete` error + - Error message: "BulkDump dataset incomplete. Use --mode rangefile" + +3. 
**BulkLoad Fails During Restore** + - When user specifies `--mode bulkload`, BulkLoad task fails and logs error + - Restore fails with error: `restore_bulkload_failed` + - Recovery options (in order of preference): + - **Format fallback**: If backup has `kvranges/` (created with `--mode both`), retry with `--mode rangefile` using same snapshot + - **Older snapshot fallback**: Use an older snapshot from the backup dataset (if available) and replay more mutation logs to reach target version + - **No recovery**: If only current BulkDump snapshot exists and BulkLoad consistently fails, restore cannot proceed + +**Error Code Definitions:** +```cpp +// New error codes for BulkLoad integration +error_code_actor restore_bulkload_dataset_incomplete() +error_code_actor restore_bulkload_failed() +error_code_actor backup_bulkdump_timeout() +error_code_actor backup_bulkdump_failed() +error_code_actor bulkload_invalid_configuration() // NEW: Configuration validation +``` + +### Rollback Considerations + +#### Emergency Rollback +If critical issues are discovered post-deployment: +1. **Disable BulkDump** in new backups (revert to default `--mode rangefile`) +2. **Use traditional restore** for all operations (use `--mode rangefile`) +3. **Existing data** remains accessible through range files (for backups created with dual datasets) +4. 
**No data loss** due to dual dataset approach during transition period + +--- + +## References and Documentation + +### Related Documents +- [Backup V2 Design](https://github.com/apple/foundationdb/blob/main/design/backup_v2_partitioned_logs.md) +- [BulkDump Documentation](https://github.com/apple/foundationdb/blob/main/documentation/sphinx/source/bulkdump.rst) +- [BulkLoad User Guide](https://github.com/apple/foundationdb/blob/main/documentation/sphinx/source/bulkload-user.rst) + +### Key Concepts Reference +- **Backup Agents**: External TaskBucket processes executing traditional backup-related tasks (range file generation) +- **BulkDump**: Experimental feature that replaces traditional snapshot generation by having Storage Servers produce SST files via Data Distributor coordination +- **BulkLoad**: Experimental feature that replaces traditional snapshot consumption by directly ingesting SST files into Storage Servers, bypassing transactions +- **TaskBucket**: FoundationDB's distributed task execution framework +- **SST Files**: Sorted String Table files - efficient binary format for storing key-value data, used by BulkDump/BulkLoad +- **Data Distributor (DD)**: FoundationDB component that coordinates Storage Server operations, used by BulkDump/BulkLoad for snapshot system coordination +- **Dual Dataset Approach**: Transition strategy generating both traditional range files and SST files during validation period + +### Security Considerations + +**Encryption integration is not yet designed.** For production use requiring encryption, use traditional backup/restore without `--mode bulkdump` or BulkLoad. diff --git a/design/coroutines.md b/design/coroutines.md index 35e01bb7e8b..230408a7144 100644 --- a/design/coroutines.md +++ b/design/coroutines.md @@ -41,13 +41,22 @@ can be freely mixed, but new code should be written using coroutines. 
## Coroutines vs ACTORs +### Performance Characteristics + +**For detailed performance analysis, benchmarking results, and optimization techniques, see [`COROUTINE_PERF_ANALYSIS.md`](../COROUTINE_PERF_ANALYSIS.md).** + +**Key Summary**: C++20 coroutines show pattern-dependent performance: +- **Excellent** for suspension-heavy patterns (YIELD: +83% faster than actors) +- **Competitive** for allocation-heavy patterns (NET2: 3-8% slower than actors) +- **Production ready** with performance characteristics suitable for most FDB workloads + +## Basic Types + It is important to understand that C++ coroutine support doesn't change anything in Flow: they are not a replacement of Flow but they replace the actor compiler with a C++ compiler. This means, that the network loop, all Flow types, the RPC layer, and the simulator all remain unchanged. A coroutine simply returns a special `SAV` which has handle to a coroutine. -## Basic Types - As defined in the C++20 standard, a function is a coroutine if its body contains at least one `co_await`, `co_yield`, or `co_return` statement. However, in order for this to work, the return type needs an underlying coroutine implementation. Flow provides these for the following types: @@ -366,8 +375,8 @@ The above coroutine returns a stream of random strings. The memory is owned by t a `StringRef` and then reuses the memory in the next iteration. This makes this generator very cheap to use, as it only does one allocation in its lifetime. With eager execution, this would be much harder to write (and reason about): the coroutine would immediately generate a string and then eagerly compute the next one when the string is retrieved. -However, in Flow a `co_yield` is guarantee to suspend the coroutine until the value was consumed (this is not generally -a guarantee with `co_yield` -- C++ coroutines give the implementor a great degree of freedom over decisions like this). 
+However, in Flow a `co_yield` is guaranteed to suspend the coroutine until the value was consumed (this is not generally +a guarantee with `co_yield` -- C++ coroutines give the implementer a great degree of freedom over decisions like this). ### Generators vs Promise Streams @@ -698,4 +707,238 @@ doesn't seem like UBSAN finds these kind of subtle bugs. Another difference is, that if a `state` variables might be initialized twice: once at the creation of the actor using the default constructor and a second time at the point where the variable is initialized in the code. With C++ -coroutines we now get the expected behavior, which is better, but nonetheless a potential behavior change. \ No newline at end of file +coroutines we now get the expected behavior, which is better, but nonetheless a potential behavior change. + +### `state` Variables Inside Blocks + +The actor compiler **hoists** all `state` variables into the actor's state struct, regardless of C++ block scope. This +means a `state` variable declared inside an `if`, `else`, `for`, or `try` block lives for the entire actor lifetime. +In a coroutine, these become regular C++ locals that follow normal scoping rules. + +This is a source of subtle bugs. Consider: + +```c++ +ACTOR Future<Void> example() { + if (someCondition) { + state Future<Void> background = longRunningTask(); + } + // In ACTOR code, `background` is still alive here — it was hoisted. + wait(delay(100.0)); + return Void(); +} +``` + +A naive conversion: + +```c++ +Future<Void> example() { + if (someCondition) { + Future<Void> background = longRunningTask(); + } + // BUG: `background` was destroyed at the `}` above, cancelling longRunningTask()! + co_await delay(100.0); +} +``` + +The fix is to move the variable to function scope: + +```c++ +Future<Void> example() { + Future<Void> background; + if (someCondition) { + background = longRunningTask(); + } + // `background` is still alive — correct. 
+ co_await delay(100.0); +} +``` + +**Rule**: When removing `state` from a variable, check whether it is declared inside a block. If so, move the +declaration to function scope. + +### `const&` Parameters + +C++20 coroutines only store a reference in the coroutine frame for `const&` parameters — they do **not** copy the +argument. If the caller passes a temporary (e.g. a default argument value, or a local that goes out of scope), the +reference dangles after the first suspend point. + +```c++ +// DANGEROUS: if caller passes a temporary, `key` dangles after first co_await +Future<Void> doSomething(Key const& key) { + co_await delay(1.0); + fmt::print("{}\n", key.toString()); // potential use-after-free +} +``` + +The fix is to copy `const&` parameters to locals before the first `co_await`: + +```c++ +Future<Void> doSomething(Key const& key) { + Key keyCopy = key; // safe copy before any suspend + co_await delay(1.0); + fmt::print("{}\n", keyCopy.toString()); // OK +} +``` + +**Rule**: Copy all `const&` parameters to local variables before the first `co_await`. + +### Forward Declarations in `.actor.h` Files + +When a function is converted from `ACTOR` to a coroutine, any forward declarations in `.actor.h` files must have the +`ACTOR` keyword removed. The actor compiler automatically adds `const&` to all parameters in `ACTOR` declarations. +If you also write `const&` explicitly, the generated code will contain `const& const&`, which is a compile error. + +```c++ +// workloads.actor.h — WRONG: ACTOR + const& = double const& +ACTOR Future<Void> foo(Database const& cx); + +// workloads.actor.h — CORRECT: remove ACTOR since foo() is now a coroutine +Future<Void> foo(Database const& cx); +``` + +### File Naming + +Converted files should be renamed from `.actor.cpp` to `.cpp` (or `.actor.h` to `.h`) since they no longer need the +actor compiler. 
Both `fdbserver` and `flowbench` use `fdb_find_sources()` in their `CMakeLists.txt`, which +automatically picks up files by glob, so the rename is usually sufficient without any CMake changes. + +### Conversion Checklist + +1. Rename the file from `.actor.cpp` to `.cpp`. +2. Remove `ACTOR` from all function definitions. +3. Remove `UNCANCELLABLE`; add `Uncancellable` as the first parameter instead. +4. Remove `state` from all local variable declarations. + - **Check**: is the variable inside a block (`if`/`else`/`for`/`try`)? If so, move it to function scope. +5. Replace `wait(expr)` with `co_await expr`. Replace `waitNext(expr)` with `co_await expr`. +6. Replace `return expr` with `co_return expr`. Replace `return Void()` with `co_return`. +7. Rewrite `choose`/`when` using the `Choose` class. +8. Simplify: `wait(success(f))` → `co_await f`; `wait(store(v, f))` → `v = co_await f`. +9. For `const&` parameters: copy to a local before the first `co_await`. +10. Remove `ACTOR` from any forward declarations of the converted functions in `.actor.h` files. +11. Build and run simulation tests to verify correctness. + +## Performance Analysis & Optimization + +### Performance Summary (Updated February 2026) + +Through optimization and profiling analysis, C++20 coroutines have made some performance improvements, reducing the gap with ACTOR-generated code from ~10% to 3-8% depending on workload patterns. 
+ +#### Current Linux Performance Results (32-core, 3.1 GHz) + +``` +Benchmark Type ACTOR Performance Coroutine Performance Gap Status +-------------- ---------------- -------------------- --- ------ +NET2/4096 2.67M/s 2.41M/s -8.5% Target for optimization +YIELD/4096 7.45M/s 13.6M/s +83% Coroutines much faster ✅ +DELAY/4096 1.44M/s 5.22M/s +260% Coroutines much faster ✅ +CALLBACK/1024/64 50.9M/s 8.7M/s (some patterns) -82% Mixed results +``` + +#### Key Insight: Workload Pattern Dependency + +**Coroutines excel in frame-reuse patterns** (YIELD, DELAY) where a single coroutine is suspended/resumed many times. + +**Coroutines lag in allocation-heavy patterns** (NET2) where many short-lived coroutines are created and destroyed. + +### Performance Analysis Deep Dive + +#### Root Cause Identification (February 2026) + +**Original Analysis**: Coroutines had 39.13% CPU overhead in `final_suspend()` that actors completely avoid. + +``` +ACTORS (2.67M/s): 43.31% CPU in direct ActorCallback::fire() +COROUTINES (2.41M/s): 35.61% CPU in QuorumCallback + other overhead = ~75% total +``` + +#### Fix + +**Implementation**: Moved SAV cleanup from `final_suspend()` to `return_value()` to match actor completion timing. + +**Result**: Eliminated final_suspend() overhead from performance profiles (39.13% → 0.21% CPU usage). + +### Current Bottlenecks (February 2026) + +Based on comprehensive Linux profiling of optimized coroutines: + +#### 1. QuorumCallback Overhead (35.61% CPU) +- **Impact**: Shared bottleneck between actors and coroutines +- **Cause**: Callback chain traversal in SAV system +- **Optimization**: Compiler hints provide minimal improvement + +#### 2. FastAllocator<128> Waste (7.19% CPU) +- **Impact**: Frame allocation overhead in NET2 pattern +- **Cause**: Some coroutine frames exceed 64-byte optimal bucket size +- **Evidence**: 3.69% allocate + 3.50% release CPU usage +- **Attempts**: Custom allocator forcing provided <1% improvement + +#### 3. 
AwaitableFuture Operations (3.66% CPU) +- **Impact**: Coroutine-specific suspend/resume overhead +- **Components**: 2.79% fire() + 0.87% resumeImpl() +- **Nature**: Inherent to C++20 coroutine mechanics + +### Optimization Techniques - What Works and What Doesn't + +#### ✅ Successful Optimizations + +1. **Compiler optimization hints**: `__attribute__((hot))`, `__attribute__((always_inline))`, `__attribute__((flatten))` + - **Impact**: 2-5% performance improvements in hot paths + +2. **Branch prediction hints**: `[[likely]]`, `[[unlikely]]` + - **Impact**: Optimizes common vs error paths + +3. **Architectural changes**: Moving SAV cleanup from final_suspend() to return_value() + - **Impact**: Eliminated 39.13% CPU bottleneck (99.5% reduction) + +#### ❌ Ineffective Optimizations + +1. **Custom FastAllocator forcing**: Attempted to force frames into smaller buckets + - **Result**: Only 0.82% reduction in FastAllocator<128> overhead + - **Risk**: Unsafe for frames that don't fit smaller buckets + +2. **Frame packing**: `__attribute__((packed))`, pointer bit-packing + - **Result**: Added overhead from indirection outweighed space savings + - **Issue**: Increased function call overhead + +3. 
**Aggressive final_suspend() bypass**: Attempted to skip SAV operations entirely + - **Result**: Broke Flow's reference counting semantics (double-free crashes) + +### Performance Comparison by Platform + +#### Linux (Release, -O3) +- **Coroutines**: 2.41M/s NET2, 13.6M/s YIELD +- **Actors**: 2.67M/s NET2, 7.45M/s YIELD + +#### macOS (Debug, -g) +- **Coroutines**: 930k/s NET2 (significantly slower) +- **Platform difference**: 2.56x performance gap between Linux and macOS + +### Benchmark Comparison Tool + +#### Generating Comprehensive Performance Reports + +To generate complete actor vs coroutine performance comparison reports (matching historical format): + +```bash +cd build_output # or your build directory +python3 ../contrib/benchmark_comparison.py +``` + +**Output**: Complete comparison across all benchmark types: +- DELAY benchmarks (DELAY + YIELD variants, all scales) +- NET2 benchmarks (allocation-heavy patterns, all scales) +- CALLBACK benchmarks (various template sizes and scales) +- OVERALL_GEOMEAN calculations for statistical analysis + +**Requirements**: +- Working flowbench binary with both actor and coroutine benchmarks +- Benchmark infrastructure must include: bench_net2, coroutine_net2, bench_delay, coroutine_delay_bench, coroutine_yield_bench, bench_callback, coroutine_callback + +**Usage**: Tool automatically runs benchmarks and generates comparison report in the format matching historical coroutine optimization reports. + +### Future Optimization Opportunities + +#### High-Impact Targets +1. **QuorumCallback optimization** (35.61% CPU) - requires deeper architectural changes +2. **Frame allocation strategy** - investigate frame pooling for allocation-heavy patterns +3. 
**Profile-guided optimization** - compiler-level optimization based on runtime profiles diff --git a/design/dynamic-knobs.md b/design/dynamic-knobs.md deleted file mode 100644 index 3bd03404d61..00000000000 --- a/design/dynamic-knobs.md +++ /dev/null @@ -1,455 +0,0 @@ -# Dynamic Knobs - -This document is largely adapted from original design documents by Markus -Pilman and Trevor Clinkenbeard. - -## Background - -FoundationDB parameters control the behavior of the database, including whether -certain features are available and the value of internal constants. Parameters -will be referred to as knobs for the remainder of this document. Currently, -these knobs are configured through arguments passed to `fdbserver` processes, -often controlled by `fdbmonitor`. This has a number of problems: - -1. Updating knobs involves updating `foundationdb.conf` files on each host in a - cluster. This has a lot of overhead and typically requires external tooling - for large scale changes. -2. All knob changes require a process restart. -3. We can't easily track the history of knob changes. - -## Overview - -The dynamic knobs project creates a strictly serializable quorum-based -configuration database stored on the coordinators. Each `fdbserver` process -specifies a configuration path and applies knob overrides from the -configuration database for its specified classes. - -### Caveats - -The configuration database explicitly does not support the following: - -1. A high load. The update rate, while not specified, should be relatively low. -2. A large amount of data. The database is meant to be relatively small (under - one megabyte). Data is not sharded and every coordinator stores a complete - copy. -3. Concurrent writes. At most one write can succeed at a time, and clients must - retry their failed writes. - -## Design - -### Configuration Path - -Each `fdbserver` process can now include a `--config_path` argument specifying -its configuration path. 
A configuration path is a hierarchical list of -configuration classes specifying which knob overrides the `fdbserver` process -should apply from the configuration database. For example: - -```bash -$ fdbserver --config_path classA/classB/classC ... -``` - -Knob overrides follow descending priority: - -1. Manually specified command line knobs. -2. Individual configuration class overrides. - * Subdirectories override parent directories. For example, if the - configuration path is `az-1/storage/gp3`, the `gp3` configuration takes - priority over the `storage` configuration, which takes priority over the - `az-1` configuration. -3. Global configuration knobs. -4. Default knob values. - -#### Example - -For example, imagine an `fdbserver` process run as follows: - -```bash -$ fdbserver --datadir /mnt/fdb/storage/4500 --logdir /var/log/foundationdb --public_address auto:4500 --config_path az-1/storage/gp3 --knob_disable_asserts false -``` - -And the configuration database contains: - -| ConfigClass | KnobName | KnobValue | -|-------------|---------------------|-----------| -| az-2 | page_cache_4k | 8e9 | -| storage | min_trace_severity | 20 | -| az-1 | compaction_interval | 280 | -| storage | compaction_interval | 350 | -| az-1 | disable_asserts | true | -| \<global\> | max_metric_size | 5000 | -| gp3 | max_metric_size | 1000 | - -The final configuration for the process will be: - -| KnobName | KnobValue | Explanation | -|---------------------|-------------|-------------| -| page_cache_4k | \<default\> | The configuration database knob override for `az-2` is ignored, so the compiled default is used | -| min_trace_severity | 20 | Because the `storage` configuration class is part of the process’s configuration path, the corresponding knob override is applied from the configuration database | -| compaction_interval | 350 | The `storage` knob override takes precedence over the `az-1` knob override | -| disable_asserts | false | This knob is manually overridden, so all other overrides are 
ignored | -| max_metric_size | 1000 | Knob overrides for specific configuration classes take precedence over global knob overrides, so the global override is ignored | - -### Clients - -Clients can write to the configuration database using transactions. -Configuration database transactions are differentiated from regular -transactions through specification of the `USE_CONFIG_DATABASE` database -option. - -In configuration transactions, the client uses the tuple layer to interact with -the configuration database. Keys are tuples of size two, where the first item -is the configuration class being written, and the second item is the knob name. -The value should be specified as a string. It will be converted to the -appropriate type based on the declared type of the knob being set. - -Below is a sample Python script to write to the configuration database. - -```python -import fdb - -fdb.api_version(800) - -@fdb.transactional -def set_knob(tr, knob_name, knob_value, config_class, description): - tr[b'\xff\xff/description'] = description - tr[fdb.tuple.pack((config_class, knob_name,))] = knob_value - -# This function performs two knob changes transactionally. -@fdb.transactional -def set_multiple_knobs(tr): - tr[b'\xff\xff/description'] = b'description' - tr[fdb.tuple.pack((None, b'min_trace_severity',))] = b'10' - tr[fdb.tuple.pack((b'az-1', b'min_trace_severity',))] = b'20' - -db = fdb.open() -db.options.set_use_config_database() - -set_knob(db, b'min_trace_severity', b'10', None, b'description') -set_knob(db, b'min_trace_severity', b'20', 'az-1', b'description') -``` - -### CLI Usage - -Users may also utilize `fdbcli` to set and update knobs dynamically. Usage is as follows -``` -setknob <knob_name> <knob_value> [config_class] -getknob <knob_name> [config_class] -clearknob <knob_name> [config_class] -``` -Where `knob_name` is an existing knob, `knob_value` is the desired value to set the knob and `config_class` is the optional configuration class. 
Furthermore, `setknob` may be combined within a `begin\commit` to update multiple knobs atomically. If using this option, a description must follow `commit` otherwise a prompt will be shown asking for a description. The description must be non-empty. An example follows. -``` -begin -setknob min_trace_severity 30 -setknob tracing_udp_listener_addr 192.168.0.1 -commit "fdbcli change" -``` -Users may only combine knob configuration changes with other knob configuration changes in the same transaction. For example, the following is not permitted and will raise an error. -``` -begin -set foo bar -setknob max_metric_size 1000 -commit "change" -``` -Specifically, `set, clear, get, getrange, clearrange` cannot be combined in any transaction with a `setknob` or `getknob`. - -If using an individual `setknob` or `clearknob` without being inside a `begin\commit` block, then `fdbcli` will prompt for a description as well. - -#### Type checking -Knobs have implicit types attached to them when defined. For example, the knob `tracing_udp_listener_addr` is set to `"127.0.0.1"` as so the type is string. If a user invokes `setknob` on this knob with an incorrect value that is not a string, the transaction will fail. - - -### Disable the Configuration Database - -The configuration database includes both client and server changes and is -enabled by default. Thus, to disable the configuration database, changes must -be made to both. - -#### Server - -The configuration database can be disabled by specifying the ``fdbserver`` -command line option ``--no-config-db``. Note that this option must be specified -for *every* ``fdbserver`` process. - -#### Client - -The only client change from the configuration database is as part of the change -coordinators command. The change coordinators command is not considered -successful until the configuration database is readable on the new -coordinators. 
If the configuration database has been disabled server-side via -the ``--no-config-db`` command line option, the coordinators will continue to -serve the configuration interface, but will reply to each request with an empty -response. Client-side changes are no longer necessary when disabling the -configuration database. - -Optionally, the client liveness check of the configuration database can be -prevented by specifying the ``--no-config-db`` flag when changing the -coordinators. For example: - -``` -fdbcli> coordinators auto --no-config-db -``` - -## Status - -The current state of the configuration database is output as part of `status -json`. The configuration path for each process can be determined from the -``command_line`` key associated with each process. - -Sample from ``status json``: - -``` -"configuration_database" : { - "commits" : [ - { - "description" : "set some knobs", - "timestamp" : 1659570000, - "version" : 1 - }, - { - "description" : "make some other changes", - "timestamp" : 1659570000, - "version" : 2 - } - ], - "last_compacted_version" : 0, - "most_recent_version" : 2, - "mutations" : [ - { - "config_class" : "", - "knob_name" : "min_trace_severity", - "knob_value" : "int:5", - "type" : "set", - "version" : 1 - }, - { - "config_class" : "", - "knob_name" : "compaction_interval", - "knob_value" : "double:30.000000", - "type" : "set", - "version" : 1 - }, - { - "config_class" : "az-1", - "knob_name" : "compaction_interval", - "knob_value" : "double:60.000000", - "type" : "set", - "version" : 1 - }, - { - "config_class" : "", - "knob_name" : "compaction_interval", - "type" : "clear", - "version" : 2 - }, - { - "config_class" : "", - "knob_name" : "update_node_timeout", - "knob_value" : "double:4.000000", - "type" : "set", - "version" : 2 - } - ], - "snapshot" : { - "" : { - "min_trace_severity" : "int:5", - "update_node_timeout" : "double:4.000000" - }, - "az-1" : { - "compaction_interval" : "double:60.000000" - } - } -} -``` - -After 
compaction, ``status json`` would show: - -``` -"configuration_database" : { - "commits" : [ - ], - "last_compacted_version" : 2, - "most_recent_version" : 2, - "mutations" : [ - ], - "snapshot" : { - "" : { - "min_trace_severity" : "int:5", - "update_node_timeout" : "double:4.000000" - }, - "az-1" : { - "compaction_interval" : "double:60.000000" - } - } -} -``` - -## Detailed Implementation - -The configuration database is implemented as a replicated state machine living -on the coordinators. This allows configuration database transactions to -continue to function in the event of a catastrophic loss of the transaction -subsystem. - -To commit a transaction, clients run the two phase Paxos protocol. First, the -client asks for a live version from a quorum of coordinators. When a -coordinator receives a request for its live version, it increments its local -live version by one and returns it to the client. Then, the client submits its -writes at the live version it received in the previous step. A coordinator will -accept the commit if it is still on the same live version. If a majority of -coordinators accept the commit, it is considered committed. - -### Coordinator - -Each coordinator runs a ``ConfigNode`` which serves as a replica storing one -full copy of the configuration database. Coordinators never communicate with -other coordinators while processing configuration database transactions. -Instead, the client runs the transaction and determines when it has quorum -agreement. - -Coordinators serve the following ``ConfigTransactionInterface`` to allow -clients to read from and write to the configuration database. 
- -#### ``ConfigTransactionInterface`` -| Request | Request fields | Reply fields | Explanation | -|------------------|----------------------------------------------------------------|-----------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------| -| GetGeneration | (coordinatorsHash) | (generation) or (coordinators_changed error) | Get a new read version. This read version is used for all future requests in the transaction | -| Get | (configuration class, knob name, coordinatorsHash, generation) | (knob value or empty) or (coordinators_changed error) or (transaction_too_old error) | Returns the current value stored at the specified configuration class and knob name, or empty if no value exists | -| GetConfigClasses | (coordinatorsHash, generation) | (configuration classes) or (coordinators_changed error) or (transaction_too_old error) | Returns a list of all configuration classes stored in the configuration database | -| GetKnobs | (configuration class, coordinatorsHash, generation) | (knob names) or (coordinators_changed error) or (transaction_too_old error) | Returns a list of all knob names stored for the provided configuration class | -| Commit | (mutation list, coordinatorsHash, generation) | ack or (coordinators_changed error) or (commit_unknown_result error) or (not_committed error) | Commit mutations set by the transaction | - -Coordinators also serve the following ``ConfigFollowerInterface`` to provide -access to (and modification of) their current state. Most interaction through -this interface is done by the cluster controller through its -``IConfigConsumer`` implementation living on the ``ConfigBroadcaster``. 
- -#### ``ConfigFollowerInterface`` -| Request | Request fields | Reply fields | Explanation | -|-----------------------|----------------------------------------------------------------------|-----------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------| -| GetChanges | (lastSeenVersion, mostRecentVersion) | (mutation list, version) or (version_already_compacted error) or (process_behind error) | Request changes since the last seen version, receive a new most recent version, as well as recent mutations | -| GetSnapshotAndChanges | (mostRecentVersion) | (snapshot, snapshotVersion, changes) | Request the full configuration database, in the form of a base snapshot and changes to apply on top of the snapshot | -| Compact | (version) | ack | Compact mutations up to the provided version | -| Rollforward | (rollbackTo, lastKnownCommitted, target, changes, specialZeroQuorum) | ack or (version_already_compacted error) or (transaction_too_old error) | Rollback/rollforward mutations on a node to catch it up with the majority | -| GetCommittedVersion | () | (registered, lastCompacted, lastLive, lastCommitted) | Request version information from a ``ConfigNode`` | -| Lock | (coordinatorsHash) | ack | Lock a ``ConfigNode`` to prevent it from serving requests during a coordinator change | - -### Cluster Controller - -The cluster controller runs a singleton ``ConfigBroadcaster`` which is -responsible for periodically polling the ``ConfigNode``s for updates, then -broadcasting these updates to workers through the ``ConfigBroadcastInterface``. -When workers join the cluster, they register themselves and their -``ConfigBroadcastInterface`` with the broadcaster. The broadcaster then pushes -new updates to registered workers. - -The ``ConfigBroadcastInterface`` is also used by ``ConfigNode``s to register -with the ``ConfigBroadcaster``. 
``ConfigNode``s need to register with the -broadcaster because the broadcaster decides when the ``ConfigNode`` may begin -serving requests, based on global information about status of other -``ConfigNode``s. For example, if a system with three ``ConfigNode``s suffers a -fault where one ``ConfigNode`` loses data, the faulty ``ConfigNode`` should -not be allowed to begin serving requests again until it has been rolled forward -and is up to date with the latest state of the configuration database. - -#### ``ConfigBroadcastInterface`` - -| Request | Request fields | Reply fields | Explanation | -|------------|------------------------------------------------------------|-------------------------------|---------------------------------------------------------------------------------------------| -| Snapshot | (snapshot, version, restartDelay) | ack | A snapshot of the configuration database sent by the broadcaster to workers | -| Changes | (changes, mostRecentVersion, restartDelay) | ack | A list of changes up to and including mostRecentVersion, sent by the broadcaster to workers | -| Registered | () | (registered, lastSeenVersion) | Sent by the broadcaster to new ``ConfigNode``s to determine their registration status | -| Ready | (snapshot, snapshotVersion, liveVersion, coordinatorsHash) | ack | Sent by the broadcaster to new ``ConfigNode``s to allow them to start serving requests | - -### Worker - -Each worker runs a ``LocalConfiguration`` instance which receives and applies -knob updates from the ``ConfigBroadcaster``. 
The local configuration maintains -a durable ``KeyValueStoreMemory`` containing the following: - -* The latest known configuration version -* The most recently used configuration path -* All knob overrides corresponding to the configuration path at the latest known version - -Once a worker starts, it will: - -* Apply manually set knobs -* Read its local configuration file - * If the stored configuration path does not match the configuration path - specified on the command line, delete the local configuration file - * Otherwise, apply knob updates from the local configuration file. Manually - specified knobs will not be overridden - * Register with the broadcaster to receive new updates for its configuration - classes - * Persist these updates when received and restart if necessary - -### Knob Atomicity - -All knobs are classified as either atomic or non-atomic. Atomic knobs require a -process restart when changed, while non-atomic knobs do not. - -### Compaction - -``ConfigNode``s store individual mutations in order to be able to update other, -out of date ``ConfigNode``s without needing to send a full snapshot. Each -configuration database commit also contains additional metadata such as a -timestamp and a text description of the changes being made. To keep the size of -the configuration database manageable, a compaction process runs periodically -(defaulting to every five minutes) which compacts individual mutations into a -simplified snapshot of key-value pairs. Compaction is controlled by the -``ConfigBroadcaster``, using information it peridiodically requests from -``ConfigNode``s. Compaction will only compact up to the minimum known version -across *all* ``ConfigNode``s. This means that if one ``ConfigNode`` is -permanently partitioned from the ``ConfigBroadcaster`` or from clients, no -compaction will ever take place. 
- -### Rollback / Rollforward - -It is necessary to be able to roll ``ConfigNode``s backward and forward with -respect to their committed versions due to the nature of quorum logic and -unreliable networks. - -Consider a case where a client commit gets persisted durably on one out of -three ``ConfigNode``s (assume commit messages to the other two nodes are lost). -Since the value is not committed on a majority of ``ConfigNode``s, it cannot be -considered committed. But it is also incorrect to have the value persist on one -out of three nodes as future commits are made. In this case, the most common -result is that the ``ConfigNode`` will be rolled back when the next commit from -a different client is made, and then rolled forward to contain the data from -the commit. ``PaxosConfigConsumer`` contains logic to recognize ``ConfigNode`` -minorities and update them to match the quorum. - -### Changing Coordinators - -Since the configuration database lives on the coordinators and the -[coordinators can be -changed](https://apple.github.io/foundationdb/configuration.html#configuration-changing-coordination-servers), -it is necessary to copy the configuration database from the old to the new -coordinators during such an event. A coordinator change performs the following -steps in regards to the configuration database: - -1. Write ``\xff/coordinatorsKey`` with the new coordinators string. The key - ``\xff/previousCoordinators`` contains the current (old) set of - coordinators. -2. Lock the old ``ConfigNode``s so they can no longer serve client requests. -3. Start a recovery, causing a new cluster controller (and therefore - ``ConfigBroadcaster``) to be selected. -4. Read ``\xff/previousCoordinators`` on the ``ConfigBroadcaster`` and, if - present, read an up-to-date snapshot of the configuration database on the - old coordinators. -5. 
Determine if each registering ``ConfigNode`` needs an up-to-date snapshot of - the configuration database sent to it, based on its reported version and the - snapshot version of the database received from the old coordinators. - * Some new coordinators which were also coordinators in the previous - configuration may not need a snapshot. -6. Send ready requests to new ``ConfigNode``s, including an up-to-date snapshot - if necessary. This allows the new coordinators to begin serving - configuration database requests from clients. - -## Testing - -The ``ConfigDatabaseUnitTests`` class unit test a number of different -configuration database dimensions. - -The ``ConfigIncrement`` workload tests contention between clients attempting to -write to the configuration database, paired with machine failure and -coordinator changes. diff --git a/design/encryption-data-at-rest.md b/design/encryption-data-at-rest.md deleted file mode 100644 index be26aa30511..00000000000 --- a/design/encryption-data-at-rest.md +++ /dev/null @@ -1,231 +0,0 @@ -# FDB Encryption **data at-rest** - -## Threat Model - -The proposed solution is `able to handle` the following attacks: - -* An attacker, if able to get access to any FDB cluster host or attached disk, would not be able to read the persisted data. Further, for cloud deployments, returning a cloud instance back to the cloud provider will prevent the cloud provider from reading the contents of data stored on the disk. -* Data stored on a lost or stolen FDB host persistent disk storage device can’t be recovered. - -The proposed solution `will not be able` to handle the following attacks: - -* Encryption is enabled for data at-rest only, generating a memory dump of FDB processes could enable an attacker to read in-memory data contents. -* An FDB cluster host access, if compromised, would allow an attacker to read/write data managed by the FDB cluster. 
- -## Goals - -FoundationDB being a multi-model, easily scalable and fault-tolerant, with an ability to provide great performance even with commodity hardware, plays a critical role enabling enterprises to deploy, manage and run mission critical applications. - -Data encryption support is a table-stake feature for modern day enterprise service offerings in the cloud. Customers expect, and at times warrant, that their data and metadata be fully encrypted using the latest security standards. The goal of this document includes: - -* Discuss detailed design to support data at-rest encryption support for data stored in FDB clusters. Encrypting data in-transit and/or in-memory caches at various layers in the query execution pipeline (inside and external to FDB) is out of the scope of this feature. -* Isolation guarantees: the encryption domain matches with `tenant` partition semantics supported by FDB clusters. Tenants are discrete namespaces in FDB that serve as transaction domains. A tenant is a `identifier` that maps to a `prefix` within the data-FDB cluster, and all operations within a tenant are implicitly bound within a `tenant-prefix`. Refer to `Multi-Tenant FoundationDB API` documentation more details. However, it is possible to use a single encryption key for the whole cluster, in case `tenant partitioning` isn’t available. -* Ease of integration with external Key Management Services enabling persisting, caching, and lookup of encryption keys. - -## Configuration - -A cluster encryption at-rest properties needs to be configured at the time of database creation, once property is set, it cannot be modified (sticky). Supported modes include: - -* `domain_aware`: follows per-tenant encryption semantics. -* `cluster_aware`: follows per-cluster encryption semantics. 
- -Following command option needs to be provided to enable encryption at-rest for a newly created database: - -`configure new encryption_at_rest_mode={disabled|domain_aware|cluster_aware}` - -## Encryption Mode - -The proposal is to use strong AES-256 CTR encryption mode. Salient properties are: - -* HMAC_SHA256 key hashing technique is used to derive encryption keys using a base encryption key and locally generated random number. The formula used is as follows: - -``` - DEK = HMAC SHA256(BEK || UID) - -Where -DEK = Derived Encryption Key -BEK = Base Encryption key -UID = Host local random generated number -``` - -UID is an 8 byte host-local random number. Another option would have been a simple host-local incrementing counter, however, the scheme runs the risk of repeated encryption-key generation on cluster/process restarts. - -* An encryption key derived using the above formula will be cached (in-memory) for a short time interval (10 mins, for instance). The encryption-key is immutable, but, the TTL approach allows refreshing encryption key by reaching out to External Encryption KeyManagement solutions, hence, supporting “restricting lifetime of an encryption” feature if implemented by Encryption Key Management solution. -* Initialization Vector (IV) selection would be random. - -## Architecture - -The encryption responsibilities are split across multiple modules to ensure data and metadata stored in the cluster is never persisted in plain text on any durable storages (temporary and/or long-term durable storage). - -## Encryption Request Workflow - -### **Write Request** - -* An FDB client initiates a write transaction providing {key, value} in plaintext format. -* An FDB cluster host as part of processing a write transaction would do the following: - 1. Obtain required encryption key based on the transaction request tenant information. - 2. Encrypt mutations before persisting them on Transaction Logs (TLogs). 
As a background process, the mutations are moved to a long-term durable storage by the Storage Server processes. - -Refer to the sections below for more details. - -### **Read Request** - -* An FDB client initiates a read transaction request. -* An FDB cluster host as part of processing request would do the following: - 1. StorageServer would read desired data blocks from the persistent storage. - 2. Regenerate the encryption key required to decrypt the data. - 3. Decrypt data and pass results as plaintext to the caller. - - -Below diagram depicts the end-to-end encryption workflow detailing various modules involved and their interactions. The following section discusses detailed design for involved components. - -``` - _______________________________________________________ - | FDB CLUSTER HOST | - | | - _____________________ | ________________________ _________________ | - | | (proprietary) | | | | | - | |<---------- |--| KMS CONNECTOR | | COMMIT PROXIES | | - | ENCRYPTION KEY | | | | | | | - | MANAGEMENT SOLUTION | | |(non FDB - proprietary) | | | | - | | | |________________________| |_________________| | - | | | ^ | | - |_____________________| | | (REST API) | (Encrypt | - | | V Mutation) | - | _________________________________________ | __________________ - | | | | | | - | | ENCRYPT KEYPROXY SERVER |<------|-----------| | - | |_________________________________________| | | | - | | | | BACKUP FILES | - | | (Encrypt Node) | | | - | V | | | - | _________________________________________ | | (Encrypt file) | - | | |<------|-----------| | - | | REDWOOD STORAGE SERVER | | |__________________| - | |_________________________________________| | - |_______________________________________________________| -``` - -## FDB Encryption - -An FDB client would insert data i.e. plaintext {key, value} in a FDB cluster for persistence. 
- -### Encrypt KeyServer - -Salient features include: - -* New FDB role/process to allow fetching of encryption keys from external KeyManagementService interfaces. The process connects to the KMS-Connector REST interface to fetch desired encryption keys. -* On an encryption-key fetch from KMS-Connector, it applies HMAC derivative function to generate a new encryption key and cache it in-memory. The in-memory cache is used to serve encryption key fetch requests from other FDB processes. - - -Given encryption keys will be needed as part of cluster-recovery, this process/role needs to be recruited at the start of the cluster-recovery process (just after the “master/sequencer” process/role recruitment). All other FDB processes will interact with this process to obtain encryption keys needed to encrypt and/or decrypt the data payload. - -`Note`: An alternative would be to incorporate the functionality into the ClusterController process itself, however, having clear responsibility separation would make design more flexible and extensible in future if needed. - -### KMS-Connector - -Implements a native FDB KMS framework allowing multiple interfaces to co-existing and enabling FDB <-> KMS communication. Salient features: - -* Abstract `KmsConnector` class, the class enables a specialization implementation to implement `actor` supporting desired communication protocol. -* `KmsConnectorInterface` defines the supported endpoints allowing EncryptKeyProxy to fetch/refresh encryption keys. -* `--kms-connector-type` configuration parameter supplied via `foundationdb.conf` controls the runtime selection of KmsConnector selection. - -### **RESTKmsConnector** - -Implements REST protocol communication support to interact with external KMS. - -The `foundationdb.conf` needs to be updated to supply following configuration parameters: - -* `--discover-kms-conn-url-file`: local filesystem file-path defining the URL to connect with KMS on startup. 
FDB support periodic refreshes of KMS URLs if supported. -* `--kms-conn-validation-token-details`: local file system file(s) detailing the validation tokens needed by KMS to authorize FDB <-> GS communication. -* `--kms-conn-get-encryption-keys-endpoint`: KMS REST endpoint to fetch encryption keys by `baseCipherIds` -* `--kms-conn-get-latest-encryption-keys-endpoint`: KMS REST endpoint to fetch latest encryption keys for a given `encryption domain id` - -### **SimKmsConnector** - -Implements a standalone only KMS connector designed specifically to meet simulation and/or performance needs. The connector doesn't send any RPC calls, however, implements an `actor` backed by `stable encryption key vault` provider; the simulated vault supports process restarts. All Encryption at-rest simulation tests uses SimKmsConnector. - - -### Commit Proxies (CPs) - -When a FDB client initiates a write transaction to insert/update data stored in a FDB cluster, the transaction is received by a CP, which then resolves the transaction by checking if the transaction is allowed. If allowed, it commits the transaction to TLogs. The proposal is to extend CP responsibilities by encrypting mutations using the desired encryption key before mutations get persisted into TLogs (durable storage). The encryption key derivation is achieved using the following formula: - -``` - DEK = HMAC SHA256(BEK || UID) - -Where: - -DEK = Derived Encryption Key -BEK = Base Encryption Key -UID = Host local random generated number -``` - -The Transaction State Store (commonly referred as TxnStateStore) is a Key-Value datastore used by FDB to store metadata about the database itself for bootstrap purposes. The data stored in this store plays a critical role in: guiding the transaction system to persist writes (storage tags to mutations at CPs), and managing FDB internal data movement. The TxnStateStore data gets encrypted with the desired encryption key before getting persisted on the disk queues. 
- -As part of encryption, every Mutation would be appended by a plaintext `BlobCipherEncryptHeaderRef` to assist decrypting the information for reads. - -CPs would cache (in-memory) recently used encryption-keys to optimize network traffic due to encryption related operations. Further, the caching would improve overall performance, avoiding frequent RPC calls to EncryptKeyServer which may eventually become a scalability bottleneck. Each encryption-key in the cache has a short Time-To-Live (10 mins) and on expiry the process will interact with the EncryptKeyServer to fetch the required encryption-keys. The same caching policy is followed by the Redwood Storage Server and the Backup File processes too. - -### **Caveats** - -The encryption is done inline in the transaction path, which will increase the total commit latencies. Few possible ways to minimize this impact are: - -* Overlap encryption operations with the CP::resolution phase, which would minimize the latency penalty per transaction at the cost of spending more CPU cycles. If needed, for production deployments, we may need to increase the number of CPs per FDB cluster. -* Implement an external process to offload encryption. If done, encryption would appear no different than the CP::resolution phase, where the process would invoke RPC calls to encrypt the buffer and wait for operation completion. - -### Storage Servers - -The encryption design only supports Redwood Storage Server integration, support for other storage engines is yet to be planned. - -### Redwood Storage Nodes - -Redwood at heart is a B+ tree and stores data in two types of nodes: - -* `Non-leaf` nodes: Nodes will only store keys and not values(prefix compression is applied). -* `Leaf` Nodes: Will store `{key, value}` tuples for a given key-range. - -Both above-mentioned nodes will be converted into one or more fixed size pages (likely 4K or 8K) before being persisted on a durable storage. 
The encryption will be performed at the node level instead of “page level”, i.e. all pages constituting a given Redwood node will be encrypted using the same encryption key generated using the following formula: - -``` - DEK = HMAC SHA256(BEK || UID) - -Where: - -DEK = Derived Encryption Key -BEK = Base Encryption Key -UID = Host local random generated number -``` - -### Backup Files - -Backup Files are designed to pull committed mutations from StorageServers and persist them as “files” stored on cloud backed BlobStorage such as Amazon S3. Each persisted file stores mutations for a given key-range and will be encrypted by generating an encryption key using below formula: - -``` - DEK = HMAC SHA256(BEK || FID) - -Where: - -DEK = Derived Encryption Key -BEK = Base Encryption Key -FID = File Identifier (unique) -``` - -## Decryption on Reads - -To assist reads, FDB processes (StorageServers, Backup Files workers) will be modified to read/parse the encryption header. The data decryption will be done as follows: - -* The FDB process will interact with Encrypt KeyServer to fetch the desired base encryption key corresponding to the key-id persisted in the encryption header. -* Reconstruct the encryption key and decrypt the data block. - -## Configurable Encryption support - -`BlobCipherEncryptHeaderRef` on-disk format allows supporting more than one encryption scheme at the same time. Also, extending support for more encryption schemes in future can be done without involving data migration; `perpetual wiggle` over the period of time will transform existing stored data encryption scheme to a newer one. However, there exists no metrics exposing percentage of data encrypted using a given scheme(s) at the moment. - -**TODO** Encryption scheme support is limited to `AES-256-CTR`. - -## Future Work - -* Extend the TLog API to allow clients to read “plaintext mutations” directly from a TLogServer. In current implementations there are two consumers of TLogs: - - 1. 
Storage Server: At present the plan is for StorageServer to decrypt the mutations. - 2. BackupWorker (Apple implementation) which is currently not used in the code. diff --git a/design/feature-status.md b/design/feature-status.md index e7852f050af..3207fd05368 100644 --- a/design/feature-status.md +++ b/design/feature-status.md @@ -9,12 +9,11 @@ |[Redwood](https://apple.github.io/foundationdb/redwood.html)|production|Since FDB 7.1 and in production, but not at Apple. Better performance than SQLite storage engine. This B+ tree implementation needs documentation.|None.|Need a maintainer| |Gray failure|production|Since FDB 7.1. Automatically detect gray network failures and trigger recovery to heal the cluster.|Apple FDB Team|Supported| |[Testing Storage Server (TSS)](https://apple.github.io/foundationdb/tss.html)|production|Since FDB 7.1. Running a storage server pairs with identical data (but different engines) and compare their performance and correctness.|Apple FDB Team|Supported| -|[Perpetual Storage Wiggle](https://apple.github.io/foundationdb/perpetual-storage-wiggle.html)|production|Replacing storage servers gradually for storage space reclaimation or storage engine migration.|Apple FDB Team|Supported| +|[Perpetual Storage Wiggle](https://apple.github.io/foundationdb/perpetual-storage-wiggle.html)|production|Replacing storage servers gradually for storage space reclamation or storage engine migration.|Apple FDB Team|Supported| |[FDB K8s Operator](https://github.com/FoundationDB/fdb-kubernetes-operator)|production|Since FDB 6.3 and in a separate repo|Apple FDB Team|Supported| |[mTLS](https://apple.github.io/foundationdb/tls.html)|production|Since FDB 6.1. Mutual TLS between clients and FDB servers.|Apple FDB Team|Supported| |Parallel Restore|experimental|Restore V2 backup files and is used in simulation with V2 backup. 
Not fault tolerant for production use.|Apple FDB Team|Deprecate and remove the feature.| |Encrypted Backup Files|experimental|Use an encryption key to encrypt backup files.|Apple FDB Team|Supported| -|Encryption at Rest|experimental|Encrypted database contents. Design doc [here](https://github.com/apple/foundationdb/blob/main/design/encryption-data-at-rest.md) talks about a *proposal*. It is related to the experimental multi-tenant feature. This feature is experimental and unowned.|None|This feature is unowned and pending deletion.| |[V2 backup (partitioned mutation logs)](https://github.com/apple/foundationdb/blob/main/design/backup_v2_partitioned_logs.md)|experimental|Mutation logs are no longer stored separately on storage servers. A new backup worker role pulls mutations from tlogs and uploads to S3, reducing half write bandwidth to tlogs. We are targeting 7.4 release for production use.|Apple FDB Team|Will evolve into range-partitioned mutation logs, i.e., V3 backup.| |Sharded RocksDB Storage Engine|experimental|Use column family for shards so that data movement, backup and restore can use file-based data copies, instead of going through the storage engine, for performance gains.|Apple FDB Team|Supported| |[Bulk Loading & Dumping](https://apple.github.io/foundationdb/bulkdump.html)|experimental|Since 7.4. 
Dumping an idle cluster to S3 and loading data from S3 into an empty cluster.|Apple FDB Team|Will evolve into any keyrange dumping and loading with support to work with live clusters.| @@ -22,12 +21,11 @@ |gRPC|under development|Adding gRPC endpoints for certain client/server communications, e.g., some fdbcli usage, file transfer to and from fdbserver processes.|Apple FDB Team|Supported| |Multitenancy|experimental|Multi-tenant support, contributed from the community.|None|This feature has been deleted.| |Metacluster|experimental|contributed from the community.|None|This feature has been deleted.| -|Encryption at rest|experimental|contributed from the community.|None|This feature is unowned and pending deletion.| +|Encryption at rest|experimental|contributed from the community.|None|This feature has been deleted.| |Storage Cache|experimental|contributed from the community. Serve as memory cache for storage servers. This is probably never finished and another solution to read hot shard is using large storage teams.|None|This feature has been deleted.| |Blob granule|experimental|contributed from the community. Related to backup.|None|This feature has been deleted.| |ChangeFeed|experimental|contributed from the community.|None|This feature has been deleted.| -|Configuration Database|experimental|contributed from the community. Have data consistency issues, probably not finished.|None|Development is incomplete and feature is unowned. Scheduled for deletion.| +|Configuration Database|experimental|contributed from the community. Have data consistency issues, probably not finished.|None|This feature has been deleted.| |OpenTelemetry|experimental|contributed from the community. 
Probably not finished.|None|Need a new owner.| -|Dynamic knobs|experimental|Allows changing fdbserver process knob values without requiring a process restart|None|Need a new owner or become deprecated.| |Tag Throttling|experimental|limits the transaction rate for specific transaction tags|None|Need a new owner or become deprecated.| |Windows Binary|experimental|FoundationDB on Windows Platform|Community (To be filled out by the the exact company/person)|Unclear if needed.| diff --git a/design/tuple.md b/design/tuple.md index 1b5061f55b6..32c481a92da 100644 --- a/design/tuple.md +++ b/design/tuple.md @@ -190,7 +190,7 @@ Length: Variable Encoding: A big-endian integer encoding the length (1 byte for `0x34`, 2 bytes for `0x35`) followed by the given number of bytes. Status: Reserved -This type was added to accomodate use cases that do not need range scans or deal with fixed-length identifiers, such as hashes and EC public keys. +This type was added to accommodate use cases that do not need range scans or deal with fixed-length identifiers, such as hashes and EC public keys. This typecode only allows range searches if the length is known beforehand. Support is currently not included in the first-party bindings. diff --git a/design/validating_restored_data_using_one_cluster.md b/design/validating_restored_data_using_one_cluster.md new file mode 100644 index 00000000000..34cac712288 --- /dev/null +++ b/design/validating_restored_data_using_one_cluster.md @@ -0,0 +1,87 @@ +# Validating restored data with source in the same cluster + +Author: Neethu Haneeshi Bingi + +## Goal: + +The goal is to verify the restored data from a backup with the original source data to ensure the end-to-end correctness of backup and restore flow. While this validation is currently performed in simulation, it is limited in scale and differs from the production backup environment. 
+This new validation flow should: + +* Run **as quickly as possible and automatically**, enabling execution at regular intervals like bi-weekly or with every patch release. +* Increase confidence in **backup/restore reliability** by testing this in production-like clusters especially as upcoming backup and restore using bulk-loading projects evolve over the next year. +* Should be able to validate data for smaller key ranges for faster turnaround time and be able to test the entire database data. + + + +## Usage: + +* Workflow running for smaller keyRanges once per week. Workflow can take multiple keyRange boundaries from getLocation fdbcli api output and will run the rest of the workflow steps. Should be able to complete within 3-4 hours. +* Workflow running for entire DB data every patch release/once bi-monthly. This has a constraint for the cluster to be only 35% filled, so target to be able to complete in 5 days. + +By doing this, we can validate data reliability in this path and also the restore speed for any regressions. + + +## **High-Level Idea** + +Store both **source** and **restored** data within the same cluster to eliminate the need for a separate validation cluster. +Every key-value pair will be compared directly, and exact key/value **corruptions or mismatches** will be reported. + + + +## **Solution:** + +More detailed steps followed by the diagram +[Image: Screenshot 2025-11-12 at 7.53.04 PM.png] +The validation consists of **three main steps** — **Backup**, **Restore**, and **Compare** — followed by a **Cleanup** phase. + +* Backup: + * Run workload → Start Backup → Stop load → NoteDown ReadVersion → Lock cluster for writes → Wait until (ReadVersion saved > MaxRestorableVersion) → Stop Backup + * Constraint: If you are running this to validate the entire DB data, ensure the cluster has >65% available space in this phase. If space drops below this, stop/clear the load.
+ Note: Locking the cluster still allows backup and restore operations +* Restore: + * Setup: Use the `addPrefix` parameter to restore into a validation keyspace. When restoring with a prefix, the restore destination empty check is automatically bypassed. + * GetMaxRestoreVersion from backup → Start restore with add-prefix option → Wait for restore completion + * Note: + * Restore writes into a predefined restored_data_prefix (`\xff\x02/rlog/`). Restore does lock-aware transactions to bypass the lock. Restore already supports option to add prefix to data. + * **Important**: Restore will clear and overwrite any existing data at the destination range. This is standard restore behavior. + * The restore should work for smaller key ranges or for entire db data. +* Compare: + * Run fdbcli ‘audit_storage’ command with AuditType=validate_restore + * `audit_storage validate_restore [BeginKey] [EndKey]` (beginKey,endKey should be within userKeyRange) + * BeginKey and EndKey should be the same range given to the restore command. + * DD dispatches validation tasks to SS and monitors the progress + * Each SS compares data and updates the results in the DB. + * Monitor the audit storage status, fdbcli command + * `get_audit_status validate_restore progress [AuditID]` + * Once complete/error, check for corruptions +* Clean up: + * Clear up the restored data in restored_data_prefix + * Clear off the setup in restore phase + * Start backup + * Unlock cluster for writes. + +**Note:** + +* Backup–Restore–Compare steps can be automated as a single script/workflow, while Cleanup can be managed via a separate script/workflow. +* The validate_restore process compares user keys against the restored data, but not the other way around. As a result, it can confirm that all user keys were successfully restored, but cannot detect any extra keys that may exist in the restored data. + +## Alternative Design Considerations + + +* Validate that the contents of two databases are the same: Might be slow.
Needs an external process like Consistency Checker to compare two databases. With our availability infra, it is difficult to have two databases. +* Validate checksum of two databases: [Comparing FDB Contents Using Merkle-Tree MD5](https://quip-apple.com/1x2jAMZJP5YB). Detects inconsistency but cannot pinpoint which key/value differs. + + + +## Implementation Details + +* **Locking/Unlocking database:** Use APIs similar to the ones restore uses for locking and unlocking the database. [Lock](https://github.com/apple/foundationdb/blob/release-7.4/fdbclient/FileBackupAgent.actor.cpp#L6754) [Unlock](https://github.com/apple/foundationdb/blob/release-7.4/fdbclient/FileBackupAgent.actor.cpp#L4286). Don't allow the restore to unlockDB as we want to keep the database locked until the comparison is done. Restore does lock-aware transactions to bypass this lock. +* **Wait until (ReadVersion saved > MaxRestorableVersion) step** in Backup phase: There might be a small gap between saving readVersion and locking the DB. Wait for at least backup_lag_seconds so that no mutations are missed in the backup. +* **Restore destination check:** The restore empty destination check is bypassed when `addPrefix.size() > 0` (indicating a validation restore to a prefixed keyspace). For regular restores without a prefix, the check remains enforced to prevent accidental data loss. Note that all restores (validation or regular) will clear and overwrite any existing data at the destination range - this is standard restore behavior. +* **Audit Storage:** + * Default value of BeginKey and EndKey is normalKeys.begin and normalKeys.end. Validate both keys are in normalKeys/userKeys range, systemKeys should not be included as they are in the backup. + * Add new AuditType **ValidateRestore.** + * New audit actor auditRestoreQ() in StorageServer similar to auditStorageShardReplicaQ() + * Change the code to read the range appended with the prefix restored_data_prefix. Can compare, update metadata and error mechanism in the same way.
[Range](https://github.com/apple/foundationdb/blob/release-7.4/fdbserver/storageserver.actor.cpp#L5671) + + diff --git a/documentation/sphinx/.pip.conf b/documentation/sphinx/.pip.conf index e86fb8d4cc3..eaeb330dbda 100644 --- a/documentation/sphinx/.pip.conf +++ b/documentation/sphinx/.pip.conf @@ -1,3 +1,2 @@ [global] timeout = 60 -index-url = https://pypi.python.org/simple diff --git a/documentation/sphinx/source/administration.rst b/documentation/sphinx/source/administration.rst index 57ce291fdb1..043109e6c57 100644 --- a/documentation/sphinx/source/administration.rst +++ b/documentation/sphinx/source/administration.rst @@ -14,6 +14,8 @@ Administration tls bulkload-user bulkdump + auditstorage + restore-validation-testing This document covers the administration of an existing FoundationDB cluster. We recommend you read this document before setting up a cluster for performance testing or production use. diff --git a/documentation/sphinx/source/auditstorage.rst b/documentation/sphinx/source/auditstorage.rst index 268a443b5f6..cc3a1f41a71 100644 --- a/documentation/sphinx/source/auditstorage.rst +++ b/documentation/sphinx/source/auditstorage.rst @@ -97,3 +97,8 @@ The AuditStorage tool checks the consistency between ServerKey and SS local shar In this job, the tool needs to check each storage server to see each SS has the shard mapping consistent with ServerKey. For each SS, DD partitions the job range in the unit of shards. Given a shard, DD requests the SS to check the consistency between the shard mapping and the ServerKey. The SS reads the shard mapping and compares it with the ServerKey. + +Restore Validation Testing +=========================== + +For detailed instructions on testing the restore validation feature, see :ref:`restore-validation-testing`. 
diff --git a/documentation/sphinx/source/backups.rst b/documentation/sphinx/source/backups.rst index 7b9fcd43a08..77323530ee4 100644 --- a/documentation/sphinx/source/backups.rst +++ b/documentation/sphinx/source/backups.rst @@ -103,7 +103,7 @@ If is not specified on the URL, it will be looked up in :ref:`blob cred An example blob store Backup URL would be ``blobstore://myKey:mySecret@something.domain.com:80/dec_1_2017_0400?bucket=backups``. If S3 is the target blobstore, the URL would look like: ``blobstore://${AWS_ACCESS_KEY_ID}:${AWS_SECRET_ACCESS_KEY}:${AWS_SESSION_TOKEN}@backup-12345-us-west-2.s3.amazonaws.com/dailies?bucket=backup-12345-us-west-2&region=us-west-2`` -The and may be ommitted from the URL and instead picked up from :ref:`blob credential sources`: ``blobstore://${AWS_ACCESS_KEY_ID}@backup-12345-us-west-2.s3.amazonaws.com/dailies?bucket=backup-12345-us-west-2&region=us-west-2`` +The and may be omitted from the URL and instead picked up from :ref:`blob credential sources`: ``blobstore://${AWS_ACCESS_KEY_ID}@backup-12345-us-west-2.s3.amazonaws.com/dailies?bucket=backup-12345-us-west-2&region=us-west-2`` To pickup the , , and from :ref:`blob credentials file`, write the blob backup URL as: ``blobstore://@backup-12345-us-west-2.s3.amazonaws.com/dailies?bucket=backup-12345-us-west-2&region=us-west-2`` (Notice the '@' in front of the hostname). Blob store Backup URLs can have optional parameters at the end which set various limits or options used when communicating with the store. All values must be positive decimal integers unless otherwise specified. The speed related default values are not very restrictive. A parameter is applied individually on each ``backup_agent``, meaning that a global restriction should be calculated based on the number of agent running. The most likely parameter a user would want to change is ``max_send_bytes_per_second`` (or ``sbps`` for short) which determines the upload speed to the blob service.
diff --git a/documentation/sphinx/source/bulkdump.rst b/documentation/sphinx/source/bulkdump.rst index 2a8c4976f1f..cc76a2f25d6 100644 --- a/documentation/sphinx/source/bulkdump.rst +++ b/documentation/sphinx/source/bulkdump.rst @@ -54,7 +54,7 @@ How to use? ----------- Currently, FDBCLI tools and low-level ManagementAPIs are provided to submit a job or clear a job. These operations are achieved by issuing transactions to update the bulkdump metadata. -Submitting a job is achieved by writting the job metadata to the bulkdump metadata range of the job. +Submitting a job is achieved by writing the job metadata to the bulkdump metadata range of the job. When submitting a job, the API checks if there is any ongoing bulkdump job. If yes, it will reject the job. Otherwise, it accepts the job. Clearing a job is achieved by erasing the entire user range space of the bulkdump metadata range. When clearing a job, all metadata will be cleared and any ongoing task is stopped (with some latency). diff --git a/documentation/sphinx/source/command-line-interface.rst b/documentation/sphinx/source/command-line-interface.rst index a6560bbd948..779a1975c55 100644 --- a/documentation/sphinx/source/command-line-interface.rst +++ b/documentation/sphinx/source/command-line-interface.rst @@ -49,13 +49,6 @@ The ``clear`` command clears a key from the database. Its syntax is ``clear ` when specifying keys (or values) in ``fdbcli``. -clearknob ---------- - -The ``clearknob`` command can be used to clear knobs set in the configuration database. Its syntax is ``clearknob [CONFIGCLASS]``. If not present in a ``begin\commit`` block, the CLI will prompt for a description of the change. - -Note that :ref:`characters can be escaped ` when specifying keys (or values) in ``fdbcli``. - clearrange ---------- @@ -71,7 +64,7 @@ The ``commit`` command commits the current transaction. Any sets or clears execu configure --------- -The ``configure`` command changes the database configuration. 
Its syntax is ``configure [new|tss] [single|double|triple|three_data_hall|three_datacenter] [ssd|memory] [grv_proxies=] [commit_proxies=] [resolvers=] [logs=] [count=] [perpetual_storage_wiggle=] [perpetual_storage_wiggle_locality=<:|0>] [storage_migration_type={disabled|aggressive|gradual}] [encryption_at_rest_mode={aes_256_ctr|disabled}]``. +The ``configure`` command changes the database configuration. Its syntax is ``configure [new|tss] [single|double|triple|three_data_hall|three_datacenter] [ssd|memory] [grv_proxies=] [commit_proxies=] [resolvers=] [logs=] [count=] [perpetual_storage_wiggle=] [perpetual_storage_wiggle_locality=<:|0>] [storage_migration_type={disabled|aggressive|gradual}]``. The ``new`` option, if present, initializes a new database with the given configuration rather than changing the configuration of an existing one. When ``new`` is used, both a redundancy mode and a storage engine must be specified. @@ -250,13 +243,6 @@ The ``get`` command fetches the value of a given key. Its syntax is ``get ` Note that :ref:`characters can be escaped ` when specifying keys (or values) in ``fdbcli``. -getknob -------- - -The ``getknob`` command fetches the value of a given knob that has been populated by ``setknob``. Its syntax is ``getknob [CONFIGCLASS]``. It displays the value of ```` if ```` is present in the database and ``not found`` otherwise. - -Note that :ref:`characters can be escaped ` when specifying keys (or values) in ``fdbcli``. - getrange -------- @@ -449,13 +435,6 @@ The ``setclass`` command can be used to change the :ref:`process class [CONFIGCLASS]``. If not present in a ``begin\commit`` block, the CLI will prompt for a description of the change. - -Note that :ref:`characters can be escaped ` when specifying keys (or values) in ``fdbcli``. - sleep ----- @@ -622,4 +601,3 @@ It will populate a list of available storage servers' network addresses. Users n ``hotrange `` Fetch read metrics from the given storage server to find the hot range. 
Run ``help hotrange`` to read the guide. - diff --git a/documentation/sphinx/source/disk-snapshot-backup.rst b/documentation/sphinx/source/disk-snapshot-backup.rst index 33b97b8c093..3d6077ecf73 100644 --- a/documentation/sphinx/source/disk-snapshot-backup.rst +++ b/documentation/sphinx/source/disk-snapshot-backup.rst @@ -51,7 +51,7 @@ Before using the ``snapshot`` command the following setup needs to be done * Write a program that will snapshot the local disk store when invoked by the ``fdbserver`` with the following arguments: - - UID - 32 byte alpha-numeric unique identifier, the same identifier will be passed to all the nodes in the cluster, can be used to identify the set of disk snapshots associated with this backup + - UID - 32 byte alphanumeric unique identifier, the same identifier will be passed to all the nodes in the cluster, can be used to identify the set of disk snapshots associated with this backup - Version - version string of the FoundationDB binary - Path - path of the FoundationDB ``datadir`` to be snapshotted, ``datadir`` specified in :ref:`foundationdb-conf-fdbserver` - Role - ``tlog``/``storage``/``coordinator``, identifies the role of the node on which the snapshot is being invoked diff --git a/documentation/sphinx/source/mr-status-json-schemas.rst.inc b/documentation/sphinx/source/mr-status-json-schemas.rst.inc index 874cb3d0702..060ff759ec0 100644 --- a/documentation/sphinx/source/mr-status-json-schemas.rst.inc +++ b/documentation/sphinx/source/mr-status-json-schemas.rst.inc @@ -898,11 +898,6 @@ "gradual", "aggressive" ]}, - "encryption_at_rest_mode": { - "$enum":[ - "disabled", - "aes_256_ctr" - ]} }, "data":{ "least_operating_space_bytes_log_server":0, diff --git a/documentation/sphinx/source/release-notes/release-notes-022.rst b/documentation/sphinx/source/release-notes/release-notes-022.rst index deb432424c3..d70dff40f3e 100644 --- a/documentation/sphinx/source/release-notes/release-notes-022.rst +++ 
b/documentation/sphinx/source/release-notes/release-notes-022.rst @@ -37,7 +37,7 @@ Language APIs * Java * Compiles class files with 1.6 source and target flags. - * Single-jar packaging for all platforms. (In rare cases, setting the ``FDB_LIBRARY_PATH_FDB_JAVA`` environment variable will be requried if you previously relied on loading the library from a system path.) + * Single-jar packaging for all platforms. (In rare cases, setting the ``FDB_LIBRARY_PATH_FDB_JAVA`` environment variable will be required if you previously relied on loading the library from a system path.) * Ruby @@ -53,7 +53,7 @@ Fixes * Status showed the wrong explanation when performance was limited by system write-to-read latency limit. * Fixed a rare issue where a "stuck" process trying to participate in the database could run out of RAM. * Increased robustness of FoundationDB server when loaded with large data sets. -* Eliminated certain cases where the data distribution algorithim could do unnecessary splitting and merging work. +* Eliminated certain cases where the data distribution algorithm could do unnecessary splitting and merging work. * Several fixes for rare issues encountered by our fault simulation framework. * Certain uncommon usage of on_ready() in Python could cause segmentation faults. diff --git a/documentation/sphinx/source/release-notes/release-notes-430.rst b/documentation/sphinx/source/release-notes/release-notes-430.rst index 0d83be2450b..63028cf0d6a 100644 --- a/documentation/sphinx/source/release-notes/release-notes-430.rst +++ b/documentation/sphinx/source/release-notes/release-notes-430.rst @@ -8,7 +8,7 @@ Release Notes Features -------- -* Improved DR thoughput by having mutations copied into the DR database before applying them. +* Improved DR throughput by having mutations copied into the DR database before applying them. * Renamed db_agent to dr_agent. * Added more detailed DR and backup active task detail into layer status. 
diff --git a/documentation/sphinx/source/release-notes/release-notes-500.rst b/documentation/sphinx/source/release-notes/release-notes-500.rst index 36ed9cc55d8..1fe1a29a8a5 100644 --- a/documentation/sphinx/source/release-notes/release-notes-500.rst +++ b/documentation/sphinx/source/release-notes/release-notes-500.rst @@ -49,7 +49,7 @@ Fixes * Logs continued to make their data persistent to disk after being removed. * Removed logs did not delete their data before shutting down. -* In rare scenarios, a disk error which occurred during log recruitment could cause the recruitment to hang indefinately. +* In rare scenarios, a disk error which occurred during log recruitment could cause the recruitment to hang indefinitely. 5.0.3 ===== diff --git a/documentation/sphinx/source/release-notes/release-notes-510.rst b/documentation/sphinx/source/release-notes/release-notes-510.rst index 8e3fc540632..5f90fa3f5ef 100644 --- a/documentation/sphinx/source/release-notes/release-notes-510.rst +++ b/documentation/sphinx/source/release-notes/release-notes-510.rst @@ -27,7 +27,7 @@ Fixes * The consistency check calculated the size of the database inefficiently. * Could not create new directories with the Python and Ruby implementations of the directory layer. * fdbcli could erroneously report that it was incompatible with some processes in the cluster. -* The commit commmand in fdbcli did not wait for the result of the commit before continuing to the next command. +* The commit command in fdbcli did not wait for the result of the commit before continuing to the next command. 
Other Changes diff --git a/documentation/sphinx/source/release-notes/release-notes-600.rst b/documentation/sphinx/source/release-notes/release-notes-600.rst index f3a921b15be..6a1611cf312 100644 --- a/documentation/sphinx/source/release-notes/release-notes-600.rst +++ b/documentation/sphinx/source/release-notes/release-notes-600.rst @@ -51,7 +51,7 @@ Features * Added support for asynchronous replication to a remote DC with processes in a single cluster. This improves on the asynchronous replication offered by fdbdr because servers can fetch data from the remote DC if all replicas have been lost in one DC. * Added support for synchronous replication of the transaction log to a remote DC. This remote DC does not need to contain any storage servers, meaning you need much fewer servers in this remote DC. * The TLS plugin is now statically linked into the client and server binaries and no longer requires a separate library. `(Issue #436) `_ -* TLS peer verification now supports verifiying on Subject Alternative Name. `(Issue #514) `_ +* TLS peer verification now supports verifying on Subject Alternative Name. `(Issue #514) `_ * TLS peer verification now supports suffix matching by field. `(Issue #515) `_ * TLS certificates are automatically reloaded after being updated. [6.0.5] `(Issue #505) `_ * Added the ``fileconfigure`` command to fdbcli, which configures a database from a JSON document. [6.0.10] `(PR #713) `_ diff --git a/documentation/sphinx/source/release-notes/release-notes-620.rst b/documentation/sphinx/source/release-notes/release-notes-620.rst index 3e388a81299..fb7e3c3f856 100644 --- a/documentation/sphinx/source/release-notes/release-notes-620.rst +++ b/documentation/sphinx/source/release-notes/release-notes-620.rst @@ -121,7 +121,7 @@ Release Notes ====== * When configuring a cluster to usable_regions=2, data distribution would not react to machine failures while copying data to the remote region. `(PR #2774) `_. 
-* When a cluster is configured with usable_regions=2, data distribution could push a cluster into saturation by relocating too many shards simulatenously. `(PR #2776) `_. +* When a cluster is configured with usable_regions=2, data distribution could push a cluster into saturation by relocating too many shards simultaneously. `(PR #2776) `_. * Do not allow the cluster controller to mark any process as failed within 30 seconds of startup. `(PR #2780) `_. * Backup could not establish TLS connections (broken in 6.2.16). `(PR #2775) `_. * Certificates were not refreshed automatically (broken in 6.2.16). `(PR #2781) `_. diff --git a/documentation/sphinx/source/release-notes/release-notes-700.rst b/documentation/sphinx/source/release-notes/release-notes-700.rst index 823c0308b51..de56447519c 100644 --- a/documentation/sphinx/source/release-notes/release-notes-700.rst +++ b/documentation/sphinx/source/release-notes/release-notes-700.rst @@ -46,7 +46,7 @@ Performance Reliability ----------- * Backup agents now pause themselves upon a successful snapshot recovery to avoid unintentional data corruption. Operators should manually abort backup agents and clear the backup agent keyspace to avoid using the old cluster's backup configuration. `(PR #4027) `_ -* Log class processes are prioritized above transaction class proceses for becoming tlogs. `(PR #4509) `_ +* Log class processes are prioritized above transaction class processes for becoming tlogs. `(PR #4509) `_ * Improved worker recruitment logic to avoid unnecessary recoveries when processes are added or removed from a cluster. 
`(PR #4695) `_ `(PR #4631) `_ `(PR #4509) `_ Fixes diff --git a/documentation/sphinx/source/release-notes/release-notes-710.rst b/documentation/sphinx/source/release-notes/release-notes-710.rst index f2235427cae..ebff2f62a14 100644 --- a/documentation/sphinx/source/release-notes/release-notes-710.rst +++ b/documentation/sphinx/source/release-notes/release-notes-710.rst @@ -229,8 +229,8 @@ Release Notes * Fixed a data distribution bug where exclusions can become stuck because DD cannot build new teams. `(PR #9035) `_ * Added FoundationDB version to ProcessMetrics. `(PR #9037) `_ * Removed RocksDB read iterator destruction from the commit path. `(PR #8971) `_ -* Added determinstic degraded server selection in gray failure detection. `(PR #9001) `_ -* Fixed an interger overflow bug that causes fetching backup files to fail. `(PR #8996) `_ +* Added deterministic degraded server selection in gray failure detection. `(PR #9001) `_ +* Fixed an integer overflow bug that causes fetching backup files to fail. `(PR #8996) `_ * Fixed a log router race condition that blocks remote tlogs forever. `(PR #8966) `_ * Fixed a backup worker assertion failure. `(PR #8887) `_ * Upgraded RocksDB to 7.7.3 version. `(PR #8880) `_ diff --git a/documentation/sphinx/source/release-notes/release-notes-730.rst b/documentation/sphinx/source/release-notes/release-notes-730.rst index e0cf1033b10..f00ea980918 100644 --- a/documentation/sphinx/source/release-notes/release-notes-730.rst +++ b/documentation/sphinx/source/release-notes/release-notes-730.rst @@ -2,6 +2,23 @@ Release Notes ############# +7.3.75 +====== +* Same as 7.3.74 release with AVX enabled. + +7.3.74 +====== +* Update rocksdb version to 8.11.5. `(PR #12690) `_ + +7.3.73 +====== +* Same as 7.3.72 release with AVX enabled. + +7.3.72 +====== +* Added backup encryption support and related status/CLI improvements. 
`(PR #12580) `_, `(PR #12590) `_, `(PR #12621) `_, and `(PR #12658) `_ +* Fixed a backup decode range file bug when interpreting keys and values. `(PR #12399) `_ + 7.3.71 ====== * Same as 7.3.71 release with AVX enabled. @@ -335,7 +352,7 @@ Release Notes 7.3.16 ====== * Released with AVX disabled. -* Added location_metadata fdbcli to query shard locations and assignements. `(PR #10428) `_ +* Added location_metadata fdbcli to query shard locations and assignments. `(PR #10428) `_ * Added degraded/disconnected peer recovery in gray failure. `(PR #10541) `_ * Added replica and metadata audit support. `(PR #10631) `_ * Added a SecurityMode for data distributor where data movements are not allowed but auditStorage is enabled. `(PR #10660) `_ diff --git a/documentation/sphinx/source/release-notes/release-notes-740.rst b/documentation/sphinx/source/release-notes/release-notes-740.rst index fe849de472b..2c4c3eea5ae 100644 --- a/documentation/sphinx/source/release-notes/release-notes-740.rst +++ b/documentation/sphinx/source/release-notes/release-notes-740.rst @@ -2,6 +2,41 @@ Release Notes ############# +7.4.6 +===== + +AVX enabled release. + +* Fixed a restore stuck bug where readLogData() errors were not propagated correctly. `(PR #12433) `_ +* Fixed FdbDecode memory issues. `(PR #12495) `_ +* Fixed decode range file keys and values interpretation bug. `(PR #12420) `_ +* Fixed the flags used for opening the backup encryption key file. `(PR #12424) `_ +* Fixed backup encryption on S3. `(PR #12289) `_ +* Added support for backup encryption via fdbbackup modify command. `(PR #12591) `_ +* Added support for backup worker to use proxy from command line to upload to S3. `(PR #12566) `_ +* Added metadata for encrypted backups. `(PR #12354) `_ +* Added FileLevelEncryption field to BackupDescription JSON output. `(PR #12626) `_ +* Added encryption key info to status JSON. `(PR #12657) `_ +* Added SNI in TLS handshake. `(PR #12385) `_ +* Added ARM support for Docker images. 
`(PR #12425) `_ +* Added restore validation feature allowing backup/restore validation in a single cluster. `(PR #12648) `_ +* Downgraded RocksDB to 8.11.5. `(PR #12651) `_ + +7.4.5 +===== +* Same as 7.4.4 release with AVX enabled. + +7.4.4 +===== + +* Fixed a race that could cause recovery to be stuck when purging old generations. `(PR #12214) `_ +* Fixed TSS mismatch handling crashes. `(PR #12330) `_ +* Fixed potential DatabaseContext leaks in multi-version client. `(PR #12308) `_ +* Fixed double-encoding of URIs in requests to S3BlobStore. `(PR #12302) `_ +* Fixed "Unknown error" when configuring regions. `(PR #12314) `_ +* Improved TLS handshake process to avoid blocking main thread. `(PR #12346) `_ +* Fixed an issue in TLog server code that was causing OOMs. `(PR #12298) `_ + 7.4.3 ===== * Same as 7.4.2 release with AVX enabled. diff --git a/documentation/sphinx/source/restore-validation-testing.rst b/documentation/sphinx/source/restore-validation-testing.rst new file mode 100644 index 00000000000..daa584d9479 --- /dev/null +++ b/documentation/sphinx/source/restore-validation-testing.rst @@ -0,0 +1,548 @@ +.. _restore-validation-testing: + +################################## +Restore Validation Testing Guide +################################## + +This guide provides step-by-step instructions for testing the restore validation feature in FoundationDB. + +Quick Setup +=========== + +1. Build Required Binaries +--------------------------- + +:: + + cd ~/build_output + cmake --build . --target fdbserver fdbcli fdbbackup backup_agent -j4 + +2. 
Start Test Cluster +--------------------- + +:: + + # Create directories and cluster file + mkdir -p ~/fdb_test_data ~/fdb_backup + echo "test:test@127.0.0.1:4500" > ~/fdb_test.cluster + + # Start fdbserver + ~/build_output/bin/fdbserver -p 127.0.0.1:4500 -d ~/fdb_test_data \ + -C ~/fdb_test.cluster & + + # Configure database + sleep 3 + ~/build_output/bin/fdbcli -C ~/fdb_test.cluster --exec "configure new single memory" + + # Start backup agent (required for backups) + sleep 2 + ~/build_output/bin/backup_agent -C ~/fdb_test.cluster & + + sleep 2 + echo "Cluster ready" + +Testing Workflow +================ + +Phase 1: Set Up Test Data +-------------------------- + +Write Some Test Data +^^^^^^^^^^^^^^^^^^^^ + +:: + + ~/build_output/bin/fdbcli -C ~/fdb_test.cluster + + # In fdbcli: + fdb> writemode on + fdb> set testkey1 testvalue1 + fdb> set testkey2 testvalue2 + fdb> set testkey3 testvalue3 + fdb> set mykey myvalue + + # Verify data + fdb> getrange "" "\xff" + +Phase 2: Backup +--------------- + +Start Backup +^^^^^^^^^^^^ + +:: + + # Start backup (directory already created in setup) + ~/build_output/bin/fdbbackup start -C ~/fdb_test.cluster \ + -d file:///Users/stack/fdb_backup -z + + # Check status - wait until backup is restorable + ~/build_output/bin/fdbbackup status -C ~/fdb_test.cluster + +Wait approximately 15-30 seconds until status shows backup is "restorable". The output should include: + +- ``BackupURL: file:///Users/stack/fdb_backup/backup-`` +- Status showing "completed" or "running differential" + +Verify Backup is Ready +^^^^^^^^^^^^^^^^^^^^^^ + +:: + + # Get the specific backup URL from the status output + ~/build_output/bin/fdbbackup status -C ~/fdb_test.cluster + + # Verify it's restorable (use the actual backup URL from status) + ~/build_output/bin/fdbbackup describe \ + -d file:///Users/stack/fdb_backup/backup- + +Look for ``Restorable: true`` and ``MaxRestorableVersion`` in the output. 
+ +Stop the Backup +^^^^^^^^^^^^^^^ + +Before proceeding with restore validation, stop the backup to ensure no more differential data is being captured: + +:: + + # Stop the backup (use discontinue to keep the backup data) + ~/build_output/bin/fdbbackup discontinue -C ~/fdb_test.cluster + + # Verify backup is stopped + ~/build_output/bin/fdbbackup status -C ~/fdb_test.cluster + +.. note:: + Use ``discontinue`` rather than ``abort`` to preserve the backup data. The ``discontinue`` command stops the backup gracefully while keeping it restorable. + +Phase 3: Restore to Validation Prefix +-------------------------------------- + +Lock the Database +^^^^^^^^^^^^^^^^^ + +Before starting the restore, lock the database to prevent any writes during validation: + +:: + + ~/build_output/bin/fdbcli -C ~/fdb_test.cluster + + # Lock the database - save the UID returned! + fdb> lock + +The lock command returns a UID that you'll need to unlock the database later. + +Restore with Prefix +^^^^^^^^^^^^^^^^^^^ + +**Important**: Use the SPECIFIC backup URL from the previous step (not the parent directory): + +:: + + # Use fdbrestore with the actual backup URL + ~/build_output/bin/fdbrestore start \ + -r file:///Users/stack/fdb_backup/backup- \ + --dest-cluster-file ~/fdb_test.cluster \ + --add-prefix "\xff\x02/rlog/" \ + -w + +.. warning:: + Using ``file:///Users/stack/fdb_backup`` (parent dir) instead of the full backup path will fail with "not restorable to any version". + +The ``\xff\x02/rlog/`` prefix is the ``validateRestoreLogKeys`` range where validation looks for restored data. + +.. important:: + **Restore will clear and overwrite any existing data at the destination range.** This is standard restore behavior. The restore process explicitly clears the destination range before writing restored data, so any pre-existing keys in ``\xff\x02/rlog/`` will be replaced. 
+ +Verify Restored Data Exists +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:: + + ~/build_output/bin/fdbcli -C ~/fdb_test.cluster + + # In fdbcli, enable system keys to see restored data + fdb> option on ACCESS_SYSTEM_KEYS + fdb> getrange "\xff\x02/rlog/" "\xff\x02/rlog0" + + # You should see your keys with the prefix + # e.g., "\xff\x02/rlog/testkey1" -> "testvalue1" + +Phase 4: Run Validation +------------------------ + +Start Validation Audit +^^^^^^^^^^^^^^^^^^^^^^ + +:: + + ~/build_output/bin/fdbcli -C ~/fdb_test.cluster + + # Start validation for entire key range + fdb> audit_storage validate_restore "" "\xff" + +This returns an Audit ID. **Save this ID!** + +Example output:: + + Audit ID: 12345678-1234-5678-1234-567812345678 + +Monitor Progress +^^^^^^^^^^^^^^^^ + +:: + + # Check overall status + fdb> get_audit_status validate_restore id + + # Check detailed progress (shows which ranges are complete) + fdb> get_audit_status validate_restore progress + + # Check for any errors + fdb> get_audit_status validate_restore phase error + +Wait for Completion +^^^^^^^^^^^^^^^^^^^ + +Keep checking status until the audit completes. For a small dataset, this should take seconds to minutes. + +Phase 5: Verify Results +------------------------ + +Check Audit Status +^^^^^^^^^^^^^^^^^^ + +:: + + fdb> get_audit_status validate_restore id + +**Expected Output (Success)**:: + + Audit result is: + AuditStorageState: [ID]: , [Range]: ["","\\xff"), [Type]: 5, [Phase]: 2 + +Where: + +- Type: 5 = ValidateRestore +- Phase: 2 = Complete (no errors) + +**If Phase: 3 = Error**, there were validation failures! + +Check Trace Logs +^^^^^^^^^^^^^^^^ + +Look for validation events in the server logs:: + + grep "AuditRestore" ~/fdb_test_data/*.log | tail -20 + +Look for: + +- ``SSAuditRestoreBegin`` - Validation started +- ``SSAuditRestoreComplete`` - Validation finished successfully +- ``SSAuditRestoreError`` - Validation found errors (check details!) 
+ +Phase 6: Testing a Failed Audit +-------------------------------- + +To verify that the audit correctly detects mismatches, you can intentionally modify the source data and rerun the audit. + +Modify Source Data +^^^^^^^^^^^^^^^^^^ + +If the database is still locked from Phase 3, regular writes are blocked; unlock it first (see Phase 8: Cleanup) before modifying the source data. + +:: + + ~/build_output/bin/fdbcli -C ~/fdb_test.cluster + + fdb> writemode on + + # Modify one of the original source values to create a mismatch + fdb> set testkey1 "modified_value" + + # Verify the change + fdb> get testkey1 + +This creates a mismatch because: + +- Current source data: ``testkey1`` = ``modified_value`` (modified after backup) +- Restored data: ``\xff\x02/rlog/testkey1`` = ``testvalue1`` (from backup) + +Run Audit Again +^^^^^^^^^^^^^^^ + +:: + + # Start a new validation audit + fdb> audit_storage validate_restore "" "\xff" + +Save the new Audit ID returned. + +Check for Expected Failure +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:: + + # Monitor the audit status (use the new Audit ID) + fdb> get_audit_status validate_restore id <audit_id> + +**Expected Output (Failure)**:: + + Audit result is: + AuditStorageState: [ID]: <audit_id>, [Range]: ["","\\xff"), [Type]: 5, [Phase]: 3 + +Where: + +- Phase: 3 = Error (validation found mismatches) + +Check Error Details +^^^^^^^^^^^^^^^^^^^ + +:: + + # Check detailed error information + fdb> get_audit_status validate_restore phase error + + # Check trace logs for specific error details + grep "SSAuditRestoreError" ~/fdb_test_data/*.log | tail -20 + +The logs should show which key had a mismatch and what the differing values were. + +Restore Correct Source Data for Next Tests +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:: + + # Restore the original source value to match the backup + fdb> writemode on + fdb> set testkey1 testvalue1 + + # Verify the restoration + fdb> get testkey1 + +Phase 7: Understanding Audit Design and Limitations +---------------------------------------------------- + +.. 
important:: + The restore validation audit is designed to run immediately after a restore operation to verify the restore process didn't corrupt data. It compares the current database state with the restored data at the time of the audit. + +.. important:: + **Lock the database before starting restore validation.** To avoid false positive audit errors caused by writes occurring between the backup snapshot and audit completion, you should lock the database before getting the restorableVersion and keep it locked until the audit completes. Any writes to the source data during this window will cause audit mismatches that are difficult to distinguish from actual restore corruption issues. + + **Both restore and audit operations will continue to work on a locked database.** They use ``LOCK_AWARE`` transactions internally, which allows system operations to proceed while the lock prevents regular application writes. This is the recommended approach for accurate validation. + + :: + + # Lock the database before restore - save the UID returned + fdb> lock + + # Restore and audit will work - they use LOCK_AWARE transactions + # Regular application writes are blocked, preventing false positives + + # Proceed with restore and validation + # ... perform restore ... + # ... run audit ... + + # Unlock after audit completes, using the UID from the lock command + fdb> unlock <UID> + +What the Audit Validates +^^^^^^^^^^^^^^^^^^^^^^^^^ + +The audit ensures: + +✅ The restore process worked without corruption during the restore operation + +✅ Current source data matches the restored data at the time of audit + +✅ No data corruption occurred between source and restored locations + +What the Audit Does NOT Validate +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The audit has these design limitations: + +❌ It cannot tell whether a mismatch comes from restore corruption or from source data that changed after the backup was created + +❌ It compares current source data to restored data, not backup data to restored data + +**Why This Matters**: + +If you: + +1. Create a backup with ``testkey1=value1`` +2. 
Modify source data to ``testkey1=value2`` +3. Restore the backup (restores ``testkey1=value1`` to ``\xff\x02/rlog/``) +4. Run the audit + +The audit will report an ERROR because current source (``value2``) doesn't match restored (``value1``). This is expected behavior - the audit validates restore integrity by comparing current state to restored state. + +When to Use Restore Validation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Use restore validation: + +✅ Immediately after completing a restore operation + +✅ To verify the restore process worked correctly + +✅ To ensure no corruption during data transfer + +Do NOT use restore validation: + +❌ To verify backup data integrity (use backup verification tools instead) + +❌ To check if source data matches the backup (they're expected to diverge) + +❌ As a long-term consistency check between source and restored data + +Phase 8: Cleanup +----------------- + +Unlock the Database +^^^^^^^^^^^^^^^^^^^ + +After validation is complete, unlock the database to allow normal operations: + +:: + + ~/build_output/bin/fdbcli -C ~/fdb_test.cluster + + # Unlock using the UID from the lock command + fdb> unlock <UID> + +Clear Restored Data +^^^^^^^^^^^^^^^^^^^ + +:: + + ~/build_output/bin/fdbcli -C ~/fdb_test.cluster + + fdb> option on ACCESS_SYSTEM_KEYS + fdb> writemode on + fdb> clearrange "\xff\x02/rlog/" "\xff\x02/rlog0" + +Verify Cleanup +^^^^^^^^^^^^^^ + +:: + + fdb> getrange "\xff\x02/rlog/" "\xff\x02/rlog0" + # Should return empty + +Troubleshooting +=============== + +"restore_destination_not_empty" Error +-------------------------------------- + +**Symptom**: Restore fails saying destination is not empty + +**Cause**: The restore is not using a prefix (addPrefix parameter) + +**Fix**: When restoring for validation, you must use the ``addPrefix`` parameter to restore +to a different keyspace (e.g., ``\xff\x02/rlog/``). 
This bypasses the empty destination check +that's enforced for regular restores (which protect against accidental data loss). + +**Note**: All restores (with or without prefix) will clear and overwrite the destination range. +The difference is that regular restores (no prefix) check for existing data and fail to prevent +accidents, while validation restores (with prefix) proceed because they're intentionally writing +to a dedicated validation keyspace. + +"No backup agents are responding" +---------------------------------- + +**Symptom**: After running ``fdbbackup start``, you see this message + +**Cause**: No backup agent process running to execute the backup + +**Fix**:: + + # Start backup agent daemon + ~/build_output/bin/backup_agent -C ~/fdb_test.cluster & + + # Wait a moment, then check backup status + sleep 5 + ~/build_output/bin/fdbbackup status -C ~/fdb_test.cluster + +"The specified backup is not restorable to any version" +-------------------------------------------------------- + +**Symptom**: Restore fails immediately with this error + +**Causes**: + +1. **Wrong backup URL**: Using parent directory instead of specific backup path +2. **Backup not complete**: Backup hasn't finished creating a restorable snapshot + +**Fix**:: + + # 1. Get the correct backup URL from status + ~/build_output/bin/fdbbackup status -C ~/fdb_test.cluster + # Look for: BackupURL: file:///.../backup- + + # 2. 
Use that EXACT URL in restore command + ~/build_output/bin/fdbrestore start \ + -r file:///Users/stack/fdb_backup/backup-2025-11-18-09-36-09.156836 \ + --dest-cluster-file ~/fdb_test.cluster \ + --add-prefix "\xff\x02/rlog/" -w + +Backup Not Restorable +---------------------- + +**Symptom**: Restore fails with "not restorable to any version" + +**Cause**: Backup hasn't completed or saved a snapshot yet + +**Fix**: + +- Wait longer (10-30 seconds minimum) +- Check ``fdbbackup status`` for restorable version +- Ensure backup agent is running + +No Progress Updates +------------------- + +**Symptom**: Audit status stays in "Running" phase forever + +**Cause**: Storage servers may not have the shard containing your key range + +**Fix**:: + + # Check if data distribution is working + fdbcli> status details + # Look for storage servers and their shard assignments + +Cannot See Restored Data +------------------------- + +**Symptom**: ``getrange "\xff\x02/rlog/"`` returns empty + +**Cause**: Need to enable system keys access + +**Fix**:: + + fdb> option on ACCESS_SYSTEM_KEYS + +Validation Completes but No Logs +--------------------------------- + +**Symptom**: Can't find trace events + +**Cause**: Logs may be in different location + +**Fix**:: + + # Find log location + ps aux | grep fdbserver + # Look for -L or --logdir parameter + + # Or check default locations + ls -ltr /var/log/foundationdb/ + ls -ltr ~/fdb_test_data/*.log + +Expected Performance +==================== + +- **Small dataset (100s of keys)**: Seconds +- **Medium dataset (10K keys)**: 1-5 minutes +- **Large dataset (1M+ keys)**: Hours (rate limited) + +Rate limiting is controlled by ``AUDIT_STORAGE_RATE_PER_SERVER_MAX`` (default: 50MB/s per server). 
diff --git a/fdbbackup/CMakeLists.txt b/fdbbackup/CMakeLists.txt index 9888088f662..ff0b0c2d559 100644 --- a/fdbbackup/CMakeLists.txt +++ b/fdbbackup/CMakeLists.txt @@ -63,4 +63,6 @@ if (NOT WIN32 AND NOT OPEN_FOR_IDE) enable_testing() add_test(NAME dir_backup_tests COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/tests/dir_backup_test.sh ${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR}) add_test(NAME s3_backup_tests COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/tests/s3_backup_test.sh ${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR} --encrypt-at-random) + # Note: No --encrypt flag - BulkLoad doesn't support encryption yet + add_test(NAME s3_backup_bulkdump_bulkload_tests COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/tests/s3_backup_bulkdump_bulkload.sh ${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR}) endif() diff --git a/fdbbackup/backup.actor.cpp b/fdbbackup/backup.actor.cpp index 3c0b46d3cca..068b5b8be48 100644 --- a/fdbbackup/backup.actor.cpp +++ b/fdbbackup/backup.actor.cpp @@ -50,6 +50,9 @@ #include "fdbclient/S3BlobStore.h" #include "fdbclient/SystemData.h" #include "fdbclient/json_spirit/json_spirit_writer_template.h" +#include "fdbclient/BulkLoading.h" +#include "fdbclient/ManagementAPI.actor.h" +#include "fdbclient/BackupContainer.h" #include "flow/Platform.h" @@ -133,7 +136,7 @@ enum { OPT_DELETE_DATA, OPT_MIN_CLEANUP_SECONDS, OPT_USE_PARTITIONED_LOG, - OPT_ENCRYPT_FILES, + OPT_MODE, // Backup and Restore constants OPT_PROXY, @@ -281,7 +284,7 @@ CSimpleOpt::SOption g_rgBackupStartOptions[] = { { OPT_BLOB_CREDENTIALS, "--blob-credentials", SO_REQ_SEP }, { OPT_INCREMENTALONLY, "--incremental", SO_NONE }, { OPT_ENCRYPTION_KEY_FILE, "--encryption-key-file", SO_REQ_SEP }, - { OPT_ENCRYPT_FILES, "--encrypt-files", SO_REQ_SEP }, + { OPT_MODE, "--mode", SO_REQ_SEP }, TLS_OPTION_FLAGS, SO_END_OF_OPTIONS }; @@ -715,6 +718,7 @@ CSimpleOpt::SOption g_rgRestoreOptions[] = { { OPT_WAITFORDONE, "--waitfordone", SO_NONE }, { OPT_RESTORE_USER_DATA, "--user-data", SO_NONE }, { OPT_RESTORE_SYSTEM_DATA, "--system-metadata", SO_NONE 
}, + { OPT_MODE, "--mode", SO_REQ_SEP }, { OPT_RESTORE_VERSION, "--version", SO_REQ_SEP }, { OPT_RESTORE_VERSION, "-v", SO_REQ_SEP }, { OPT_TRACE, "--log", SO_NONE }, @@ -1095,6 +1099,10 @@ static void printBackupUsage(bool devhelp) { " For start or modify operations, specifies the backup's default target snapshot interval " "as DURATION seconds. Defaults to %d for start operations.\n", CLIENT_KNOBS->BACKUP_DEFAULT_SNAPSHOT_INTERVAL_SEC); + printf(" --mode MODE Snapshot mechanism to use: bulkdump, rangefile (default, legacy), or both.\n" + " bulkdump: Uses BulkDump SST files for faster restore performance\n" + " rangefile: Traditional range files for backward compatibility\n" + " both: Generate both formats for validation (increases backup size)\n"); printf(" --active-snapshot-interval DURATION\n" " For modify operations, sets the desired interval for the backup's currently active " "snapshot, relative to the start of the snapshot.\n"); @@ -1133,11 +1141,6 @@ static void printBackupUsage(bool devhelp) { " For modify operations, need to pass encryption key file only if Backup container URL is " "changed to " "re-encrypt all future backup files. \n"); - printf(" --encrypt-files 0/1" - " If passed, this argument will allow the user to override the database encryption state to " - "either enable (1) or disable (0) encryption at rest with snapshot backups. This option refers to block " - "level encryption of snapshot backups while --encryption-key-file (above) refers to file level encryption. 
" - "Generally, these two options should not be used together.\n"); printf(TLS_HELP); printf(" -w, --wait Wait for the backup to complete (allowed with `start' and `discontinue').\n"); @@ -1212,6 +1215,12 @@ static void printRestoreUsage(bool devhelp) { " To be used in conjunction with incremental restore.\n" " Indicates to the backup agent to only begin replaying log files from a certain version, " "instead of the entire set.\n"); + printf( + " --mode MODE Restore mechanism to use: rangefile (default), bulkload.\n" + " rangefile: Traditional range file restore from kvranges/\n" + " bulkload: Use BulkLoad for faster range data restoration if BulkDump dataset is available\n" + " If incomplete dataset: restore returns error with clear message directing user to retry with " + "--mode rangefile.\n"); printf(" --encryption-key-file" " The AES-256-GCM key in the provided file is used for decrypting backup files.\n"); printf(TLS_HELP); @@ -1381,7 +1390,6 @@ extern bool g_crashOnError; ProgramExe getProgramType(std::string programExe) { ProgramExe enProgramExe = ProgramExe::UNDEFINED; - // lowercase the string std::transform(programExe.begin(), programExe.end(), programExe.begin(), ::tolower); // Remove the extension, if Windows @@ -1451,7 +1459,6 @@ ProgramExe getProgramType(std::string programExe) { BackupType getBackupType(std::string backupType) { BackupType enBackupType = BackupType::UNDEFINED; - // lowercase the string std::transform(backupType.begin(), backupType.end(), backupType.begin(), ::tolower); static std::map values; @@ -1481,6 +1488,28 @@ BackupType getBackupType(std::string backupType) { return enBackupType; } +Optional getSnapshotMode(std::string mode) { + std::transform(mode.begin(), mode.end(), mode.begin(), ::tolower); + + if (mode == "rangefile") + return SnapshotMode::RANGEFILE; + if (mode == "bulkdump") + return SnapshotMode::BULKDUMP; + if (mode == "both") + return SnapshotMode::BOTH; + return Optional(); +} + +Optional getRestoreMode(std::string 
mode) { + std::transform(mode.begin(), mode.end(), mode.begin(), ::tolower); + + if (mode == "rangefile") + return RestoreMode::RANGEFILE; + if (mode == "bulkload") + return RestoreMode::BULKLOAD; + return Optional(); +} + RestoreType getRestoreType(std::string name) { if (name == "start") return RestoreType::START; @@ -1496,7 +1525,6 @@ RestoreType getRestoreType(std::string name) { DBType getDBType(std::string dbType) { DBType enBackupType = DBType::UNDEFINED; - // lowercase the string std::transform(dbType.begin(), dbType.end(), dbType.begin(), ::tolower); static std::map values; @@ -1603,6 +1631,32 @@ ACTOR Future getLayerStatus(Reference tr wait(waitForAll(tagLastRestorableVersions) && waitForAll(tagStates) && waitForAll(tagContainers) && waitForAll(tagRangeBytes) && waitForAll(tagLogBytes) && success(fBackupPaused)); + state std::vector> encryptionSetupResults; + state std::vector encryptionContainerIndices; + + for (int i = 0; i < tagContainers.size(); i++) { + if (tagContainers[i].get()->getEncryptionKeyFileName().present()) { + encryptionSetupResults.push_back(tagContainers[i].get()->encryptionSetupComplete()); + encryptionContainerIndices.push_back(i); + } + } + wait(waitForAllReady(encryptionSetupResults)); + json_spirit::mArray keysArr; + std::unordered_set seenKeyPaths; + for (int j = 0; j < encryptionContainerIndices.size() && j < 1e6; j++) { + int i = encryptionContainerIndices[j]; + std::string keyPath = tagContainers[i].get()->getEncryptionKeyFileName().get(); + + if (seenKeyPaths.find(keyPath) == seenKeyPaths.end()) { + seenKeyPaths.insert(keyPath); + json_spirit::mObject keyObj; + keyObj["path"] = tagContainers[i].get()->getEncryptionKeyFileName().get(); + keyObj["success"] = !encryptionSetupResults[j].isError(); + keysArr.push_back(keyObj); + } + } + o.create("encryption_keys") = keysArr; + JSONDoc tagsRoot = layerRoot.subDoc("tags.$latest"); layerRoot.create("tags.timestamp") = now(); layerRoot.create("total_workers.$sum") = @@ -1631,7 +1685,11 
@@ ACTOR Future getLayerStatus(Reference tr tagRoot.create("range_bytes_written") = tagRangeBytes[j].get(); tagRoot.create("mutation_log_bytes_written") = tagLogBytes[j].get(); tagRoot.create("mutation_stream_id") = backupTagUids[j].toString(); - + tagRoot.create("file_level_encryption") = + tagContainers[j].get()->getEncryptionKeyFileName().present() ? true : false; + if (tagContainers[j].get()->getEncryptionKeyFileName().present()) { + tagRoot.create("encryption_key_file") = tagContainers[j].get()->getEncryptionKeyFileName().get(); + } j++; } } else if (exe == ProgramExe::DR_AGENT) { @@ -1981,14 +2039,14 @@ ACTOR Future submitBackup(Database db, int initialSnapshotIntervalSeconds, int snapshotIntervalSeconds, Standalone> backupRanges, - bool encryptionEnabled, std::string tagName, bool dryRun, WaitForComplete waitForCompletion, StopWhenDone stopWhenDone, UsePartitionedLog usePartitionedLog, IncrementalBackupOnly incrementalBackupOnly, - Optional encryptionKeyFile) { + Optional encryptionKeyFile, + SnapshotMode snapshotMode = SnapshotMode::RANGEFILE) { try { state FileBackupAgent backupAgent; ASSERT(!backupRanges.empty()); @@ -2038,11 +2096,11 @@ ACTOR Future submitBackup(Database db, snapshotIntervalSeconds, tagName, backupRanges, - encryptionEnabled, stopWhenDone, usePartitionedLog, incrementalBackupOnly, - encryptionKeyFile)); + encryptionKeyFile, + static_cast(snapshotMode))); // Wait for the backup to complete, if requested if (waitForCompletion) { @@ -2366,7 +2424,8 @@ ACTOR Future runRestore(Database db, std::string removePrefix, OnlyApplyMutationLogs onlyApplyMutationLogs, InconsistentSnapshotOnly inconsistentSnapshotOnly, - Optional encryptionKeyFile) { + Optional encryptionKeyFile, + RestoreMode restoreMode = RestoreMode::RANGEFILE) { ASSERT(!ranges.empty()); if (targetVersion != invalidVersion && !targetTimestamp.empty()) { @@ -2443,7 +2502,9 @@ ACTOR Future runRestore(Database db, onlyApplyMutationLogs, inconsistentSnapshotOnly, beginVersion, - 
encryptionKeyFile)); + encryptionKeyFile, + {}, + restoreMode == RestoreMode::RANGEFILE)); if (waitForDone && verbose) { // If restore is now complete then report version restored @@ -3705,8 +3766,7 @@ int main(int argc, char* argv[]) { bool dryRun = false; bool restoreSystemKeys = false; bool restoreUserKeys = false; - bool encryptionEnabled = true; - bool encryptSnapshotFilesPresent = false; + RestoreMode restoreMode = RestoreMode::RANGEFILE; // Default to traditional range file restore std::string traceDir = ""; std::string traceFormat = ""; std::string traceLogGroup; @@ -3728,6 +3788,7 @@ int main(int argc, char* argv[]) { DeleteData deleteData{ false }; Optional encryptionKeyFile; Optional blobManifestUrl; + SnapshotMode snapshotMode = SnapshotMode::RANGEFILE; // Default to legacy rangefile mode BackupModifyOptions modifyOptions; @@ -3881,25 +3942,6 @@ int main(int argc, char* argv[]) { case OPT_BASEURL: baseUrl = args->OptionArg(); break; - case OPT_ENCRYPT_FILES: { - const char* a = args->OptionArg(); - int encryptFiles; - if (!sscanf(a, "%d", &encryptFiles)) { - fprintf(stderr, "ERROR: Could not parse encrypt-files `%s'\n", a); - return FDB_EXIT_ERROR; - } - if (encryptFiles != 0 && encryptFiles != 1) { - fprintf(stderr, "ERROR: encrypt-files must be either 0 or 1\n"); - return FDB_EXIT_ERROR; - } - encryptSnapshotFilesPresent = true; - if (encryptFiles == 0) { - encryptionEnabled = false; - } else { - encryptionEnabled = true; - } - break; - } case OPT_RESTORE_CLUSTERFILE_DEST: restoreClusterFileDest = args->OptionArg(); break; @@ -4163,13 +4205,33 @@ int main(int argc, char* argv[]) { case OPT_JSON: jsonOutput = true; break; + case OPT_MODE: + // Handle mode parameter for both backup and restore + if (programExe == ProgramExe::BACKUP) { + // Validate and store mode parameter for snapshot generation + auto parsedMode = getSnapshotMode(args->OptionArg()); + if (!parsedMode.present()) { + fprintf(stderr, + "ERROR: Unknown snapshot mode '%s'. 
Valid modes are: rangefile, bulkdump, both\n", + args->OptionArg()); + return FDB_EXIT_ERROR; + } + snapshotMode = parsedMode.get(); + } else if (programExe == ProgramExe::RESTORE || programExe == ProgramExe::FASTRESTORE_TOOL) { + // Validate and store mode parameter for restore mechanism + auto parsedMode = getRestoreMode(args->OptionArg()); + if (!parsedMode.present()) { + fprintf(stderr, + "ERROR: Unknown restore mode '%s'. Valid modes are: rangefile, bulkload\n", + args->OptionArg()); + return FDB_EXIT_ERROR; + } + restoreMode = parsedMode.get(); + } + break; } } - if (encryptionKeyFile.present() && encryptSnapshotFilesPresent) { - fprintf(stderr, "WARNING: Use of --encrypt-files and --encryption-key-file together is discouraged\n"); - } - // Process the extra arguments for (int argLoop = 0; argLoop < args->FileCount(); argLoop++) { switch (programExe) { @@ -4412,14 +4474,14 @@ int main(int argc, char* argv[]) { initialSnapshotIntervalSeconds, snapshotIntervalSeconds, backupKeys, - encryptionEnabled, tagName, dryRun, waitForDone, stopWhenDone, usePartitionedLog, incrementalBackupOnly, - encryptionKeyFile)); + encryptionKeyFile, + snapshotMode)); break; } @@ -4604,8 +4666,8 @@ int main(int argc, char* argv[]) { removePrefix, onlyApplyMutationLogs, inconsistentSnapshotOnly, - encryptionKeyFile)); - + encryptionKeyFile, + restoreMode)); // Pass RestoreMode directly break; case RestoreType::WAIT: f = stopAfter(success(ba.waitRestore(db, KeyRef(tagName), Verbose::True))); diff --git a/fdbbackup/tests/backup_tests_common.sh b/fdbbackup/tests/backup_tests_common.sh new file mode 100644 index 00000000000..b4227aba26b --- /dev/null +++ b/fdbbackup/tests/backup_tests_common.sh @@ -0,0 +1,542 @@ +#!/usr/bin/env bash +# +# backup_tests_common.sh +# +# This source file is part of the FoundationDB open source project +# +# Copyright 2013-2024 Apple Inc. 
and the FoundationDB project authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Common backup test functions +# Shared between s3_backup_test.sh, s3_backup_bulkdump_bulkload.sh, dir_backup_test.sh, etc. +# These functions work with both S3/blobstore and file-based backup testing + +# Helper function to add base arguments (cluster file and logging) +# Uses bash nameref (requires bash 4.3+) to modify the array in place +# $1 name of the array variable to modify (passed by name, not value) +# $2 cluster file path +# $3 log directory +# $4 cluster file flag: "backup" uses -C, "restore" uses --dest-cluster-file (default: restore) +function add_base_args { + local -n _args_ref="$1" + local cluster_file="${2}" + local log_dir="${3}" + local flag_type="${4:-restore}" + + if [[ "${flag_type}" == "backup" ]]; then + _args_ref+=("-C" "${cluster_file}") + else + _args_ref+=("--dest-cluster-file" "${cluster_file}") + fi + _args_ref+=("--log" "--logdir=${log_dir}") +} + +# Helper function to add common optional arguments to a command args array +# Uses bash nameref (requires bash 4.3+) to modify the array in place +# $1 name of the array variable to modify (passed by name, not value) +# $2 blob credentials file (optional) +# $3 mode (optional): for backup: bulkdump|rangefile|both; for restore: rangefile|bulkload +# $4 encryption key file (optional) +function add_common_optional_args { + local -n _args_ref="$1" + local blob_credentials="${2:-}" + local mode="${3:-}" + 
local encryption_key_file="${4:-}" + + if [[ -n "${blob_credentials}" ]]; then + _args_ref+=("--blob-credentials" "${blob_credentials}") + fi + + if [[ -n "${mode}" ]]; then + _args_ref+=("--mode" "${mode}") + fi + + if [[ -n "${encryption_key_file}" ]]; then + _args_ref+=("--encryption-key-file" "${encryption_key_file}") + fi + + for knob in "${KNOBS[@]}"; do + _args_ref+=("${knob}") + done +} + +# Pre-clear S3 URL before test (only for real S3, not MockS3Server) +# $1 build directory, $2 scratch directory, $3 url, $4 blob credentials file +# Returns 0 if cleared or skipped (MockS3Server), 1 on error +function s3_preclear_url { + local local_build_dir="${1}" + local local_scratch_dir="${2}" + local local_url="${3}" + local credentials="${4}" + + if [[ "${USE_S3}" != "true" ]]; then + # MockS3Server - buckets are lazily created, skip preclear + return 0 + fi + + local cmd=("${local_build_dir}/bin/s3client") + cmd+=("${KNOBS[@]}") + cmd+=("--tls-ca-file" "${TLS_CA_FILE}") + cmd+=("--blob-credentials" "${credentials}") + cmd+=("--log" "--logdir" "${local_scratch_dir}") + cmd+=("rm" "${local_url}") + + if ! "${cmd[@]}"; then + err "Failed pre-cleanup rm of ${local_url}" + return 1 + fi + return 0 +} + +# Cleanup S3 URL after test (works for both S3 and MockS3Server) +# $1 build directory, $2 scratch directory, $3 url, $4 blob credentials file +function s3_cleanup_url { + local local_build_dir="${1}" + local local_scratch_dir="${2}" + local local_url="${3}" + local credentials="${4}" + + local cmd=("${local_build_dir}/bin/s3client") + cmd+=("${KNOBS[@]}") + + # Only add TLS CA file for real S3, not MockS3Server + if [[ "${USE_S3}" == "true" ]]; then + cmd+=("--tls-ca-file" "${TLS_CA_FILE}") + fi + + cmd+=("--blob-credentials" "${credentials}") + cmd+=("--log" "--logdir" "${local_scratch_dir}") + cmd+=("rm" "${local_url}") + + if ! 
"${cmd[@]}"; then + err "Failed rm of ${local_url}" + return 1 + fi + return 0 +} + +# Shared backup function with optional parameters +# $1 build directory, $2 scratch directory, $3 backup url, $4 tag +# $5 encryption key file (optional) +# $6 backup mode (optional): bulkdump|rangefile|both - controls snapshot mechanism +# $7 blob credentials file (optional) +function run_backup { + local local_build_dir="${1}" + local local_scratch_dir="${2}" + local local_url="${3}" + local local_tag="${4}" + local local_encryption_key_file="${5:-}" + local backup_mode="${6:-}" + local blob_credentials="${7:-}" + + # Start backup without waiting, then monitor with timeout + local cluster_file="${local_scratch_dir}/loopback_cluster/fdb.cluster" + local cmd_args=( + "-t" "${local_tag}" + "-d" "${local_url}" + "-k" '"" \xff' + ) + add_base_args cmd_args "${cluster_file}" "${local_scratch_dir}" "backup" + add_common_optional_args cmd_args "${blob_credentials}" "${backup_mode}" "${local_encryption_key_file}" + + if [[ "${USE_PARTITIONED_LOG:-false}" == "true" ]]; then + cmd_args+=("--partitioned-log-experimental") + fi + + # Start backup without -w flag to avoid hanging + if ! "${local_build_dir}"/bin/fdbbackup start "${cmd_args[@]}"; then + err "Start fdbbackup failed" + return 1 + fi + + # Poll for backup to become restorable (STATE_RUNNING_DIFFERENTIAL) + # BulkDump mode may take longer than traditional backups as it needs to: + # 1. Submit a BulkDump job to the DD system + # 2. Wait for the job to complete (which writes SST files) + # 3. Write snapshot metadata + local timeout=600 # 10 minutes for BulkDump jobs + local poll_interval=5 + local elapsed=0 + + log "Waiting for backup to become restorable (${timeout}s timeout, polling every ${poll_interval}s)..." 
+ + while [[ $elapsed -lt $timeout ]]; do + sleep $poll_interval + elapsed=$((elapsed + poll_interval)) + + # Check backup status using fdbbackup status command + set +e + status_output=$("${local_build_dir}"/bin/fdbbackup status -t "${local_tag}" -C "${local_scratch_dir}/loopback_cluster/fdb.cluster" --log --logdir="${local_scratch_dir}" 2>&1) + status_exit_code=$? + set -e + + # Check if backup is restorable (differential state) or completed + if echo "${status_output}" | grep -q "is restorable"; then + log "Backup is now restorable after ${elapsed}s" + break + fi + + if echo "${status_output}" | grep -q "completed"; then + log "Backup completed after ${elapsed}s" + break + fi + + # Log progress every 30 seconds + if [[ $((elapsed % 30)) -eq 0 ]]; then + log "Still waiting for backup to become restorable (${elapsed}s elapsed)..." + # Show current state for debugging + if echo "${status_output}" | grep -q "is restorable"; then + log " Status: backup is restorable (should have exited loop)" + elif echo "${status_output}" | grep -q "in progress to"; then + log " Status: backup running, waiting for snapshot to complete" + elif echo "${status_output}" | grep -q "just started"; then + log " Status: backup submitted, tasks starting up" + fi + # Check snapshot mode for debugging + if echo "${status_output}" | grep -q "Snapshot Mode: bulkdump"; then + log " Snapshot Mode: bulkdump (using BulkDump for snapshots)" + elif echo "${status_output}" | grep -q "Snapshot Mode: both"; then + log " Snapshot Mode: both (generating both formats)" + fi + fi + done + + if [[ $elapsed -ge $timeout ]]; then + err "Timeout waiting for backup to become restorable after ${timeout}s" + log "Final status output:" + echo "${status_output}" + return 1 + fi + + # Check if backup already completed (no need to discontinue) + if echo "${status_output}" | grep -q "completed"; then + log "Backup already completed - no need to discontinue" + return 0 + fi + + # Stop the backup to finalize it (only if 
still running) + log "Stopping backup to finalize restorable state" + set +e + stop_output=$("${local_build_dir}"/bin/fdbbackup discontinue -t "${local_tag}" -C "${local_scratch_dir}/loopback_cluster/fdb.cluster" --log --logdir="${local_scratch_dir}" 2>&1) + stop_exit_code=$? + set -e + + if [[ $stop_exit_code -ne 0 ]]; then + if echo "${stop_output}" | grep -q "already discontinued\|not running\|unneeded"; then + log "Backup already completed and finalized - this is success!" + else + err "Failed to stop backup: ${stop_output}" + return 1 + fi + else + log "Backup stopped successfully" + fi + + # Brief wait for backup to finish and create final metadata + sleep 5 + log "Backup finalized and should be restorable" + return 0 +} + +# Shared restore function with optional parameters +# $1 build directory, $2 scratch directory, $3 backup url, $4 tag +# $5 encryption key file (optional) +# $6 restore mode (optional): rangefile|bulkload - controls restore mechanism +# $7 blob credentials file (optional) +function run_restore { + local local_build_dir="${1}" + local local_scratch_dir="${2}" + local local_url="${3}" + local local_tag="${4}" + local local_encryption_key_file="${5:-}" + local restore_mode="${6:-}" + local blob_credentials="${7:-}" + + # Start restore without waiting, then monitor with timeout + local cluster_file="${local_scratch_dir}/loopback_cluster/fdb.cluster" + local cmd_args=( + "-t" "${local_tag}" + "-r" "${local_url}" + ) + add_base_args cmd_args "${cluster_file}" "${local_scratch_dir}" + add_common_optional_args cmd_args "${blob_credentials}" "${restore_mode}" "${local_encryption_key_file}" + + # Start restore without -w flag to avoid hanging + if ! 
"${local_build_dir}"/bin/fdbrestore start "${cmd_args[@]}"; then + err "Start fdbrestore failed" + return 1 + fi + + # Poll for restore to complete + # BulkLoad mode may take longer as it uses a different restoration mechanism + local timeout=600 # 10 minutes + local poll_interval=5 + local elapsed=0 + + log "Waiting for restore to complete (${timeout}s timeout, polling every ${poll_interval}s)..." + + while [[ $elapsed -lt $timeout ]]; do + sleep $poll_interval + elapsed=$((elapsed + poll_interval)) + + # Check restore status using fdbrestore status command + set +e + status_output=$("${local_build_dir}"/bin/fdbrestore status -t "${local_tag}" --dest-cluster-file "${local_scratch_dir}/loopback_cluster/fdb.cluster" --log --logdir="${local_scratch_dir}" 2>&1) + status_exit_code=$? + set -e + + # Check if restore completed + # Status output contains "State: completed" or "Phase: Complete" when done + # Also check "No restore" for when restore tag doesn't exist (completed and cleaned up) + if echo "${status_output}" | grep -qi "State:.*completed\|Phase:.*Complete\|No restore"; then + log "Restore completed after ${elapsed}s" + return 0 + fi + + # Check if restore failed (be specific - "LastError: None" contains "Error" so avoid false positives) + if echo "${status_output}" | grep -qi "State:.*aborted"; then + err "Restore aborted after ${elapsed}s" + log "Status output:" + echo "${status_output}" + return 1 + fi + + # Check for actual errors (not "LastError: None") + if echo "${status_output}" | grep -i "LastError:" | grep -qvi "None"; then + err "Restore has error after ${elapsed}s" + log "Status output:" + echo "${status_output}" + return 1 + fi + + # Log progress every 30 seconds + if [[ $((elapsed % 30)) -eq 0 ]]; then + log "Still waiting for restore to complete (${elapsed}s elapsed)..." 
+ # Show phase info for debugging + if echo "${status_output}" | grep -qi "Phase:"; then + phase_info=$(echo "${status_output}" | grep -i "Phase:" | head -1) + log " ${phase_info}" + fi + fi + done + + if [[ $elapsed -ge $timeout ]]; then + err "Timeout waiting for restore to complete after ${timeout}s" + log "Final status output:" + echo "${status_output}" + return 1 + fi + + return 0 +} + +# Test encryption mismatches - works with both S3 and file-based backups +# $1 build directory, $2 scratch directory, $3 backup url, $4 tag +# $5 encryption key file used for backup (empty if no encryption), $6 blob credentials file (optional) +function test_encryption_mismatches { + local local_build_dir="${1}" + local local_scratch_dir="${2}" + local local_url="${3}" + local local_tag="${4}" + local backup_encryption_key_file="${5}" + local blob_credentials="${6:-}" + + local cluster_file="${local_scratch_dir}/loopback_cluster/fdb.cluster" + + # Create separate log directory for encryption mismatch tests + local mismatch_logdir="${local_scratch_dir}/encryption_mismatch_logs" + mkdir -p "${mismatch_logdir}" + + # Build base restore args once - reused for all tests + local base_args=( + "-t" "${local_tag}" "-w" + "-r" "${local_url}" + ) + add_base_args base_args "${cluster_file}" "${mismatch_logdir}" + add_common_optional_args base_args "${blob_credentials}" "" "" + + if [[ -n "${backup_encryption_key_file}" ]]; then + # Backup was encrypted - test mismatches + log "Testing encryption mismatches for encrypted backup" + + # Test 1: Encrypted backup → restore without encryption (should fail) + log "Test 1: Attempting restore without encryption on encrypted backup (should fail)" + + set +e + "${local_build_dir}"/bin/fdbrestore start "${base_args[@]}" 2>"${mismatch_logdir}/test1_stderr.log" + local exit_code1=$? + set -e + + if [[ ${exit_code1} -eq 0 ]]; then + err "Restore without encryption on encrypted backup succeeded when it should have failed!" 
+ rm -rf "${mismatch_logdir}" + return 1 + fi + log "SUCCESS: Restore without encryption on encrypted backup failed as expected (exit code: ${exit_code1})" + + # Test 2: Encrypted backup → restore with wrong encryption key (should fail) + local wrong_key_file="${local_scratch_dir}/wrong_key" + create_encryption_key_file "${wrong_key_file}" + + log "Test 2: Attempting restore with wrong encryption key (should fail)" + # Copy base_args and add encryption key + local cmd_args2=("${base_args[@]}" "--encryption-key-file" "${wrong_key_file}") + + set +e + "${local_build_dir}"/bin/fdbrestore start "${cmd_args2[@]}" 2>"${mismatch_logdir}/test2_stderr.log" + local exit_code2=$? + set -e + + rm -f "${wrong_key_file}" + + if [[ ${exit_code2} -eq 0 ]]; then + err "Restore with wrong encryption key succeeded when it should have failed!" + rm -rf "${mismatch_logdir}" + return 1 + fi + log "SUCCESS: Restore with wrong encryption key failed as expected (exit code: ${exit_code2})" + + else + # Backup was not encrypted - test mismatch + log "Testing encryption mismatch for unencrypted backup" + + # Test: Unencrypted backup → restore with encryption (should fail) + local any_key_file="${local_scratch_dir}/any_key" + create_encryption_key_file "${any_key_file}" + + log "Test: Attempting restore with encryption on unencrypted backup (should fail)" + # Copy base_args and add encryption key + local cmd_args=("${base_args[@]}" "--encryption-key-file" "${any_key_file}") + + set +e + "${local_build_dir}"/bin/fdbrestore start "${cmd_args[@]}" 2>"${mismatch_logdir}/test_stderr.log" + local exit_code=$? + set -e + + rm -f "${any_key_file}" + + if [[ ${exit_code} -eq 0 ]]; then + err "Restore with encryption on unencrypted backup succeeded when it should have failed!" 
+ rm -rf "${mismatch_logdir}" + return 1 + fi + log "SUCCESS: Restore with encryption on unencrypted backup failed as expected (exit code: ${exit_code})" + fi + + # Clean up separate log directory + rm -rf "${mismatch_logdir}" + + log "All encryption mismatch tests completed successfully" + return 0 +} + +# Helper function to wait for restore completion by polling status +# $1 build directory, $2 scratch directory, $3 restore tag +function run_restore_wait { + local local_build_dir="${1}" + local local_scratch_dir="${2}" + local local_tag="${3}" + + local timeout=600 + local poll_interval=5 + local elapsed=0 + + while [[ $elapsed -lt $timeout ]]; do + sleep $poll_interval + elapsed=$((elapsed + poll_interval)) + + set +e + local status_output + status_output=$("${local_build_dir}"/bin/fdbrestore status -t "${local_tag}" --dest-cluster-file "${local_scratch_dir}/loopback_cluster/fdb.cluster" --log --logdir="${local_scratch_dir}" 2>&1) + set -e + + if echo "${status_output}" | grep -qi "State:.*completed\|Phase:.*Complete\|No restore"; then + return 0 + fi + + if echo "${status_output}" | grep -qi "State:.*aborted"; then + return 1 + fi + + if echo "${status_output}" | grep -i "LastError:" | grep -qvi "None"; then + return 1 + fi + done + + return 1 +} + +# NOTE: setup_s3_environment and setup_tls_ca_file are defined in tests_common.sh +# They use TESTS_COMMON_DIR to reliably find aws_fixture.sh and mocks3_fixture.sh + +# Setup common backup test environment and knobs +# $1 http verbose level, $2 additional knobs array (optional) +function setup_backup_test_environment { + local http_verbose_level="${1}" + local additional_knobs=("${@:2}") + + # Clear proxy environment variables + unset HTTP_PROXY + unset HTTPS_PROXY + + # Set USE_S3 based on environment + readonly USE_S3="${USE_S3:-$( if [[ -n "${OKTETO_NAMESPACE+x}" ]]; then echo "true" ; else echo "false"; fi )}" + + # Set KNOBS based on whether we're using real S3 or MockS3Server + if [[ "${USE_S3}" == "true" ]]; 
then + # Use AWS KMS encryption for real S3 + KNOBS=("--knob_blobstore_encryption_type=aws:kms" "--knob_http_verbose_level=${http_verbose_level}") + else + # No encryption for MockS3Server + KNOBS=("--knob_http_verbose_level=${http_verbose_level}") + fi + + # Add any additional knobs (handle empty array when set -u is enabled) + if [[ ${#additional_knobs[@]} -gt 0 ]]; then + KNOBS+=("${additional_knobs[@]}") + fi + readonly KNOBS + + setup_tls_ca_file +} + +# Setup FDB cluster with backup agent - common pattern +# $1 source_dir, $2 build_dir, $3 test_scratch_dir, $4 process_count, $5+ cluster_knobs +function setup_fdb_cluster_with_backup { + local _src_dir="${1}" + local _bld_dir="${2}" + local _scratch_dir="${3}" + local _proc_count="${4:-1}" + local _cluster_knobs=("${@:5}") + + # Source FDB cluster fixture + if ! source "${cwd}/../../fdbclient/tests/fdb_cluster_fixture.sh"; then + err "Failed to source fdb_cluster_fixture.sh" + exit 1 + fi + + # Startup fdb cluster + if ! start_fdb_cluster "${_src_dir}" "${_bld_dir}" "${_scratch_dir}" "${_proc_count}" "${_cluster_knobs[@]}"; then + err "Failed start FDB cluster" + exit 1 + fi + log "FDB cluster is up" + + # Start backup agent with KNOBS (set by setup_backup_test_environment) + if ! start_backup_agent "${_bld_dir}" "${_scratch_dir}" "${KNOBS[@]}"; then + err "Failed start backup_agent" + exit 1 + fi + log "Backup_agent is up" +} \ No newline at end of file diff --git a/fdbbackup/tests/dir_backup_test.sh b/fdbbackup/tests/dir_backup_test.sh index 948611691f4..27523b56c53 100755 --- a/fdbbackup/tests/dir_backup_test.sh +++ b/fdbbackup/tests/dir_backup_test.sh @@ -62,13 +62,6 @@ function resolve_to_absolute_path { realpath "${p}" } -function create_encryption_key_file { - local key_file="${1}" - log "Creating encryption key file at ${key_file}" - dd if=/dev/urandom bs=32 count=1 of="${key_file}" 2>/dev/null - chmod 600 "${key_file}" -} - # Run the fdbbackup command. 
# $1 The build directory so we can find bin/fdbbackup command. # $2 The scratch directory where the fdb.cluster file can be found. @@ -156,6 +149,9 @@ function test_dir_backup_and_restore { err "Failed backup" return 1 fi + + test_fdbcli_status_json_for_bkup "${local_build_dir}" "${scratch_dir}" + log "Clear fdb data" if ! clear_data "${local_build_dir}" "${scratch_dir}"; then err "Failed clear data in fdb" diff --git a/fdbbackup/tests/s3_backup_bulkdump_bulkload.sh b/fdbbackup/tests/s3_backup_bulkdump_bulkload.sh new file mode 100755 index 00000000000..1df17cd3150 --- /dev/null +++ b/fdbbackup/tests/s3_backup_bulkdump_bulkload.sh @@ -0,0 +1,472 @@ +#!/usr/bin/env bash +# +# s3_backup_bulkdump_bulkload.sh +# +# This source file is part of the FoundationDB open source project +# +# Copyright 2013-2024 Apple Inc. and the FoundationDB project authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Test BulkLoad restore validation against traditional restore. +# +# This test validates BulkLoad produces identical results to traditional restore +# by comparing two restore methods using audit_storage validate_restore: +# +# 1. Loads test data into the database +# 2. Creates backup using "both" mode (writes BOTH range files AND SST files) +# - Range files are used by traditional restore +# - SST files are used by BulkLoad restore +# 3. 
Restores with --add-prefix to system keyspace using TRADITIONAL (rangefile) mode +# - This creates a "known good" baseline in system keys +# 4. Clears normalKeys (original data) +# 5. Restores to normalKeys using BULKLOAD mode (reads SST files) +# NOTE: If encryption is enabled, uses rangefile mode instead (BulkLoad doesn't support encryption) +# 6. Runs audit_storage validate_restore to compare (skipped if encryption enabled): +# - BulkLoad-restored data (in normalKeys) +# - Traditional-restored data (in system key prefix) +# - This validates BulkLoad produces identical results to traditional restore +# 7. Cleans up validation prefix data +# 8. Tests encryption mismatch handling +# +# Usage: +# s3_backup_bulkdump_bulkload.sh [scratch_dir] [--encrypt] + +# Install signal traps. Depends on globals being set. +# Calls the cleanup function. +trap "exit 1" HUP INT PIPE QUIT TERM +trap cleanup EXIT + +# Cleanup. Called from signal trap. +function cleanup { + # Check if test data should be preserved (common function from tests_common.sh) + if cleanup_with_preserve_check; then + return 0 + fi + + if type shutdown_fdb_cluster &> /dev/null; then + shutdown_fdb_cluster + fi + if type shutdown_mocks3 &> /dev/null; then + shutdown_mocks3 + fi + if type shutdown_aws &> /dev/null; then + shutdown_aws "${TEST_SCRATCH_DIR}" + fi + + # Clean up encryption key file + if [[ -n "${ENCRYPTION_KEY_FILE:-}" ]] && [[ -f "${ENCRYPTION_KEY_FILE}" ]]; then + rm -f "${ENCRYPTION_KEY_FILE}" + fi +} + +# Resolve passed in reference to an absolute path. +# e.g. /tmp on mac is actually /private/tmp. 
+# $1 path to resolve +function resolve_to_absolute_path { + local p="${1}" + while [[ -h "${p}" ]]; do + dir=$( cd -P "$( dirname "${p}" )" >/dev/null 2>&1 && pwd ) + p=$(readlink "${p}") + [[ ${p} != /* ]] && p="${dir}/${p}" + done + realpath "${p}" +} + +# Constants for validation prefix +readonly VALIDATION_PREFIX='\xff\x02/rlog/' +readonly VALIDATION_PREFIX_END='\xff\x02/rlog0' + +# Restore with prefix for validation - does NOT run audit or cleanup +# Use this when you want to keep the prefixed data for later comparison +# $1 build directory, $2 scratch directory, $3 backup url, $4 tag +# $5 encryption key file (optional), $6 restore mode (optional), $7 blob credentials file (optional) +function restore_with_prefix_for_validation { + local local_build_dir="${1}" + local local_scratch_dir="${2}" + local local_url="${3}" + local local_tag="${4}" + local local_encryption_key_file="${5:-}" + local restore_mode="${6:-}" + local blob_credentials="${7:-}" + + local fdbcli="${local_build_dir}/bin/fdbcli" + local cluster_file="${local_scratch_dir}/loopback_cluster/fdb.cluster" + + log "Restoring backup with prefix ${VALIDATION_PREFIX} for validation..." + local cmd_args=( + "-t" "${local_tag}_validate" + "-r" "${local_url}" + "--add-prefix" "${VALIDATION_PREFIX}" + ) + add_base_args cmd_args "${cluster_file}" "${local_scratch_dir}" + add_common_optional_args cmd_args "${blob_credentials}" "${restore_mode}" "${local_encryption_key_file}" + + if ! "${local_build_dir}"/bin/fdbrestore start "${cmd_args[@]}"; then + err "Failed to start validation restore" + return 1 + fi + + if ! run_restore_wait "${local_build_dir}" "${local_scratch_dir}" "${local_tag}_validate"; then + err "Validation restore failed to complete" + return 1 + fi + + # Debug: Check if data was restored to the prefix + log "Checking restored data at prefix..." 
+ local restored_check + restored_check=$("${fdbcli}" -C "${cluster_file}" --exec "option on READ_SYSTEM_KEYS; getrangekeys \"${VALIDATION_PREFIX}\" \"${VALIDATION_PREFIX_END}\" 10" 2>&1) || true + log "Restored data check: ${restored_check}" + + log "Validation restore with prefix completed (data kept for comparison)" + return 0 +} + +# Run audit_storage validate_restore to compare normalKeys vs prefixed data +# Call this AFTER restoring with prefix and AFTER populating normalKeys with data to compare +# $1 build directory, $2 scratch directory +function run_validate_restore_audit { + local local_build_dir="${1}" + local local_scratch_dir="${2}" + + local fdbcli="${local_build_dir}/bin/fdbcli" + local cluster_file="${local_scratch_dir}/loopback_cluster/fdb.cluster" + + log "Running audit_storage validate_restore..." + + local audit_output + local audit_id + local max_retries=10 + local retry_delay=5 + local attempt=0 + + while [[ $attempt -lt $max_retries ]]; do + attempt=$((attempt + 1)) + log "Audit attempt ${attempt}/${max_retries}..." + + audit_output=$("${fdbcli}" -C "${cluster_file}" --exec "audit_storage validate_restore \"\" \\xff" 2>&1) + log "Audit command output: ${audit_output}" + + audit_id=$(echo "${audit_output}" | grep -oE '[0-9a-f]{32}' | head -1) + + if [[ -n "${audit_id}" ]]; then + log "Audit started with ID: ${audit_id}" + break + fi + + if echo "${audit_output}" | grep -qE "1221|1230|1010"; then + log "Transient error detected, retrying in ${retry_delay}s..." + sleep $retry_delay + continue + fi + + err "Failed to extract audit ID from output: ${audit_output}" + return 1 + done + + if [[ -z "${audit_id}" ]]; then + err "Failed to start audit after ${max_retries} attempts" + return 1 + fi + + # Monitor audit progress + local timeout=300 + local poll_interval=5 + local elapsed=0 + + log "Waiting for audit to complete (${timeout}s timeout)..." 
+ + while [[ $elapsed -lt $timeout ]]; do + sleep $poll_interval + elapsed=$((elapsed + poll_interval)) + + local status_output + status_output=$("${fdbcli}" -C "${cluster_file}" --exec "get_audit_status validate_restore id ${audit_id}" 2>&1) + + if echo "${status_output}" | grep -q "Phase.*2"; then + log "Audit completed successfully after ${elapsed}s" + return 0 + fi + + if echo "${status_output}" | grep -q "Phase.*[34]"; then + err "Audit failed with status: ${status_output}" + return 1 + fi + + if [[ $((elapsed % 30)) -eq 0 ]]; then + log "Still waiting for audit (${elapsed}s)... Status: ${status_output}" + fi + done + + err "Timeout waiting for audit after ${timeout}s" + return 1 +} + +# Clean up the validation prefix data from system keyspace +# $1 build directory, $2 scratch directory +function cleanup_validation_prefix { + local local_build_dir="${1}" + local local_scratch_dir="${2}" + + local fdbcli="${local_build_dir}/bin/fdbcli" + local cluster_file="${local_scratch_dir}/loopback_cluster/fdb.cluster" + + log "Cleaning up validation data from ${VALIDATION_PREFIX}..." + "${fdbcli}" -C "${cluster_file}" --exec "writemode on; clearrange \"${VALIDATION_PREFIX}\" \"${VALIDATION_PREFIX_END}\"" 2>/dev/null || true + log "Validation prefix data cleaned up" +} + +# Run simple BulkDump backup and BulkLoad restore test. +# $1 The url to use +# $2 the scratch directory +# $3 The credentials file. +# $4 build directory +# $5 encryption key file (optional) +function test_bulkdump_bulkload { + local local_url="${1}" + local local_scratch_dir="${2}" + local credentials="${3}" + local local_build_dir="${4}" + local local_encryption_key_file="${5:-}" + + # Edit the url. Backup adds 'data' to the path. Need this url for cleanup. + local edited_url=$(echo "${local_url}" | sed -e "s/ctest/data\/ctest/" ) + readonly edited_url + if ! 
s3_preclear_url "${local_build_dir}" "${local_scratch_dir}" "${edited_url}" "${credentials}"; then + return 1 + fi + log "Load minimal data for faster backup" + if ! load_data "${local_build_dir}" "${local_scratch_dir}"; then + err "Failed loading data into fdb" + return 1 + fi + # Use "both" mode to create BOTH range files AND SST files + # This allows traditional restore (uses range files) and BulkLoad (uses SST files) to both work + log "Run backup with 'both' mode (creates range files AND SST files)" + if ! run_backup "${local_build_dir}" "${local_scratch_dir}" "${local_url}" "${TAG}" "${local_encryption_key_file}" "both" "${credentials}"; then + err "Failed backup" + return 1 + fi + + # BulkLoad validation: compare BulkLoad restore vs traditional restore + # 1. Restore with prefix using TRADITIONAL (rangefile) mode - this is our "known good" baseline + # 2. Clear normalKeys (original data) + # 3. Restore to normalKeys using BULKLOAD mode + # 4. Run audit_storage validate_restore to compare BulkLoad result vs traditional result + # This validates that BulkLoad produces identical results to traditional restore. + + # Step 1: Restore with prefix using traditional rangefile mode (keep the data) + log "Restoring with prefix using traditional rangefile mode..." + if ! restore_with_prefix_for_validation "${local_build_dir}" "${local_scratch_dir}" "${local_url}" "${TAG}" "${local_encryption_key_file}" "rangefile" "${credentials}"; then + err "Failed validation restore with prefix" + return 1 + fi + log "Traditional restore with prefix completed" + + # Step 2: Clear normalKeys (original data) + log "Clear fdb normalKeys data" + if ! 
clear_data "${local_build_dir}" "${local_scratch_dir}"; then + err "Failed clear data in fdb" + return 1 + fi + + # Step 3: Restore to normalKeys + # NOTE: BulkLoad doesn't support encryption yet, so use traditional restore when encrypted + if [[ -n "${local_encryption_key_file}" ]]; then + log "Restore using rangefile mode (BulkLoad doesn't support encryption yet)" + if ! run_restore "${local_build_dir}" "${local_scratch_dir}" "${local_url}" "${TAG}" "${local_encryption_key_file}" "rangefile" "${credentials}"; then + err "Failed rangefile restore" + return 1 + fi + log "SKIPPING BulkLoad validation (encryption not supported by BulkLoad)" + # Clean up the prefixed validation data + cleanup_validation_prefix "${local_build_dir}" "${local_scratch_dir}" + else + log "Restore using BulkLoad mode" + if ! run_restore "${local_build_dir}" "${local_scratch_dir}" "${local_url}" "${TAG}" "${local_encryption_key_file}" "bulkload" "${credentials}"; then + err "Failed BulkLoad restore" + return 1 + fi + + # Step 4: Run audit to compare BulkLoad-restored (normalKeys) vs traditional-restored (prefix) + log "Running audit_storage validate_restore..." + log "Comparing BulkLoad-restored data against traditional-restored data..." + if ! run_validate_restore_audit "${local_build_dir}" "${local_scratch_dir}"; then + err "Failed audit-based restore validation - BulkLoad result differs from traditional restore!" + return 1 + fi + log "Audit validation PASSED - BulkLoad produces identical results to traditional restore" + + # Step 5: Clean up the prefixed validation data + log "Cleaning up validation prefix data..." + cleanup_validation_prefix "${local_build_dir}" "${local_scratch_dir}" + fi + + # Additional verification + log "Verify restored data matches expected values" + if ! 
verify_data "${local_build_dir}" "${local_scratch_dir}"; then + err "Failed verification of restored data" + return 1 + fi + log "BulkLoad restore verification PASSED" + + # Test encryption mismatches (using shared function) + log "Testing encryption mismatches" + test_encryption_mismatches "${local_build_dir}" "${local_scratch_dir}" "${local_url}" "${TAG}" "${local_encryption_key_file}" "${credentials}" + + # Cleanup test data. + if ! s3_cleanup_url "${local_build_dir}" "${local_scratch_dir}" "${edited_url}" "${credentials}"; then + return 1 + fi + log "Check for Severity=40 errors" + if ! grep_for_severity40 "${local_scratch_dir}"; then + err "Found Severity=40 errors in logs" + return 1 + fi +} + +# (test_encryption_mismatches now in tests_common.sh as test_encryption_mismatches_s3) + +# set -o xtrace # a.k.a set -x # Set this one when debugging (or 'bash -x THIS_SCRIPT'). +set -o errexit # a.k.a. set -e +set -o nounset # a.k.a. set -u +set -o pipefail +set -o noclobber + +# Parse command line arguments (keep original - not shared) +USE_ENCRYPTION=false +USE_PARTITIONED_LOG=false # Default to false for BulkLoad testing +PARAMS=() + +while (( "$#" )); do + case "$1" in + --encrypt) + USE_ENCRYPTION=true + shift + ;; + --encrypt-at-random) + USE_ENCRYPTION=$(((RANDOM % 2)) && echo true || echo false ) + shift + ;; + --partitioned-log-experimental) + USE_PARTITIONED_LOG=true + shift + ;; + --partitioned-log-experimental-at-random) + USE_PARTITIONED_LOG=$(((RANDOM % 2)) && echo true || echo false ) + shift + ;; + -*|--*=) # unsupported flags + err "Error: Unsupported flag $1" >&2 + exit 1 + ;; + *) # preserve positional arguments + PARAMS+=("$1") + shift + ;; + esac +done + +# Set positional arguments in their proper place +if [ ${#PARAMS[@]} -ne 0 ]; then + set -- "${PARAMS[@]}" +fi + +# Get the working directory for this script. +if ! path=$(resolve_to_absolute_path "${BASH_SOURCE[0]}"); then + echo "Failed resolve_to_absolute_path" >&2 + exit 1 +fi +if ! 
cwd=$( cd -P "$( dirname "${path}" )" >/dev/null 2>&1 && pwd ); then + echo "Failed dirname on ${path}" >&2 + exit 1 +fi +readonly cwd + +# Use minimal data to prevent infinite backup logs (must be set BEFORE sourcing tests_common.sh) +export FDB_DATA_KEYCOUNT=10 + +# Source common test functions first (needed for setup_backup_test_environment) +# shellcheck source=/dev/null +if ! source "${cwd}/../../fdbclient/tests/tests_common.sh"; then + echo "Failed to source tests_common.sh" >&2 + exit 1 +fi +# shellcheck source=/dev/null +if ! source "${cwd}/backup_tests_common.sh"; then + echo "Failed to source backup_tests_common.sh" >&2 + exit 1 +fi + +# Globals +TEST_SCRATCH_DIR= +readonly TAG="test_backup_bulkdump" + +# Setup common environment (USE_S3, KNOBS, TLS_CA_FILE, clears HTTP_PROXY/HTTPS_PROXY) +setup_backup_test_environment 2 +# Process command-line options. +if (( $# < 2 )) || (( $# > 3 )); then + echo "ERROR: ${0} requires the fdb src and build directories --" + echo "CMAKE_SOURCE_DIR and CMAKE_BINARY_DIR -- and then, optionally," + echo "a directory into which we write scratch test data and logs" + echo "(otherwise we will write to subdirs under $TMPDIR). We will" + echo "leave the download of seaweed this directory for other" + echo "tests to find if they need it. Otherwise, we clean everything" + echo "else up on our way out." + echo "Example: ${0} ./foundationdb ./build_output ./scratch_dir [--encrypt]" + exit 1 +fi +if ! source_dir=$(is_fdb_source_dir "${1}"); then + err "${1} is not an fdb source directory" + exit 1 +fi +readonly source_dir +readonly build_dir="${2}" +if [[ ! 
-d "${build_dir}" ]]; then + err "${build_dir} is not a directory" + exit 1 +fi +scratch_dir="${TMPDIR:-/tmp}" +if (( $# == 3 )); then + scratch_dir="${3}" +fi +readonly scratch_dir + +# Create encryption key file if needed +ENCRYPTION_KEY_FILE="" +if [[ "${USE_ENCRYPTION}" == "true" ]]; then + log "Enabling encryption for backups" + ENCRYPTION_KEY_FILE="${scratch_dir}/test_encryption_key_file" + create_encryption_key_file "${ENCRYPTION_KEY_FILE}" + log "Created encryption key file at ${ENCRYPTION_KEY_FILE}" +else + log "Using plaintext for backups" +fi +readonly ENCRYPTION_KEY_FILE +readonly USE_PARTITIONED_LOG + +# Setup S3/MockS3 environment using common function +readonly temp_dir_prefix="mocks3_backup_test" +readonly url_path_prefix="ctests/$$" +setup_s3_environment "${build_dir}" "${scratch_dir}" "${temp_dir_prefix}" + +# Startup fdb cluster and backup agent with BulkLoad knobs +# Use 2 storage servers so BulkLoad can find a different server than the BulkDump source +setup_fdb_cluster_with_backup "${source_dir}" "${build_dir}" "${TEST_SCRATCH_DIR}" 2 \ + "--knob_shard_encode_location_metadata=1" "--knob_enable_read_lock_on_range=1" "--knob_blobstore_encryption_type=aws:kms" + +# Run tests. +test="test_bulkdump_bulkload" +url="blobstore://${host}/${url_path_prefix}/${test}?${query_str}" +test_bulkdump_bulkload "${url}" "${TEST_SCRATCH_DIR}" "${blob_credentials_file}" "${build_dir}" "${ENCRYPTION_KEY_FILE}" +log_test_result $? "test_bulkdump_bulkload" diff --git a/fdbbackup/tests/s3_backup_test.sh b/fdbbackup/tests/s3_backup_test.sh index 854c24dfe2a..2664c55c0d5 100755 --- a/fdbbackup/tests/s3_backup_test.sh +++ b/fdbbackup/tests/s3_backup_test.sh @@ -80,88 +80,6 @@ function resolve_to_absolute_path { realpath "${p}" } -function create_encryption_key_file { - local key_file="${1}" - log "Creating encryption key file at ${key_file}" - dd if=/dev/urandom bs=32 count=1 of="${key_file}" 2>/dev/null - chmod 600 "${key_file}" -} - -# Run the fdbbackup command. 
-# $1 The build directory -# $2 The scratch directory -# $3 The S3 url -# $4 credentials file -# $5 encryption key file (optional) -function backup { - local local_build_dir="${1}" - local local_scratch_dir="${2}" - local local_url="${3}" - local local_credentials="${4}" - local local_encryption_key_file="${5:-}" - - local cmd_args=( - "-C" "${local_scratch_dir}/loopback_cluster/fdb.cluster" - "-t" "${TAG}" "-w" - "-d" "${local_url}" - "-k" '"" \xff' - "--log" "--logdir=${local_scratch_dir}" - "--blob-credentials" "${local_credentials}" - ) - - if [[ -n "${local_encryption_key_file}" ]]; then - cmd_args+=("--encryption-key-file" "${local_encryption_key_file}") - fi - - if [[ "${USE_PARTITIONED_LOG}" == "true" ]]; then - cmd_args+=("--partitioned-log-experimental") - fi - - for knob in "${KNOBS[@]}"; do - cmd_args+=("${knob}") - done - - if ! "${local_build_dir}"/bin/fdbbackup start "${cmd_args[@]}"; then - err "Start fdbbackup failed" - return 1 - fi -} - -# Run the fdbrestore command. -# $1 The build directory -# $2 The scratch directory -# $3 The S3 url -# $4 credentials file -# $5 encryption key file (optional) -function restore { - local local_build_dir="${1}" - local local_scratch_dir="${2}" - local local_url="${3}" - local local_credentials="${4}" - local local_encryption_key_file="${5:-}" - - local cmd_args=( - "--dest-cluster-file" "${local_scratch_dir}/loopback_cluster/fdb.cluster" - "-t" "${TAG}" "-w" - "-r" "${local_url}" - "--log" "--logdir=${local_scratch_dir}" - "--blob-credentials" "${local_credentials}" - ) - - if [[ -n "${local_encryption_key_file}" ]]; then - cmd_args+=("--encryption-key-file" "${local_encryption_key_file}") - fi - - for knob in "${KNOBS[@]}"; do - cmd_args+=("${knob}") - done - - if ! "${local_build_dir}"/bin/fdbrestore start "${cmd_args[@]}"; then - err "Start fdbrestore failed" - return 1 - fi -} - # Run a backup to s3 and then a restore. 
# $1 The url to use # $2 the scratch directory @@ -175,24 +93,11 @@ function test_s3_backup_and_restore { local local_build_dir="${4}" local local_encryption_key_file="${5:-}" - # Edit the url. Backup adds 'data' to the path. Need this url for - # cleanup of test data. + # Edit the url. Backup adds 'data' to the path. Need this url for cleanup. local edited_url=$(echo "${local_url}" | sed -e "s/ctest/data\/ctest/" ) readonly edited_url - if [[ "${USE_S3}" == "true" ]]; then - # Run this rm only if s3. In MockS3Server, it would fail because - # bucket doesn't exist yet (they are lazily created). - local preclear_cmd=("${local_build_dir}/bin/s3client") - preclear_cmd+=("${KNOBS[@]}") - preclear_cmd+=("--tls-ca-file" "${TLS_CA_FILE}") - preclear_cmd+=("--blob-credentials" "${credentials}") - preclear_cmd+=("--log" "--logdir" "${local_scratch_dir}") - preclear_cmd+=("rm" "${edited_url}") - - if ! "${preclear_cmd[@]}"; then - err "Failed pre-cleanup rm of ${edited_url}" - return 1 - fi + if ! s3_preclear_url "${local_build_dir}" "${local_scratch_dir}" "${edited_url}" "${credentials}"; then + return 1 fi log "Load data" if ! load_data "${local_build_dir}" "${local_scratch_dir}"; then @@ -200,10 +105,13 @@ function test_s3_backup_and_restore { return 1 fi log "Run s3 backup" - if ! backup "${local_build_dir}" "${local_scratch_dir}" "${local_url}" "${credentials}" "${local_encryption_key_file}"; then + if ! run_backup "${local_build_dir}" "${local_scratch_dir}" "${local_url}" "${TAG}" "${local_encryption_key_file}" "" "${credentials}"; then err "Failed backup" return 1 fi + + test_fdbcli_status_json_for_bkup "${local_build_dir}" "${local_scratch_dir}" + log "Clear fdb data" if ! 
clear_data "${local_build_dir}" "${local_scratch_dir}"; then err "Failed clear data in fdb" @@ -211,10 +119,10 @@ function test_s3_backup_and_restore { fi # Test encryption mismatches (always run to test both encrypted and unencrypted scenarios) log "Testing encryption mismatches" - test_encryption_mismatches "${local_build_dir}" "${local_scratch_dir}" "${local_url}" "${credentials}" "${local_encryption_key_file}" + test_encryption_mismatches "${local_build_dir}" "${local_scratch_dir}" "${local_url}" "${TAG}" "${local_encryption_key_file}" "${credentials}" log "Restore from s3" - if ! restore "${local_build_dir}" "${local_scratch_dir}" "${local_url}" "${credentials}" "${local_encryption_key_file}"; then + if ! run_restore "${local_build_dir}" "${local_scratch_dir}" "${local_url}" "${TAG}" "${local_encryption_key_file}" "" "${credentials}"; then err "Failed restore" return 1 fi @@ -225,20 +133,7 @@ function test_s3_backup_and_restore { fi # Cleanup test data. - local cleanup_cmd=("${local_build_dir}/bin/s3client") - cleanup_cmd+=("${KNOBS[@]}") - - # Only add TLS CA file for real S3, not MockS3Server - if [[ "${USE_S3}" == "true" ]]; then - cleanup_cmd+=("--tls-ca-file" "${TLS_CA_FILE}") - fi - - cleanup_cmd+=("--blob-credentials" "${credentials}") - cleanup_cmd+=("--log" "--logdir" "${local_scratch_dir}") - cleanup_cmd+=("rm" "${edited_url}") - - if ! "${cleanup_cmd[@]}"; then - err "Failed rm of ${edited_url}" + if ! 
s3_cleanup_url "${local_build_dir}" "${local_scratch_dir}" "${edited_url}" "${credentials}"; then return 1 fi log "Check for Severity=40 errors" @@ -248,126 +143,6 @@ function test_s3_backup_and_restore { fi } -# Test all encryption mismatch scenarios - all should fail -# $1 The build directory -# $2 The scratch directory -# $3 The S3 url -# $4 credentials file -# $5 encryption key file used for backup (empty if no encryption) -function test_encryption_mismatches { - local local_build_dir="${1}" - local local_scratch_dir="${2}" - local local_url="${3}" - local local_credentials="${4}" - local backup_encryption_key_file="${5}" - - # Create separate log directory for encryption mismatch tests - local mismatch_logdir="${local_scratch_dir}/encryption_mismatch_logs" - mkdir -p "${mismatch_logdir}" - - if [[ -n "${backup_encryption_key_file}" ]]; then - # Backup was encrypted - test mismatches - log "Testing encryption mismatches for encrypted backup" - - # Test 1: Encrypted backup → restore without encryption (should fail) - log "Test 1: Attempting restore without encryption on encrypted backup (should fail)" - local cmd_args1=( - "--dest-cluster-file" "${local_scratch_dir}/loopback_cluster/fdb.cluster" - "-t" "${TAG}" "-w" - "-r" "${local_url}" - "--log" "--logdir=${mismatch_logdir}" - "--blob-credentials" "${local_credentials}" - ) - for knob in "${KNOBS[@]}"; do - cmd_args1+=("${knob}") - done - - set +e - "${local_build_dir}"/bin/fdbrestore start "${cmd_args1[@]}" - local exit_code1=$? - set -e - - if [[ ${exit_code1} -eq 0 ]]; then - err "ERROR: Restore without encryption on encrypted backup succeeded when it should have failed!" 
- rm -rf "${mismatch_logdir}" - return 1 - fi - log "SUCCESS: Restore without encryption on encrypted backup failed as expected" - - # Test 3: Encrypted backup → restore with wrong encryption key (should fail) - local wrong_key_file="${local_scratch_dir}/wrong_key" - create_encryption_key_file "${wrong_key_file}" - - log "Test 3: Attempting restore with wrong encryption key (should fail)" - local cmd_args3=( - "--dest-cluster-file" "${local_scratch_dir}/loopback_cluster/fdb.cluster" - "-t" "${TAG}" "-w" - "-r" "${local_url}" - "--log" "--logdir=${mismatch_logdir}" - "--blob-credentials" "${local_credentials}" - "--encryption-key-file" "${wrong_key_file}" - ) - for knob in "${KNOBS[@]}"; do - cmd_args3+=("${knob}") - done - - set +e - "${local_build_dir}"/bin/fdbrestore start "${cmd_args3[@]}" - local exit_code3=$? - set -e - - rm -f "${wrong_key_file}" - - if [[ ${exit_code3} -eq 0 ]]; then - err "ERROR: Restore with wrong encryption key succeeded when it should have failed!" - rm -rf "${mismatch_logdir}" - return 1 - fi - log "SUCCESS: Restore with wrong encryption key failed as expected" - - else - # Backup was not encrypted - test mismatch - log "Testing encryption mismatch for unencrypted backup" - - # Test 2: Unencrypted backup → restore with encryption (should fail) - local any_key_file="${local_scratch_dir}/any_key" - create_encryption_key_file "${any_key_file}" - - log "Test 2: Attempting restore with encryption on unencrypted backup (should fail)" - local cmd_args2=( - "--dest-cluster-file" "${local_scratch_dir}/loopback_cluster/fdb.cluster" - "-t" "${TAG}" "-w" - "-r" "${local_url}" - "--log" "--logdir=${mismatch_logdir}" - "--blob-credentials" "${local_credentials}" - "--encryption-key-file" "${any_key_file}" - ) - for knob in "${KNOBS[@]}"; do - cmd_args2+=("${knob}") - done - - set +e - "${local_build_dir}"/bin/fdbrestore start "${cmd_args2[@]}" - local exit_code2=$? 
- set -e - - rm -f "${any_key_file}" - - if [[ ${exit_code2} -eq 0 ]]; then - err "ERROR: Restore with encryption on unencrypted backup succeeded when it should have failed!" - rm -rf "${mismatch_logdir}" - return 1 - fi - log "SUCCESS: Restore with encryption on unencrypted backup failed as expected" - fi - - # Clean up separate log directory - rm -rf "${mismatch_logdir}" - - log "All encryption mismatch tests completed successfully" - return 0 -} - # set -o xtrace # a.k.a set -x # Set this one when debugging (or 'bash -x THIS_SCRIPT'). set -o errexit # a.k.a. set -e set -o nounset # a.k.a. set -u @@ -413,66 +188,35 @@ if [ ${#PARAMS[@]} -ne 0 ]; then set -- "${PARAMS[@]}" fi -# Globals -# TEST_SCRATCH_DIR gets set below. Tests should be their data in here. -# It gets cleaned up on the way out of the test. -TEST_SCRATCH_DIR= -readonly HTTP_VERBOSE_LEVEL=10 -readonly TAG="test_backup" -# Should we use S3? If USE_S3 is not defined, then check if -# OKTETO_NAMESPACE is defined (It is defined on the okteto -# internal apple dev environments where S3 is available). 
-readonly USE_S3="${USE_S3:-$( if [[ -n "${OKTETO_NAMESPACE+x}" ]]; then echo "true" ; else echo "false"; fi )}" - -# Set KNOBS based on whether we're using real S3 or MockS3Server -if [[ "${USE_S3}" == "true" ]]; then - # Use AWS KMS encryption for real S3 - KNOBS=("--knob_blobstore_encryption_type=aws:kms" "--knob_http_verbose_level=${HTTP_VERBOSE_LEVEL}") -else - # No encryption for MockS3Server - KNOBS=("--knob_http_verbose_level=${HTTP_VERBOSE_LEVEL}") -fi -readonly KNOBS - -# Set TLS_CA_FILE only when using real S3, not for MockS3Server -if [[ "${USE_S3}" == "true" ]]; then - # Try to find a valid TLS CA file if not explicitly set - if [[ -z "${TLS_CA_FILE:-}" ]]; then - # Common locations for TLS CA files on different systems - for ca_file in "/etc/pki/tls/cert.pem" "/etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem" "/etc/ssl/certs/ca-certificates.crt" "/etc/pki/tls/certs/ca-bundle.crt" "/etc/ssl/cert.pem" "/usr/local/share/ca-certificates/"; do - if [[ -f "${ca_file}" ]]; then - TLS_CA_FILE="${ca_file}" - break - fi - done - fi - TLS_CA_FILE="${TLS_CA_FILE:-}" -else - # For MockS3Server, don't use TLS - TLS_CA_FILE="" -fi -readonly TLS_CA_FILE -# Clear these environment variables. fdbbackup goes looking for them -# and if EITHER is set, it will go via a proxy instead of to where we. -# want it to go. -unset HTTP_PROXY -unset HTTPS_PROXY - # Get the working directory for this script. if ! path=$(resolve_to_absolute_path "${BASH_SOURCE[0]}"); then - err "Failed resolve_to_absolute_path" + echo "Failed resolve_to_absolute_path" >&2 exit 1 fi if ! cwd=$( cd -P "$( dirname "${path}" )" >/dev/null 2>&1 && pwd ); then - err "Failed dirname on ${path}" + echo "Failed dirname on ${path}" >&2 exit 1 fi readonly cwd + +# Source common test functions first (needed for setup_backup_test_environment) # shellcheck source=/dev/null if ! 
source "${cwd}/../../fdbclient/tests/tests_common.sh"; then - err "Failed to source tests_common.sh" + echo "Failed to source tests_common.sh" >&2 + exit 1 +fi +# shellcheck source=/dev/null +if ! source "${cwd}/backup_tests_common.sh"; then + echo "Failed to source backup_tests_common.sh" >&2 exit 1 fi + +# Globals +TEST_SCRATCH_DIR= +readonly TAG="test_backup" + +# Setup common environment (USE_S3, KNOBS, TLS_CA_FILE, clears HTTP_PROXY/HTTPS_PROXY) +setup_backup_test_environment 10 # Process command-line options. if (( $# < 2 )) || (( $# > 3 )); then echo "ERROR: ${0} requires the fdb src and build directories --" @@ -514,86 +258,16 @@ fi readonly ENCRYPTION_KEY_FILE readonly USE_PARTITIONED_LOG -# Set host, bucket, and blob_credentials_file whether MockS3Server or s3. -readonly path_prefix="ctests" -host= -query_str= -blob_credentials_file= -if [[ "${USE_S3}" == "true" ]]; then - log "Testing against s3" - # Now source in the aws fixture so we can use its methods in the below. - # shellcheck source=/dev/null - if ! source "${cwd}/../../fdbclient/tests/aws_fixture.sh"; then - err "Failed to source aws_fixture.sh" - exit 1 - fi - if ! TEST_SCRATCH_DIR=$( create_aws_dir "${scratch_dir}" ); then - err "Failed creating local aws_dir" - exit 1 - fi - readonly TEST_SCRATCH_DIR - if ! readarray -t configs < <(aws_setup "${build_dir}" "${TEST_SCRATCH_DIR}"); then - err "Failed aws_setup" - return 1 - fi - readonly host="${configs[0]}" - readonly bucket="${configs[1]}" - readonly blob_credentials_file="${configs[2]}" - readonly region="${configs[3]}" - query_str="bucket=${bucket}&region=${region}&secure_connection=1" - # Make these environment variables available for the fdb cluster and backup_agent when s3. - export FDB_BLOB_CREDENTIALS="${blob_credentials_file}" - export FDB_TLS_CA_FILE="${TLS_CA_FILE}" -else - log "Testing against MockS3Server" - # Now source in the mocks3 fixture so we can use its methods in the below. - # shellcheck source=/dev/null - if ! 
source "${cwd}/../../fdbclient/tests/mocks3_fixture.sh"; then - err "Failed to source mocks3_fixture.sh" - exit 1 - fi - if ! TEST_SCRATCH_DIR=$(mktemp -d "${scratch_dir}/mocks3_backup_test.XXXXXX"); then - err "Failed create of the mocks3 test dir." >&2 - exit 1 - fi - readonly TEST_SCRATCH_DIR - # Pass test scratch dir as persistence directory so files are cleaned up with test - if ! start_mocks3 "${build_dir}" "${TEST_SCRATCH_DIR}/mocks3_data"; then - err "Failed to start MockS3Server" - exit 1 - fi - readonly host="${MOCKS3_HOST}:${MOCKS3_PORT}" - readonly bucket="test-bucket" - readonly region="us-east-1" - # Create an empty blob credentials file (MockS3Server uses simple auth) - readonly blob_credentials_file="${TEST_SCRATCH_DIR}/blob_credentials.json" - echo '{}' > "${blob_credentials_file}" - # Let the connection to MockS3Server be insecure -- not-TLS - query_str="bucket=${bucket}&region=${region}&secure_connection=0" - # Set environment variables for MockS3Server - export FDB_BLOB_CREDENTIALS="${blob_credentials_file}" -fi +# Setup S3/MockS3 environment using common function +readonly temp_dir_prefix="mocks3_backup_test" +readonly url_path_prefix="ctests" +setup_s3_environment "${build_dir}" "${scratch_dir}" "${temp_dir_prefix}" -# Source in the fdb cluster. -# shellcheck source=/dev/null -if ! source "${cwd}/../../fdbclient/tests/fdb_cluster_fixture.sh"; then - err "Failed to source fdb_cluster_fixture.sh" - exit 1 -fi -# Startup fdb cluster and backup agent. -if ! start_fdb_cluster "${source_dir}" "${build_dir}" "${TEST_SCRATCH_DIR}" 1; then - err "Failed start FDB cluster" - exit 1 -fi -log "FDB cluster is up" -if ! start_backup_agent "${build_dir}" "${TEST_SCRATCH_DIR}" "${KNOBS[@]}"; then - err "Failed start backup_agent" - exit 1 -fi -log "Backup_agent is up" +# Startup fdb cluster and backup agent +setup_fdb_cluster_with_backup "${source_dir}" "${build_dir}" "${TEST_SCRATCH_DIR}" 1 # Run tests. 
test="test_s3_backup_and_restore" -url="blobstore://${host}/${path_prefix}/${test}?${query_str}" +url="blobstore://${host}/${url_path_prefix}/${test}?${query_str}" test_s3_backup_and_restore "${url}" "${TEST_SCRATCH_DIR}" "${blob_credentials_file}" "${build_dir}" "${ENCRYPTION_KEY_FILE}" log_test_result $? "test_s3_backup_and_restore" diff --git a/fdbcli/AuditStorageCommand.actor.cpp b/fdbcli/AuditStorageCommand.actor.cpp index 8a526c01b68..0c19f8c5574 100644 --- a/fdbcli/AuditStorageCommand.actor.cpp +++ b/fdbcli/AuditStorageCommand.actor.cpp @@ -18,6 +18,46 @@ * limitations under the License. */ +/* + * ============================================================================ + * AUDIT STORAGE COMMANDS + * ============================================================================ + * + * This file implements CLI commands for various storage audit operations: + * - audit_storage ha : Validate high availability + * - audit_storage replica : Validate replica consistency + * - audit_storage locationmetadata : Validate location metadata + * - audit_storage ssshard : Validate storage server shards + * - audit_storage validate_restore : Validate restored backup data + * + * ============================================================================ + * RESTORE VALIDATION (validate_restore) - Quick Reference + * ============================================================================ + * + * USAGE: audit_storage validate_restore + * + * Validates that restored backup data matches original source data. + * + * EXAMPLE WORKFLOW: + * 1. Backup: fdbbackup start -d file:///backup -z + * 2. Stop: fdbbackup discontinue -C + * 3. Lock: fdb> lock (save the returned UID) + * 4. Restore: fdbrestore start -r file:///backup --add-prefix "\xff\x02/rlog/" + * 5. Validate: fdb> audit_storage validate_restore "" "\xff" + * 6. Check: fdb> get_audit_status validate_restore id + * 7. Unlock: fdb> unlock + * 8. 
Cleanup: fdb> clearrange "\xff\x02/rlog/" "\xff\x02/rlog0" + * + * NOTE: Steps 2-3 (stop backup and lock) prevent writes during validation, avoiding + * false positive audit errors. The --add-prefix parameter in step 4 allows the restore + * to run on a non-empty database, enabling validation by comparing restored data + * against the source. Both restore and audit use LOCK_AWARE transactions, so they + * work on a locked database. + * + * See fdbserver/storageserver.actor.cpp for detailed implementation docs. + * ============================================================================ + */ + #include "fdbcli/fdbcli.actor.h" #include "fdbclient/IClientApi.h" @@ -54,6 +94,8 @@ ACTOR Future auditStorageCommandActor(Reference c type = AuditType::ValidateLocationMetadata; } else if (tokencmp(tokens[2], "ssshard")) { type = AuditType::ValidateStorageServerShard; + } else if (tokencmp(tokens[2], "validate_restore")) { + type = AuditType::ValidateRestore; } else { printUsage(tokens[0]); return UID(); @@ -72,6 +114,8 @@ ACTOR Future auditStorageCommandActor(Reference c type = AuditType::ValidateLocationMetadata; } else if (tokencmp(tokens[1], "ssshard")) { type = AuditType::ValidateStorageServerShard; + } else if (tokencmp(tokens[1], "validate_restore")) { + type = AuditType::ValidateRestore; } else { printUsage(tokens[0]); return UID(); @@ -119,7 +163,7 @@ CommandFactory auditStorageFactory( CommandHelp("audit_storage [BeginKey EndKey] ", "Start an audit storage", "Specify audit `Type' (only `ha' and `replica' and `locationmetadata' and " - "`ssshard' `Type' are supported currently), and\n" + "`ssshard' and `validate_restore' `Type' are supported currently), and\n" "optionally a sub-range with `BeginKey' and `EndKey'.\n" "Specify audit `EngineType' when auditType is `ha' or `replica'\n" "(only `ssd-rocksdb-v1' and `ssd-sharded-rocksdb' and `ssd-2' are supported).\n" diff --git a/fdbcli/BulkLoadCommand.actor.cpp b/fdbcli/BulkLoadCommand.actor.cpp index 
c246f227e79..94d947c5a40 100644 --- a/fdbcli/BulkLoadCommand.actor.cpp +++ b/fdbcli/BulkLoadCommand.actor.cpp @@ -146,26 +146,6 @@ ACTOR Future printBulkLoadJobProgress(Database cx, BulkLoadJobState job) { return Void(); } -ACTOR Future getBulkLoadMode(Database cx) { - state Transaction tr(cx); - loop { - try { - tr.setOption(FDBTransactionOptions::LOCK_AWARE); - tr.setOption(FDBTransactionOptions::READ_SYSTEM_KEYS); - tr.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE); - state int oldMode = 0; - Optional oldModeValue = wait(tr.get(bulkLoadModeKey)); - if (oldModeValue.present()) { - BinaryReader rd(oldModeValue.get(), Unversioned()); - rd >> oldMode; - } - return oldMode; - } catch (Error& e) { - wait(tr.onError(e)); - } - } -} - ACTOR Future bulkLoadCommandActor(Database cx, std::vector tokens) { if (tokencmp(tokens[1], "mode")) { if (tokens.size() == 2) { diff --git a/fdbcli/ConfigureCommand.actor.cpp b/fdbcli/ConfigureCommand.actor.cpp index fd49eb3f3de..4d9a68cee6b 100644 --- a/fdbcli/ConfigureCommand.actor.cpp +++ b/fdbcli/ConfigureCommand.actor.cpp @@ -153,9 +153,20 @@ ACTOR Future configureCommandActor(Reference db, } } - ConfigurationResult r = wait(ManagementAPI::changeConfig( - db, std::vector(tokens.begin() + startToken, tokens.end()), conf, force)); - result = r; + // Check for backup_worker_enabled configuration and reject it. + // This setting is now managed automatically by the backup system. 
+ for (auto it = tokens.begin() + startToken; it != tokens.end(); ++it) { + if (it->startsWith("backup_worker_enabled:="_sr)) { + result = ConfigurationResult::BACKUP_WORKER_ENABLED_RESTRICTED; + break; + } + } + + if (result != ConfigurationResult::BACKUP_WORKER_ENABLED_RESTRICTED) { + ConfigurationResult r = wait(ManagementAPI::changeConfig( + db, std::vector(tokens.begin() + startToken, tokens.end()), conf, force)); + result = r; + } } // Real errors get thrown from makeInterruptable and printed by the catch block in cli(), but @@ -266,14 +277,16 @@ ACTOR Future configureCommandActor(Reference db, "ERROR: a result of type `ConfigurationResult::DATABASE_IS_REGISTERED` was unexpectedly seen.\n"); ret = false; break; - case ConfigurationResult::ENCRYPTION_AT_REST_MODE_ALREADY_SET: - fprintf(stderr, "ERROR: A cluster cannot change its encryption_at_rest state after database creation.\n"); - ret = false; - break; case ConfigurationResult::INVALID_STORAGE_TYPE: fprintf(stderr, "ERROR: Invalid storage type for storage or TLog.\n"); ret = false; break; + case ConfigurationResult::BACKUP_WORKER_ENABLED_RESTRICTED: + fprintf(stderr, + "ERROR: backup_worker_enabled configuration is restricted in fdbcli and managed automatically by the " + "backup system.\n"); + ret = false; + break; default: ASSERT(false); ret = false; @@ -307,7 +320,6 @@ void configureGenerator(const char* text, // TODO(zhewu): update fdbcli command documentation. 
"perpetual_storage_wiggle_engine=", "storage_migration_type=", - "encryption_at_rest_mode=", nullptr }; arrayGenerator(text, line, opts, lc); } @@ -320,7 +332,6 @@ CommandFactory configureFactory( "commit_proxies=|grv_proxies=|logs=|resolvers=>*|" "count=|perpetual_storage_wiggle=|perpetual_storage_wiggle_locality=" "<:|0>|storage_migration_type={disabled|gradual|aggressive}" - "|encryption_at_rest_mode={disabled|domain_aware|cluster_aware}" "|exclude=", "change the database configuration", "The `new' option, if present, initializes a new database with the given configuration rather than changing " @@ -352,10 +363,6 @@ CommandFactory configureFactory( "perpetual_storage_wiggle_locality=<:|0>: Set the process filter for wiggling. " "The processes that match the given locality key and locality value are only wiggled. The value 0 will disable " "the locality filter and matches all the processes for wiggling.\n\n" - "encryption_at_rest_mode=: Sets the cluster encryption data at-rest " - "support for the " - "database. The configuration can be updated ONLY at the time of database creation and once set can't be " - "updated for the lifetime of the database.\n\n" "exclude=: Sets the addresses in the format of IP1:port1,IP2:port2 pairs to be excluded during " "recruitment. Note this should be only used when the database is unavailable because of the faulty processes " "that are blocking the recovery from completion. 
The number of addresses should be less than the replication " diff --git a/fdbcli/CoordinatorsCommand.actor.cpp b/fdbcli/CoordinatorsCommand.actor.cpp index 91050f15e0d..808358be4ba 100644 --- a/fdbcli/CoordinatorsCommand.actor.cpp +++ b/fdbcli/CoordinatorsCommand.actor.cpp @@ -64,9 +64,7 @@ ACTOR Future changeCoordinators(Reference db, std::vectorstartsWith(nameTokenBegin) && new_cluster_description.empty()) { new_cluster_description = tok->substr(nameTokenBegin.size()); @@ -74,12 +72,6 @@ ACTOR Future changeCoordinators(Reference db, std::vectorstartsWith(noConfigDB)) { - disableConfigDB = true; - auto next = tok - 1; - std::copy(tok + 1, tokens.end(), tok); - tokens.resize(tokens.size() - 1); - tok = next; } } @@ -92,10 +84,6 @@ ACTOR Future changeCoordinators(Reference db, std::vectorset(fdb_cli::clusterDescriptionSpecialKey, new_cluster_description); } - if (disableConfigDB) { - // All that matters is the key is set. - tr->set(fdb_cli::configDBSpecialKey, ""_sr); - } // if auto change, read the special key to retrieve the recommended config if (automatic) { // if previous read failed, retry, otherwise, use the same recommended config @@ -187,7 +175,6 @@ ACTOR Future changeCoordinators(Reference db, std::vector fileConfigureCommandActor(Reference db, std::string filePath, bool isNewDatabase, bool force) { + state ConfigurationResult result; std::string contents(readFileBytes(filePath, 100000)); json_spirit::mValue config; if (!json_spirit::read_string(contents, config)) { @@ -77,7 +78,14 @@ ACTOR Future fileConfigureCommandActor(Reference db, configString.erase(0, 1); // configureStringFromJSON returns a string with leading space. } - ConfigurationResult result = wait(ManagementAPI::changeConfig(db, configString, force)); + // Check for backup_worker_enabled configuration and reject it. + // This setting is now managed automatically by the backup system. 
+ if (configString.find(" backup_worker_enabled:=") != std::string::npos) { + result = ConfigurationResult::BACKUP_WORKER_ENABLED_RESTRICTED; + } else { + ConfigurationResult r = wait(ManagementAPI::changeConfig(db, configString, force)); + result = r; + } // Real errors get thrown from makeInterruptable and printed by the catch block in cli(), but // there are various results specific to changeConfig() that we need to report: bool ret = true; @@ -156,6 +164,12 @@ ACTOR Future fileConfigureCommandActor(Reference db, case ConfigurationResult::SUCCESS: printf("Configuration changed\n"); break; + case ConfigurationResult::BACKUP_WORKER_ENABLED_RESTRICTED: + fprintf(stderr, + "ERROR: backup_worker_enabled configuration is restricted in fdbcli and managed automatically by the " + "backup system\n"); + ret = false; + break; default: ASSERT(false); ret = false; diff --git a/fdbcli/GetAuditStatusCommand.actor.cpp b/fdbcli/GetAuditStatusCommand.actor.cpp index d99ac0d1a92..bc8b40620cd 100644 --- a/fdbcli/GetAuditStatusCommand.actor.cpp +++ b/fdbcli/GetAuditStatusCommand.actor.cpp @@ -134,7 +134,7 @@ ACTOR Future getAuditProgressByServer(Database cx, ACTOR Future getAuditProgress(Database cx, AuditType auditType, UID auditId, KeyRange auditRange) { if (auditType == AuditType::ValidateHA || auditType == AuditType::ValidateReplica || - auditType == AuditType::ValidateLocationMetadata) { + auditType == AuditType::ValidateLocationMetadata || auditType == AuditType::ValidateRestore) { wait(getAuditProgressByRange(cx, auditType, auditId, auditRange)); } else if (auditType == AuditType::ValidateStorageServerShard) { state std::vector> fs; @@ -186,6 +186,8 @@ ACTOR Future getAuditStatusCommandActor(Database cx, std::vector getAuditStatusCommandActor(Database cx, std::vector", "clear a key from the database", @@ -585,17 +582,6 @@ void initHelp() { "set a value for a given key", "If KEY is not already present in the database, it will be created." 
ESCAPINGKV); - helpMap["setknob"] = CommandHelp("setknob [CONFIG_CLASS]", - "updates a knob to specified value", - "setknob will prompt for a description of the changes" ESCAPINGKV); - - helpMap["getknob"] = CommandHelp( - "getknob [CONFIG_CLASS]", "gets the value of the specified knob", "CONFIG_CLASS is optional." ESCAPINGK); - - helpMap["clearknob"] = CommandHelp("clearknob [CONFIG_CLASS]", - "clears the value of the specified knob in the configuration database", - "CONFIG_CLASS is optional." ESCAPINGK); - helpMap["option"] = CommandHelp( "option